mirror of
https://github.com/wolfpld/tracy.git
synced 2025-03-20 07:40:02 +08:00
When using Tracy's ON_DEMAND mode, it is acceptable in most cases to drop measurements and perform a GPU clock synchronization (which may stall) during the first TracyGpuCollect. This is not enabled by default in the CMakeLists, both for backward compatibility and because it may be a bit intrusive. This commit also makes the OpenGL TracyGpuZone* macros a tiny bit more efficient by not calling the thread-local GetGpuCtx() repeatedly. It is also more resilient when no context has been declared on the current thread: the application will not crash if a context is used on several threads even though it was declared on only one of them (in which case GetGpuCtx().ptr == nullptr). Tracy does not support this scenario, so on one hand this helps users by not crashing, but on the other hand it turns a usage error into a silent one.
357 lines
14 KiB
C++
357 lines
14 KiB
C++
#ifndef __TRACYOPENGL_HPP__
|
|
#define __TRACYOPENGL_HPP__
|
|
|
|
#if !defined TRACY_ENABLE || defined __APPLE__
|
|
|
|
// This branch is selected when TRACY_ENABLE is not defined or __APPLE__ is
// defined: every GPU profiling macro expands to nothing.

// Context management.
#define TracyGpuContext
#define TracyGpuContextName( name, size )
#define TracyGpuCollect

// Zones.
#define TracyGpuNamedZone( varname, name, active )
#define TracyGpuNamedZoneC( varname, name, color, active )
#define TracyGpuZone( name )
#define TracyGpuZoneC( name, color )
#define TracyGpuZoneTransient( varname, name, active )

// Zones taking an explicit callstack depth.
#define TracyGpuNamedZoneS( varname, name, depth, active )
#define TracyGpuNamedZoneCS( varname, name, color, depth, active )
#define TracyGpuZoneS( name, depth )
#define TracyGpuZoneCS( name, color, depth )
#define TracyGpuZoneTransientS( varname, name, depth, active )
|
|
|
|
namespace tracy
{

struct SourceLocationData;

// Empty stand-in for GpuCtxScope in the disabled branch: both constructors
// accept the same argument lists as the real ones and ignore them.
class GpuCtxScope
{
public:
    // (source location, active flag) -- no-op.
    GpuCtxScope( const SourceLocationData* /*srcloc*/, bool /*is_active*/ )
    {
    }

    // (source location, callstack depth, active flag) -- no-op.
    GpuCtxScope( const SourceLocationData* /*srcloc*/, int /*depth*/, bool /*is_active*/ )
    {
    }
};

}
|
|
|
|
#else
|
|
|
|
#include <atomic>
|
|
#include <assert.h>
|
|
#include <stdlib.h>
|
|
|
|
#include "Tracy.hpp"
|
|
#include "client/TracyProfiler.hpp"
|
|
#include "client/TracyCallstack.hpp"
|
|
#include "common/TracyAlign.hpp"
|
|
#include "common/TracyAlloc.hpp"
|
|
|
|
// When the GL headers provide GL_TIMESTAMP_EXT but not GL_TIMESTAMP, map the
// unsuffixed names used further down onto their EXT-suffixed counterparts.
#if !defined( GL_TIMESTAMP ) && defined( GL_TIMESTAMP_EXT )
#  define GL_TIMESTAMP          GL_TIMESTAMP_EXT
#  define GL_QUERY_COUNTER_BITS GL_QUERY_COUNTER_BITS_EXT
#  define glGetQueryObjectiv    glGetQueryObjectivEXT
#  define glGetQueryObjectui64v glGetQueryObjectui64vEXT
#  define glQueryCounter        glQueryCounterEXT
#endif
|
|
|
|
// Creates the GPU context reached through tracy::GetGpuCtx(): the storage is
// obtained from tracy_malloc and the object is constructed with placement new.
#define TracyGpuContext \
    tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); \
    new(tracy::GetGpuCtx().ptr) tracy::GpuCtx;

// Forwards a name (pointer + length) to the current GPU context.
#define TracyGpuContextName( name, size ) \
    tracy::GetGpuCtx().ptr->Name( name, size );

// Zone macros. The "Named" forms emit a static SourceLocationData followed by
// a tracy::GpuCtxScope variable. When both TRACY_HAS_CALLSTACK and
// TRACY_CALLSTACK are defined, TRACY_CALLSTACK is passed along as the depth.
#if defined( TRACY_HAS_CALLSTACK ) && defined( TRACY_CALLSTACK )

#  define TracyGpuNamedZone( varname, name, active ) \
    static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; \
    tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );

#  define TracyGpuNamedZoneC( varname, name, color, active ) \
    static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; \
    tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );

#  define TracyGpuZone( name ) \
    TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK, true )

#  define TracyGpuZoneC( name, color ) \
    TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true )

#  define TracyGpuZoneTransient( varname, name, active ) \
    tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), TRACY_CALLSTACK, active );

#else

#  define TracyGpuNamedZone( varname, name, active ) \
    static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; \
    tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );

#  define TracyGpuNamedZoneC( varname, name, color, active ) \
    static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; \
    tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );

#  define TracyGpuZone( name ) \
    TracyGpuNamedZone( ___tracy_gpu_zone, name, true )

#  define TracyGpuZoneC( name, color ) \
    TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color, true )

#  define TracyGpuZoneTransient( varname, name, active ) \
    tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), active );

#endif

// Runs Collect() on the current GPU context; does nothing when the context
// pointer is null.
#define TracyGpuCollect \
    if(tracy::GetGpuCtx().ptr){tracy::GetGpuCtx().ptr->Collect();}

// "S" variants take an explicit callstack depth. Without TRACY_HAS_CALLSTACK
// the depth argument is dropped and the plain macro is used instead.
#ifdef TRACY_HAS_CALLSTACK

#  define TracyGpuNamedZoneS( varname, name, depth, active ) \
    static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; \
    tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );

#  define TracyGpuNamedZoneCS( varname, name, color, depth, active ) \
    static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; \
    tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );

#  define TracyGpuZoneS( name, depth ) \
    TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth, true )

#  define TracyGpuZoneCS( name, color, depth ) \
    TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth, true )

#  define TracyGpuZoneTransientS( varname, name, depth, active ) \
    tracy::GpuCtxScope varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), depth, active );

#else

#  define TracyGpuNamedZoneS( varname, name, depth, active ) \
    TracyGpuNamedZone( varname, name, active )

#  define TracyGpuNamedZoneCS( varname, name, color, depth, active ) \
    TracyGpuNamedZoneC( varname, name, color, active )

#  define TracyGpuZoneS( name, depth ) \
    TracyGpuZone( name )

#  define TracyGpuZoneCS( name, color, depth ) \
    TracyGpuZoneC( name, color )

#  define TracyGpuZoneTransientS( varname, name, depth, active ) \
    TracyGpuZoneTransient( varname, name, active )

#endif
|
|
|
|
namespace tracy
|
|
{
|
|
|
|
class GpuCtx
|
|
{
|
|
friend class GpuCtxScope;
|
|
|
|
enum { QueryCount = 64 * 1024 };
|
|
|
|
public:
|
|
GpuCtx()
|
|
: m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
|
|
#ifdef TRACY_ON_DEMAND_GPU_SYNC
|
|
, m_isContextReady( false )
|
|
#endif
|
|
, m_head( 0 )
|
|
, m_tail( 0 )
|
|
{
|
|
assert( m_context != 255 );
|
|
glGenQueries(QueryCount, m_query);
|
|
#ifdef TRACY_ON_DEMAND_GPU_SYNC
|
|
if(GetProfiler().IsConnected())
|
|
#endif
|
|
SyncClockAndSendContextInfo();
|
|
}
|
|
|
|
void Name( const char* name, uint16_t len )
|
|
{
|
|
auto ptr = (char*)tracy_malloc( len );
|
|
memcpy( ptr, name, len );
|
|
|
|
TracyLfqPrepare( QueueType::GpuContextName );
|
|
MemWrite( &item->gpuContextNameFat.context, m_context );
|
|
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
|
|
MemWrite( &item->gpuContextNameFat.size, len );
|
|
#ifdef TRACY_ON_DEMAND
|
|
GetProfiler().DeferItem( *item );
|
|
#endif
|
|
TracyLfqCommit;
|
|
}
|
|
|
|
void Collect()
|
|
{
|
|
ZoneScopedC( Color::Red4 );
|
|
|
|
#ifndef TRACY_ON_DEMAND_GPU_SYNC
|
|
if (m_tail == m_head) return;
|
|
#endif
|
|
|
|
#ifdef TRACY_ON_DEMAND
|
|
if( !GetProfiler().IsConnected() )
|
|
{
|
|
m_head = m_tail = 0;
|
|
#ifdef TRACY_ON_DEMAND_GPU_SYNC
|
|
m_isContextReady = false;
|
|
#endif
|
|
return;
|
|
}
|
|
#ifdef TRACY_ON_DEMAND_GPU_SYNC
|
|
else if (!m_isContextReady)
|
|
{
|
|
SyncClockAndSendContextInfo();
|
|
assert(m_isContextReady);
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
while( m_tail != m_head )
|
|
{
|
|
GLint available;
|
|
glGetQueryObjectiv( m_query[m_tail], GL_QUERY_RESULT_AVAILABLE, &available );
|
|
if( !available ) return;
|
|
|
|
uint64_t time;
|
|
glGetQueryObjectui64v( m_query[m_tail], GL_QUERY_RESULT, &time );
|
|
|
|
TracyLfqPrepare( QueueType::GpuTime );
|
|
MemWrite( &item->gpuTime.gpuTime, (int64_t)time );
|
|
MemWrite( &item->gpuTime.queryId, (uint16_t)m_tail );
|
|
MemWrite( &item->gpuTime.context, m_context );
|
|
TracyLfqCommit;
|
|
|
|
m_tail = ( m_tail + 1 ) % QueryCount;
|
|
}
|
|
}
|
|
|
|
private:
|
|
void SyncClockAndSendContextInfo()
|
|
{
|
|
ZoneScopedC( Color::Red4 );
|
|
|
|
int64_t tgpu;
|
|
glGetInteger64v( GL_TIMESTAMP, &tgpu );
|
|
int64_t tcpu = Profiler::GetTime();
|
|
|
|
GLint bits;
|
|
glGetQueryiv( GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &bits );
|
|
|
|
const float period = 1.f;
|
|
const auto thread = GetThreadHandle();
|
|
|
|
TracyLfqPrepare(QueueType::GpuNewContext);
|
|
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
|
|
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
|
|
MemWrite( &item->gpuNewContext.thread, thread );
|
|
MemWrite( &item->gpuNewContext.period, period );
|
|
MemWrite( &item->gpuNewContext.context, m_context );
|
|
MemWrite( &item->gpuNewContext.flags, uint8_t(0) );
|
|
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
|
|
|
|
#if defined( TRACY_ON_DEMAND )
|
|
GetProfiler().DeferItem( *item );
|
|
#elif defined( TRACY_ON_DEMAND_GPU_SYNC )
|
|
m_isContextReady = true;
|
|
#endif
|
|
|
|
TracyLfqCommit;
|
|
}
|
|
|
|
tracy_force_inline unsigned int NextQueryId()
|
|
{
|
|
const auto id = m_head;
|
|
m_head = ( m_head + 1 ) % QueryCount;
|
|
assert( m_head != m_tail );
|
|
return id;
|
|
}
|
|
|
|
tracy_force_inline unsigned int TranslateOpenGlQueryId( unsigned int id )
|
|
{
|
|
return m_query[id];
|
|
}
|
|
|
|
tracy_force_inline uint8_t GetId() const
|
|
{
|
|
return m_context;
|
|
}
|
|
|
|
#ifdef TRACY_ON_DEMAND_GPU_SYNC
|
|
tracy_force_inline bool IsContextReady() const
|
|
{
|
|
return m_isContextReady;
|
|
}
|
|
#endif
|
|
|
|
unsigned int m_query[QueryCount];
|
|
uint8_t m_context;
|
|
#ifdef TRACY_ON_DEMAND_GPU_SYNC
|
|
bool m_isContextReady;
|
|
#endif
|
|
unsigned int m_head;
|
|
unsigned int m_tail;
|
|
};
|
|
|
|
class GpuCtxScope
|
|
{
|
|
static tracy_force_inline GpuCtx* GetCpuCtxIfShouldCollect( bool is_active )
|
|
{
|
|
if ( !is_active ) return nullptr;
|
|
#ifdef TRACY_ON_DEMAND
|
|
if ( !GetProfiler().IsConnected() ) return nullptr;
|
|
#endif
|
|
GpuCtx* ctx = GetGpuCtx().ptr;
|
|
#ifdef TRACY_ON_DEMAND_GPU_SYNC
|
|
if ( ctx && !ctx->IsContextReady() ) return nullptr;
|
|
#endif
|
|
return ctx;
|
|
}
|
|
public:
|
|
tracy_force_inline GpuCtxScope(const SourceLocationData* srcloc, bool is_active )
|
|
: m_gpuCtx( GetCpuCtxIfShouldCollect( is_active ) )
|
|
{
|
|
if( !m_gpuCtx ) return;
|
|
|
|
const auto queryId = m_gpuCtx->NextQueryId();
|
|
glQueryCounter( m_gpuCtx->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
|
|
|
TracyLfqPrepare( QueueType::GpuZoneBegin );
|
|
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
|
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
|
|
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
|
MemWrite( &item->gpuZoneBegin.context, m_gpuCtx->GetId() );
|
|
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
|
TracyLfqCommit;
|
|
}
|
|
|
|
tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth, bool is_active )
|
|
: m_gpuCtx( GetCpuCtxIfShouldCollect( is_active ) )
|
|
{
|
|
if ( !m_gpuCtx ) return;
|
|
|
|
const auto queryId = m_gpuCtx->NextQueryId();
|
|
glQueryCounter( m_gpuCtx->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
|
|
|
#ifdef TRACY_FIBERS
|
|
TracyLfqPrepare( QueueType::GpuZoneBegin );
|
|
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
|
|
#else
|
|
GetProfiler().SendCallstack( depth );
|
|
TracyLfqPrepare( QueueType::GpuZoneBeginCallstack );
|
|
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
|
#endif
|
|
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
|
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
|
MemWrite( &item->gpuZoneBegin.context, m_gpuCtx->GetId() );
|
|
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
|
TracyLfqCommit;
|
|
}
|
|
|
|
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active )
|
|
: m_gpuCtx( GetCpuCtxIfShouldCollect( is_active ) )
|
|
{
|
|
if( !m_gpuCtx ) return;
|
|
|
|
const auto queryId = m_gpuCtx->NextQueryId();
|
|
glQueryCounter( m_gpuCtx->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
|
|
|
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc );
|
|
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
|
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
|
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
|
|
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
|
MemWrite( &item->gpuZoneBegin.context, m_gpuCtx->GetId() );
|
|
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
|
TracyLfqCommit;
|
|
}
|
|
|
|
tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active )
|
|
: m_gpuCtx( GetCpuCtxIfShouldCollect( is_active ) )
|
|
{
|
|
if( !m_gpuCtx ) return;
|
|
|
|
const auto queryId = m_gpuCtx->NextQueryId();
|
|
glQueryCounter( m_gpuCtx->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
|
|
|
#ifdef TRACY_FIBERS
|
|
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc );
|
|
memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) );
|
|
#else
|
|
GetProfiler().SendCallstack( depth );
|
|
TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLocCallstack );
|
|
MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() );
|
|
#endif
|
|
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
|
|
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
|
|
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
|
|
MemWrite( &item->gpuZoneBegin.context, m_gpuCtx->GetId() );
|
|
MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc );
|
|
TracyLfqCommit;
|
|
}
|
|
|
|
tracy_force_inline ~GpuCtxScope()
|
|
{
|
|
if( !m_gpuCtx ) return;
|
|
|
|
const auto queryId = m_gpuCtx->NextQueryId();
|
|
glQueryCounter( m_gpuCtx->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
|
|
|
|
TracyLfqPrepare( QueueType::GpuZoneEnd );
|
|
MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() );
|
|
memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) );
|
|
MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) );
|
|
MemWrite( &item->gpuZoneEnd.context, m_gpuCtx->GetId() );
|
|
TracyLfqCommit;
|
|
}
|
|
|
|
private:
|
|
GpuCtx* const m_gpuCtx;
|
|
};
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#endif
|