1
0
mirror of https://github.com/wolfpld/tracy.git synced 2025-03-20 07:40:02 +08:00

Add TRACY_VK_USE_SYMBOL_TABLE to allow use with apps that don't statically link the Vulkan loader

This commit is contained in:
Blake Taylor 2023-06-02 17:28:30 -07:00
parent c57b8994f6
commit b81cdf5b01

View File

@ -5,9 +5,6 @@
#define TracyVkContext(x,y,z,w) nullptr #define TracyVkContext(x,y,z,w) nullptr
#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr #define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr
#if defined VK_EXT_host_query_reset
#define TracyVkContextHostCalibrated(x,y,z,w,a) nullptr
#endif
#define TracyVkDestroy(x) #define TracyVkDestroy(x)
#define TracyVkContextName(c,x,y) #define TracyVkContextName(c,x,y)
#define TracyVkNamedZone(c,x,y,z,w) #define TracyVkNamedZone(c,x,y,z,w)
@ -45,6 +42,41 @@ using TracyVkCtx = void*;
namespace tracy namespace tracy
{ {
#if defined TRACY_VK_USE_SYMBOL_TABLE
// X-macro lists of the Vulkan entry points the profiler calls. Each list applies
// `Operation` to every name it contains; the lists are expanded once to declare the
// members of VkSymbolTable and again when the table is populated.

// Core commands that are resolved through the device proc-addr callback.
#define LoadVkDeviceCoreSymbols(Operation) \
    Operation(vkBeginCommandBuffer) \
    Operation(vkCmdResetQueryPool) \
    Operation(vkCmdWriteTimestamp) \
    Operation(vkCreateQueryPool) \
    Operation(vkDestroyQueryPool) \
    Operation(vkEndCommandBuffer) \
    Operation(vkGetQueryPoolResults) \
    Operation(vkQueueSubmit) \
    Operation(vkQueueWaitIdle)

// Extension commands that dispatch on a VkDevice and are therefore resolved through
// the device proc-addr callback.
#define LoadVkDeviceExtensionSymbols(Operation) \
    Operation(vkGetCalibratedTimestampsEXT)

// Commands that take a VkPhysicalDevice and are resolved through the instance
// proc-addr callback. vkGetPhysicalDeviceCalibrateableTimeDomainsEXT belongs here even
// though it comes from an extension: it is not a device-level command, so
// vkGetDeviceProcAddr yields a null pointer for it, which would silently disable
// calibrated timestamps.
#define LoadVkInstanceCoreSymbols(Operation) \
    Operation(vkGetPhysicalDeviceProperties) \
    Operation(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT)

// Table of dynamically resolved Vulkan function pointers: one PFN_<name> member,
// named <name>, for every entry of the three lists above.
struct VkSymbolTable
{
#define MAKE_PFN(name) PFN_##name name;
    LoadVkDeviceCoreSymbols(MAKE_PFN)
    LoadVkDeviceExtensionSymbols(MAKE_PFN)
    LoadVkInstanceCoreSymbols(MAKE_PFN)
#undef MAKE_PFN
};

// Route a Vulkan call expression through the symbol table. The argument is the whole
// call (e.g. `vkQueueWaitIdle( queue )`); prefixing it with the table object turns it
// into a call through the matching member pointer.
//   VK_FUNCTION_WRAPPER         - for code that has an `m_symbols` member in scope.
//   CONTEXT_VK_FUNCTION_WRAPPER - for code that reaches the table through `m_ctx`.
#define VK_FUNCTION_WRAPPER(callSignature) m_symbols.callSignature
#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) m_ctx->m_symbols.callSignature
#else
// No symbol table: the wrappers pass the call expression through untouched, so the
// Vulkan functions are called directly by name.
#define VK_FUNCTION_WRAPPER(callSignature) callSignature
#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) callSignature
#endif
class VkCtx class VkCtx
{ {
friend class VkCtxScope; friend class VkCtxScope;
@ -52,7 +84,11 @@ class VkCtx
enum { QueryCount = 64 * 1024 }; enum { QueryCount = 64 * 1024 };
public: public:
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT _vkGetCalibratedTimestampsEXT ) #if defined TRACY_VK_USE_SYMBOL_TABLE
VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr, bool calibrated )
#else
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT)
#endif
: m_device( device ) : m_device( device )
, m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT ) , m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT )
, m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) ) , m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
@ -60,16 +96,57 @@ public:
, m_tail( 0 ) , m_tail( 0 )
, m_oldCnt( 0 ) , m_oldCnt( 0 )
, m_queryCount( QueryCount ) , m_queryCount( QueryCount )
, m_vkGetCalibratedTimestampsEXT( _vkGetCalibratedTimestampsEXT ) #if !defined TRACY_VK_USE_SYMBOL_TABLE
, m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT )
#endif
{ {
assert( m_context != 255 ); assert( m_context != 255 );
if( _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT && _vkGetCalibratedTimestampsEXT ) #if defined TRACY_VK_USE_SYMBOL_TABLE
PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr);
if ( calibrated )
{ {
FindAvailableTimeDomains( physdev, _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ); m_vkGetCalibratedTimestampsEXT = m_symbols.vkGetCalibratedTimestampsEXT;
} }
CreateQueryPool(); #endif
if( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) && m_vkGetCalibratedTimestampsEXT )
{
uint32_t num;
VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, nullptr ) );
if( num > 4 ) num = 4;
VkTimeDomainEXT data[4];
VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, data ) );
VkTimeDomainEXT supportedDomain = (VkTimeDomainEXT)-1;
#if defined _WIN32
supportedDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT;
#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW
supportedDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT;
#endif
for( uint32_t i=0; i<num; i++ )
{
if( data[i] == supportedDomain )
{
m_timeDomain = data[i];
break;
}
}
}
VkPhysicalDeviceProperties prop;
VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceProperties( physdev, &prop ) );
const float period = prop.limits.timestampPeriod;
VkQueryPoolCreateInfo poolInfo = {};
poolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
poolInfo.queryCount = m_queryCount;
poolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP;
while( VK_FUNCTION_WRAPPER( vkCreateQueryPool( device, &poolInfo, nullptr, &m_query ) != VK_SUCCESS ) )
{
m_queryCount /= 2;
poolInfo.queryCount = m_queryCount;
}
VkCommandBufferBeginInfo beginInfo = {}; VkCommandBufferBeginInfo beginInfo = {};
beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
@ -80,84 +157,87 @@ public:
submitInfo.commandBufferCount = 1; submitInfo.commandBufferCount = 1;
submitInfo.pCommandBuffers = &cmdbuf; submitInfo.pCommandBuffers = &cmdbuf;
vkBeginCommandBuffer( cmdbuf, &beginInfo ); VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) );
vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ); VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) );
vkEndCommandBuffer( cmdbuf ); VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) );
vkQueueWaitIdle( queue ); VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) );
int64_t tcpu, tgpu; int64_t tcpu, tgpu;
if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT ) if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT )
{ {
vkBeginCommandBuffer( cmdbuf, &beginInfo ); VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) );
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 ); VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 ) );
vkEndCommandBuffer( cmdbuf ); VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) );
vkQueueWaitIdle( queue ); VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) );
tcpu = Profiler::GetTime(); tcpu = Profiler::GetTime();
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT ); VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT ) );
vkBeginCommandBuffer( cmdbuf, &beginInfo ); VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) );
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 ); VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 ) );
vkEndCommandBuffer( cmdbuf ); VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) );
vkQueueWaitIdle( queue ); VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) );
} }
else else
{ {
FindCalibratedTimestampDeviation(); enum { NumProbes = 32 };
VkCalibratedTimestampInfoEXT spec[2] = {
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
};
uint64_t ts[2];
uint64_t deviation[NumProbes];
for( int i=0; i<NumProbes; i++ )
{
m_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, deviation+i );
}
uint64_t minDeviation = deviation[0];
for( int i=1; i<NumProbes; i++ )
{
if( minDeviation > deviation[i] )
{
minDeviation = deviation[i];
}
}
m_deviation = minDeviation * 3 / 2;
#if defined _WIN32
m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() );
#endif
Calibrate( device, m_prevCalibration, tgpu ); Calibrate( device, m_prevCalibration, tgpu );
tcpu = Profiler::GetTime(); tcpu = Profiler::GetTime();
} }
WriteInitialItem( physdev, tcpu, tgpu ); uint8_t flags = 0;
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration;
m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount ); auto item = Profiler::QueueSerial();
} MemWrite( &item->hdr.type, QueueType::GpuNewContext );
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.flags, flags );
MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan );
#if defined VK_EXT_host_query_reset #ifdef TRACY_ON_DEMAND
/** GetProfiler().DeferItem( *item );
* This alternative constructor does not use command buffers and instead uses functionality from
* VK_EXT_host_query_reset (core with 1.2 and non-optional) and VK_EXT_calibrated_timestamps. This requires
* the physical device to have another time domain apart from DEVICE to be calibrateable.
*/
VkCtx( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT _vkResetQueryPool, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT _vkGetCalibratedTimestampsEXT )
: m_device( device )
, m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT )
, m_context( GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed) )
, m_head( 0 )
, m_tail( 0 )
, m_oldCnt( 0 )
, m_queryCount( QueryCount )
, m_vkGetCalibratedTimestampsEXT( _vkGetCalibratedTimestampsEXT )
{
assert( m_context != 255);
assert( _vkResetQueryPool != nullptr );
assert( _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT != nullptr );
assert( _vkGetCalibratedTimestampsEXT != nullptr );
FindAvailableTimeDomains( physdev, _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT );
// We require a host time domain to be available to properly calibrate.
FindCalibratedTimestampDeviation();
int64_t tgpu;
Calibrate( device, m_prevCalibration, tgpu );
int64_t tcpu = Profiler::GetTime();
CreateQueryPool();
_vkResetQueryPool( device, m_query, 0, m_queryCount );
WriteInitialItem( physdev, tcpu, tgpu );
m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount );
}
#endif #endif
Profiler::QueueSerialFinish();
m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount );
}
~VkCtx() ~VkCtx()
{ {
tracy_free( m_res ); tracy_free( m_res );
vkDestroyQueryPool( m_device, m_query, nullptr ); VK_FUNCTION_WRAPPER( vkDestroyQueryPool( m_device, m_query, nullptr ) );
} }
void Name( const char* name, uint16_t len ) void Name( const char* name, uint16_t len )
@ -185,7 +265,7 @@ public:
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) if( !GetProfiler().IsConnected() )
{ {
vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ); VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) );
m_head = m_tail = m_oldCnt = 0; m_head = m_tail = m_oldCnt = 0;
int64_t tgpu; int64_t tgpu;
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu ); if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu );
@ -204,7 +284,7 @@ public:
cnt = m_head < m_tail ? m_queryCount - m_tail : m_head - m_tail; cnt = m_head < m_tail ? m_queryCount - m_tail : m_head - m_tail;
} }
if( vkGetQueryPoolResults( m_device, m_query, m_tail, cnt, sizeof( int64_t ) * m_queryCount, m_res, sizeof( int64_t ), VK_QUERY_RESULT_64_BIT ) == VK_NOT_READY ) if( VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( m_device, m_query, m_tail, cnt, sizeof( int64_t ) * m_queryCount, m_res, sizeof( int64_t ), VK_QUERY_RESULT_64_BIT ) == VK_NOT_READY ) )
{ {
m_oldCnt = cnt; m_oldCnt = cnt;
return; return;
@ -239,7 +319,7 @@ public:
} }
} }
vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt ); VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt ) );
m_tail += cnt; m_tail += cnt;
if( m_tail == m_queryCount ) m_tail = 0; if( m_tail == m_queryCount ) m_tail = 0;
@ -285,94 +365,32 @@ private:
#endif #endif
} }
tracy_force_inline void CreateQueryPool() #if defined TRACY_VK_USE_SYMBOL_TABLE
void PopulateSymbolTable( VkInstance instance, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr )
{ {
VkQueryPoolCreateInfo poolInfo = {}; #define VK_GET_DEVICE_SYMBOL( name ) \
poolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; (PFN_##name)deviceProcAddr( m_device, #name );
poolInfo.queryCount = m_queryCount; #define VK_LOAD_DEVICE_SYMBOL( name ) \
poolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP; m_symbols.name = VK_GET_DEVICE_SYMBOL( name );
while ( vkCreateQueryPool( m_device, &poolInfo, nullptr, &m_query ) != VK_SUCCESS ) #define VK_GET_INSTANCE_SYMBOL( name ) \
{ (PFN_##name)instanceProcAddr( instance, #name );
m_queryCount /= 2; #define VK_LOAD_INSTANCE_SYMBOL( name ) \
poolInfo.queryCount = m_queryCount; m_symbols.name = VK_GET_INSTANCE_SYMBOL( name );
}
}
tracy_force_inline void FindAvailableTimeDomains( VkPhysicalDevice physicalDevice, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) LoadVkDeviceCoreSymbols( VK_LOAD_DEVICE_SYMBOL )
{ LoadVkDeviceExtensionSymbols( VK_LOAD_DEVICE_SYMBOL )
uint32_t num; LoadVkInstanceCoreSymbols( VK_LOAD_INSTANCE_SYMBOL )
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, nullptr ); #undef VK_LOAD_DEVICE_SYMBOL
if(num > 4) num = 4; #undef VK_LOAD_INSTANCE_SYMBOL
VkTimeDomainEXT data[4]; }
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, data );
VkTimeDomainEXT supportedDomain = (VkTimeDomainEXT)-1;
#if defined _WIN32
supportedDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT;
#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW
supportedDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT;
#endif #endif
for( uint32_t i=0; i<num; i++ ) {
if(data[i] == supportedDomain) {
m_timeDomain = data[i];
break;
}
}
}
tracy_force_inline void FindCalibratedTimestampDeviation()
{
assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT );
constexpr size_t NumProbes = 32;
VkCalibratedTimestampInfoEXT spec[2] = {
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
};
uint64_t ts[2];
uint64_t deviation[NumProbes];
for( int i=0; i<NumProbes; i++ ) {
m_vkGetCalibratedTimestampsEXT( m_device, 2, spec, ts, deviation + i );
}
uint64_t minDeviation = deviation[0];
for( int i=1; i<NumProbes; i++ ) {
if ( minDeviation > deviation[i] ) {
minDeviation = deviation[i];
}
}
m_deviation = minDeviation * 3 / 2;
#if defined _WIN32
m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() );
#endif
}
tracy_force_inline void WriteInitialItem( VkPhysicalDevice physdev, int64_t tcpu, int64_t tgpu )
{
uint8_t flags = 0;
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration;
VkPhysicalDeviceProperties prop;
vkGetPhysicalDeviceProperties( physdev, &prop );
const float period = prop.limits.timestampPeriod;
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
MemWrite( &item->gpuNewContext.cpuTime, tcpu );
MemWrite( &item->gpuNewContext.gpuTime, tgpu );
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.flags, flags );
MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
Profiler::QueueSerialFinish();
}
VkDevice m_device; VkDevice m_device;
VkQueryPool m_query; VkQueryPool m_query;
VkTimeDomainEXT m_timeDomain; VkTimeDomainEXT m_timeDomain;
#if defined TRACY_VK_USE_SYMBOL_TABLE
VkSymbolTable m_symbols;
#endif
uint64_t m_deviation; uint64_t m_deviation;
int64_t m_qpcToNs; int64_t m_qpcToNs;
int64_t m_prevCalibration; int64_t m_prevCalibration;
@ -403,7 +421,7 @@ public:
m_ctx = ctx; m_ctx = ctx;
const auto queryId = ctx->NextQueryId(); const auto queryId = ctx->NextQueryId();
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ); CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
auto item = Profiler::QueueSerial(); auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial ); MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial );
@ -427,7 +445,7 @@ public:
m_ctx = ctx; m_ctx = ctx;
const auto queryId = ctx->NextQueryId(); const auto queryId = ctx->NextQueryId();
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ); CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) ); auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial ); MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
@ -451,7 +469,7 @@ public:
m_ctx = ctx; m_ctx = ctx;
const auto queryId = ctx->NextQueryId(); const auto queryId = ctx->NextQueryId();
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ); CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
auto item = Profiler::QueueSerial(); auto item = Profiler::QueueSerial();
@ -476,7 +494,7 @@ public:
m_ctx = ctx; m_ctx = ctx;
const auto queryId = ctx->NextQueryId(); const auto queryId = ctx->NextQueryId();
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ); CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
auto item = Profiler::QueueSerialCallstack( Callstack( depth ) ); auto item = Profiler::QueueSerialCallstack( Callstack( depth ) );
@ -494,7 +512,7 @@ public:
if( !m_active ) return; if( !m_active ) return;
const auto queryId = m_ctx->NextQueryId(); const auto queryId = m_ctx->NextQueryId();
vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_ctx->m_query, queryId ); CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_ctx->m_query, queryId ) );
auto item = Profiler::QueueSerial(); auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial ); MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial );
@ -512,21 +530,20 @@ private:
VkCtx* m_ctx; VkCtx* m_ctx;
}; };
#if defined TRACY_VK_USE_SYMBOL_TABLE
static inline VkCtx* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr, bool calibrated = false)
#else
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct ) static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
{
auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) );
new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct );
return ctx;
}
#if defined VK_EXT_host_query_reset
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT qpreset, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
{
auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) );
new(ctx) VkCtx( physdev, device, qpreset, gpdctd, gct );
return ctx;
}
#endif #endif
{
auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) );
#if defined TRACY_VK_USE_SYMBOL_TABLE
new(ctx) VkCtx( instance, physdev, device, queue, cmdbuf, instanceProcAddr, getDeviceProcAddr, calibrated );
#else
new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct );
#endif
return ctx;
}
static inline void DestroyVkContext( VkCtx* ctx ) static inline void DestroyVkContext( VkCtx* ctx )
{ {
@ -538,10 +555,15 @@ static inline void DestroyVkContext( VkCtx* ctx )
using TracyVkCtx = tracy::VkCtx*; using TracyVkCtx = tracy::VkCtx*;
#if defined TRACY_VK_USE_SYMBOL_TABLE
#define TracyVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr );
#else
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr ); #define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr );
#endif
#if defined TRACY_VK_USE_SYMBOL_TABLE
#define TracyVkContextCalibrated( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr, true );
#else
#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct ); #define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct );
#if defined VK_EXT_host_query_reset
#define TracyVkContextHostCalibrated( physdev, device, qpreset, gpdctd, gct ) tracy::CreateVkContext( physdev, device, qpreset, gpdctd, gct );
#endif #endif
#define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx ); #define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx );
#define TracyVkContextName( ctx, name, size ) ctx->Name( name, size ); #define TracyVkContextName( ctx, name, size ) ctx->Name( name, size );