From b81cdf5b01e81c7f5584718c239e6c9f990b29f8 Mon Sep 17 00:00:00 2001 From: Blake Taylor Date: Fri, 2 Jun 2023 17:28:30 -0700 Subject: [PATCH] Adding TRACY_VK_USE_SYMBOL_TABLE to allow for use with apps that don't static link the loader --- public/tracy/TracyVulkan.hpp | 360 +++++++++++++++++++---------------- 1 file changed, 191 insertions(+), 169 deletions(-) diff --git a/public/tracy/TracyVulkan.hpp b/public/tracy/TracyVulkan.hpp index 694f6abf..99161b6b 100644 --- a/public/tracy/TracyVulkan.hpp +++ b/public/tracy/TracyVulkan.hpp @@ -5,9 +5,6 @@ #define TracyVkContext(x,y,z,w) nullptr #define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr -#if defined VK_EXT_host_query_reset -#define TracyVkContextHostCalibrated(x,y,z,w,a) nullptr -#endif #define TracyVkDestroy(x) #define TracyVkContextName(c,x,y) #define TracyVkNamedZone(c,x,y,z,w) @@ -45,6 +42,41 @@ using TracyVkCtx = void*; namespace tracy { +#if defined TRACY_VK_USE_SYMBOL_TABLE +#define LoadVkDeviceCoreSymbols(Operation) \ + Operation(vkBeginCommandBuffer) \ + Operation(vkCmdResetQueryPool) \ + Operation(vkCmdWriteTimestamp) \ + Operation(vkCreateQueryPool) \ + Operation(vkDestroyQueryPool) \ + Operation(vkEndCommandBuffer) \ + Operation(vkGetQueryPoolResults) \ + Operation(vkQueueSubmit) \ + Operation(vkQueueWaitIdle) + +#define LoadVkDeviceExtensionSymbols(Operation) \ + Operation(vkGetCalibratedTimestampsEXT) \ + Operation(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT) + +#define LoadVkInstanceCoreSymbols(Operation) \ + Operation(vkGetPhysicalDeviceProperties) + +struct VkSymbolTable +{ +#define MAKE_PFN(name) PFN_##name name; + LoadVkDeviceCoreSymbols(MAKE_PFN) + LoadVkDeviceExtensionSymbols(MAKE_PFN) + LoadVkInstanceCoreSymbols(MAKE_PFN) +#undef MAKE_PFN +}; + +#define VK_FUNCTION_WRAPPER(callSignature) m_symbols.callSignature +#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) m_ctx->m_symbols.callSignature +#else +#define VK_FUNCTION_WRAPPER(callSignature) callSignature +#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) callSignature +#endif + class VkCtx { friend class VkCtxScope; @@ -52,7 +84,11 @@ class VkCtx enum { QueryCount = 64 * 1024 }; public: - VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT _vkGetCalibratedTimestampsEXT ) +#if defined TRACY_VK_USE_SYMBOL_TABLE + VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr, bool calibrated ) +#else + VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT) +#endif : m_device( device ) , m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT ) , m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) ) @@ -60,16 +96,57 @@ public: , m_tail( 0 ) , m_oldCnt( 0 ) , m_queryCount( QueryCount ) - , m_vkGetCalibratedTimestampsEXT( _vkGetCalibratedTimestampsEXT ) +#if !defined TRACY_VK_USE_SYMBOL_TABLE + , m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT ) +#endif { assert( m_context != 255 ); - if( _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT && _vkGetCalibratedTimestampsEXT ) +#if defined TRACY_VK_USE_SYMBOL_TABLE + PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr); + if ( calibrated ) { - FindAvailableTimeDomains( physdev, _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ); + m_vkGetCalibratedTimestampsEXT = m_symbols.vkGetCalibratedTimestampsEXT; } - CreateQueryPool(); +#endif + + if( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) && m_vkGetCalibratedTimestampsEXT ) + { + uint32_t num; + VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, nullptr ) ); + if( num > 4 ) num = 4; + VkTimeDomainEXT data[4]; + VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, data ) ); + VkTimeDomainEXT supportedDomain = (VkTimeDomainEXT)-1; +#if defined _WIN32 + supportedDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT; +#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW + supportedDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT; +#endif + for( uint32_t i=0; i deviation[i] ) + { + minDeviation = deviation[i]; + } + } + m_deviation = minDeviation * 3 / 2; + +#if defined _WIN32 + m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() ); +#endif + Calibrate( device, m_prevCalibration, tgpu ); tcpu = Profiler::GetTime(); } - WriteInitialItem( physdev, tcpu, tgpu ); + uint8_t flags = 0; + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration; - m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount ); - } + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuNewContext ); + MemWrite( &item->gpuNewContext.cpuTime, tcpu ); + MemWrite( &item->gpuNewContext.gpuTime, tgpu ); + memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) ); + MemWrite( &item->gpuNewContext.period, period ); + MemWrite( &item->gpuNewContext.context, m_context ); + MemWrite( &item->gpuNewContext.flags, flags ); + MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan ); -#if defined VK_EXT_host_query_reset - /** - * This alternative constructor does not use command buffers and instead uses functionality from - * VK_EXT_host_query_reset (core with 1.2 and non-optional) and VK_EXT_calibrated_timestamps. This requires - * the physical device to have another time domain apart from DEVICE to be calibrateable. - */ - VkCtx( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT _vkResetQueryPool, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT _vkGetCalibratedTimestampsEXT ) - : m_device( device ) - , m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT ) - , m_context( GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed) ) - , m_head( 0 ) - , m_tail( 0 ) - , m_oldCnt( 0 ) - , m_queryCount( QueryCount ) - , m_vkGetCalibratedTimestampsEXT( _vkGetCalibratedTimestampsEXT ) - { - assert( m_context != 255); - assert( _vkResetQueryPool != nullptr ); - assert( _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT != nullptr ); - assert( _vkGetCalibratedTimestampsEXT != nullptr ); - - FindAvailableTimeDomains( physdev, _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ); - - // We require a host time domain to be available to properly calibrate. - FindCalibratedTimestampDeviation(); - int64_t tgpu; - Calibrate( device, m_prevCalibration, tgpu ); - int64_t tcpu = Profiler::GetTime(); - - CreateQueryPool(); - _vkResetQueryPool( device, m_query, 0, m_queryCount ); - - WriteInitialItem( physdev, tcpu, tgpu ); - - m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount ); - } +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem( *item ); #endif + Profiler::QueueSerialFinish(); + + m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount ); + } ~VkCtx() { tracy_free( m_res ); - vkDestroyQueryPool( m_device, m_query, nullptr ); + VK_FUNCTION_WRAPPER( vkDestroyQueryPool( m_device, m_query, nullptr ) ); } void Name( const char* name, uint16_t len ) @@ -185,7 +265,7 @@ public: #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) { - vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ); + VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) ); m_head = m_tail = m_oldCnt = 0; int64_t tgpu; if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu ); @@ -204,7 +284,7 @@ public: cnt = m_head < m_tail ? m_queryCount - m_tail : m_head - m_tail; } - if( vkGetQueryPoolResults( m_device, m_query, m_tail, cnt, sizeof( int64_t ) * m_queryCount, m_res, sizeof( int64_t ), VK_QUERY_RESULT_64_BIT ) == VK_NOT_READY ) + if( VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( m_device, m_query, m_tail, cnt, sizeof( int64_t ) * m_queryCount, m_res, sizeof( int64_t ), VK_QUERY_RESULT_64_BIT ) == VK_NOT_READY ) ) { m_oldCnt = cnt; return; @@ -239,7 +319,7 @@ public: } } - vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt ); + VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt ) ); m_tail += cnt; if( m_tail == m_queryCount ) m_tail = 0; @@ -285,94 +365,32 @@ private: #endif } - tracy_force_inline void CreateQueryPool() +#if defined TRACY_VK_USE_SYMBOL_TABLE + void PopulateSymbolTable( VkInstance instance, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr ) { - VkQueryPoolCreateInfo poolInfo = {}; - poolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; - poolInfo.queryCount = m_queryCount; - poolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP; - while ( vkCreateQueryPool( m_device, &poolInfo, nullptr, &m_query ) != VK_SUCCESS ) - { - m_queryCount /= 2; - poolInfo.queryCount = m_queryCount; - } - } +#define VK_GET_DEVICE_SYMBOL( name ) \ + (PFN_##name)deviceProcAddr( m_device, #name ); +#define VK_LOAD_DEVICE_SYMBOL( name ) \ + m_symbols.name = VK_GET_DEVICE_SYMBOL( name ); +#define VK_GET_INSTANCE_SYMBOL( name ) \ + (PFN_##name)instanceProcAddr( instance, #name ); +#define VK_LOAD_INSTANCE_SYMBOL( name ) \ + m_symbols.name = VK_GET_INSTANCE_SYMBOL( name ); - tracy_force_inline void FindAvailableTimeDomains( VkPhysicalDevice physicalDevice, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) - { - uint32_t num; - _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, nullptr ); - if(num > 4) num = 4; - VkTimeDomainEXT data[4]; - _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, data ); - VkTimeDomainEXT supportedDomain = (VkTimeDomainEXT)-1; -#if defined _WIN32 - supportedDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT; -#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW - supportedDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT; + LoadVkDeviceCoreSymbols( VK_LOAD_DEVICE_SYMBOL ) + LoadVkDeviceExtensionSymbols( VK_LOAD_DEVICE_SYMBOL ) + LoadVkInstanceCoreSymbols( VK_LOAD_INSTANCE_SYMBOL ) +#undef VK_LOAD_DEVICE_SYMBOL +#undef VK_LOAD_INSTANCE_SYMBOL + } #endif - for( uint32_t i=0; i deviation[i] ) { - minDeviation = deviation[i]; - } - } - m_deviation = minDeviation * 3 / 2; - -#if defined _WIN32 - m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() ); -#endif - } - - tracy_force_inline void WriteInitialItem( VkPhysicalDevice physdev, int64_t tcpu, int64_t tgpu ) - { - uint8_t flags = 0; - if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration; - - VkPhysicalDeviceProperties prop; - vkGetPhysicalDeviceProperties( physdev, &prop ); - const float period = prop.limits.timestampPeriod; - - auto item = Profiler::QueueSerial(); - MemWrite( &item->hdr.type, QueueType::GpuNewContext ); - MemWrite( &item->gpuNewContext.cpuTime, tcpu ); - MemWrite( &item->gpuNewContext.gpuTime, tgpu ); - memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) ); - MemWrite( &item->gpuNewContext.period, period ); - MemWrite( &item->gpuNewContext.context, m_context ); - MemWrite( &item->gpuNewContext.flags, flags ); - MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan ); - -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - Profiler::QueueSerialFinish(); - } VkDevice m_device; VkQueryPool m_query; VkTimeDomainEXT m_timeDomain; +#if defined TRACY_VK_USE_SYMBOL_TABLE + VkSymbolTable m_symbols; +#endif uint64_t m_deviation; int64_t m_qpcToNs; int64_t m_prevCalibration; @@ -403,7 +421,7 @@ public: m_ctx = ctx; const auto queryId = ctx->NextQueryId(); - vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); auto item = Profiler::QueueSerial(); MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial ); @@ -427,7 +445,7 @@ public: m_ctx = ctx; const auto queryId = ctx->NextQueryId(); - vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); auto item = Profiler::QueueSerialCallstack( Callstack( depth ) ); MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial ); @@ -451,7 +469,7 @@ public: m_ctx = ctx; const auto queryId = ctx->NextQueryId(); - vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); auto item = Profiler::QueueSerial(); @@ -476,7 +494,7 @@ public: m_ctx = ctx; const auto queryId = ctx->NextQueryId(); - vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); auto item = Profiler::QueueSerialCallstack( Callstack( depth ) ); @@ -494,7 +512,7 @@ public: if( !m_active ) return; const auto queryId = m_ctx->NextQueryId(); - vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_ctx->m_query, queryId ); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_ctx->m_query, queryId ) ); auto item = Profiler::QueueSerial(); MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial ); @@ -512,21 +530,20 @@ private: VkCtx* m_ctx; }; +#if defined TRACY_VK_USE_SYMBOL_TABLE +static inline VkCtx* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr, bool calibrated = false) +#else static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct ) -{ - auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) ); - new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct ); - return ctx; -} - -#if defined VK_EXT_host_query_reset -static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT qpreset, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct ) -{ - auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) ); - new(ctx) VkCtx( physdev, device, qpreset, gpdctd, gct ); - return ctx; -} #endif +{ + auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) ); +#if defined TRACY_VK_USE_SYMBOL_TABLE + new(ctx) VkCtx( instance, physdev, device, queue, cmdbuf, instanceProcAddr, getDeviceProcAddr, calibrated ); +#else + new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct ); +#endif + return ctx; +} static inline void DestroyVkContext( VkCtx* ctx ) { @@ -538,10 +555,15 @@ static inline void DestroyVkContext( VkCtx* ctx ) using TracyVkCtx = tracy::VkCtx*; +#if defined TRACY_VK_USE_SYMBOL_TABLE +#define TracyVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ); +#else #define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr ); +#endif +#if defined TRACY_VK_USE_SYMBOL_TABLE +#define TracyVkContextCalibrated( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr, true ); +#else #define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct ); -#if defined VK_EXT_host_query_reset -#define TracyVkContextHostCalibrated( physdev, device, qpreset, gpdctd, gct ) tracy::CreateVkContext( physdev, device, qpreset, gpdctd, gct ); #endif #define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx ); #define TracyVkContextName( ctx, name, size ) ctx->Name( name, size );