From 4f42a75df2eb7135dc8288ff14d9daedd0cdedeb Mon Sep 17 00:00:00 2001 From: Marcos Slomp Date: Mon, 11 Sep 2023 11:53:33 -0700 Subject: [PATCH 1/8] compressing redundant code --- public/tracy/TracyD3D12.hpp | 120 ++++++++++++------------------------ 1 file changed, 39 insertions(+), 81 deletions(-) diff --git a/public/tracy/TracyD3D12.hpp b/public/tracy/TracyD3D12.hpp index b175316b..153f436a 100644 --- a/public/tracy/TracyD3D12.hpp +++ b/public/tracy/TracyD3D12.hpp @@ -79,6 +79,14 @@ namespace tracy int64_t m_prevCalibration = 0; int64_t m_qpcToNs = int64_t{ 1000000000 / GetFrequencyQpc() }; + tracy_force_inline void SubmitQueueItem(tracy::QueueItem* item) + { +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem(*item); +#endif + Profiler::QueueSerialFinish(); + } + public: D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue) : m_device(device) @@ -167,12 +175,7 @@ namespace tracy MemWrite(&item->gpuNewContext.context, m_context); MemWrite(&item->gpuNewContext.flags, GpuContextCalibration); MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12); - -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem(*item); -#endif - - Profiler::QueueSerialFinish(); + SubmitQueueItem(item); m_initialized = true; } @@ -201,10 +204,7 @@ namespace tracy MemWrite( &item->gpuContextNameFat.context, m_context ); MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); MemWrite( &item->gpuContextNameFat.size, len ); -#ifdef TRACY_ON_DEMAND - GetProfiler().DeferItem( *item ); -#endif - Profiler::QueueSerialFinish(); + SubmitQueueItem(item); } void Collect() @@ -318,10 +318,20 @@ namespace tracy ID3D12GraphicsCommandList* m_cmdList = nullptr; uint32_t m_queryId = 0; // Used for tracking in nested zones. - public: - tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, bool active) + tracy_force_inline void WriteQueueItem(QueueItem* item, QueueType type, uint64_t srcLocation) + { + MemWrite(&item->hdr.type, type); + MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime()); + MemWrite(&item->gpuZoneBegin.srcloc, srcLocation); + MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle()); + MemWrite(&item->gpuZoneBegin.queryId, static_cast(m_queryId)); + MemWrite(&item->gpuZoneBegin.context, m_ctx->GetId()); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, bool active) #ifdef TRACY_ON_DEMAND - : m_active(active && GetProfiler().IsConnected()) + : m_active(active&& GetProfiler().IsConnected()) #else : m_active(active) #endif @@ -331,100 +341,49 @@ namespace tracy m_ctx = ctx; m_cmdList = cmdList; - m_queryId = ctx->NextQueryId(); - cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId); + m_queryId = m_ctx->NextQueryId(); + m_cmdList->EndQuery(m_ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId); + } + + public: + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, bool active) + : D3D12ZoneScope(ctx, cmdList, active) + { + if (!m_active) return; auto* item = Profiler::QueueSerial(); - MemWrite(&item->hdr.type, QueueType::GpuZoneBeginSerial); - MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime()); - MemWrite(&item->gpuZoneBegin.srcloc, reinterpret_cast(srcLocation)); - MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle()); - MemWrite(&item->gpuZoneBegin.queryId, static_cast(m_queryId)); - MemWrite(&item->gpuZoneBegin.context, ctx->GetId()); - - Profiler::QueueSerialFinish(); + WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcLocation)); } tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int depth, bool active) -#ifdef TRACY_ON_DEMAND - : m_active(active&& GetProfiler().IsConnected()) -#else - : m_active(active) -#endif + : D3D12ZoneScope(ctx, cmdList, active) { if (!m_active) return; - m_ctx = ctx; - m_cmdList = cmdList; - - m_queryId = ctx->NextQueryId(); - cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId); - auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); - MemWrite(&item->hdr.type, QueueType::GpuZoneBeginCallstackSerial); - MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime()); - MemWrite(&item->gpuZoneBegin.srcloc, reinterpret_cast(srcLocation)); - MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle()); - MemWrite(&item->gpuZoneBegin.queryId, static_cast(m_queryId)); - MemWrite(&item->gpuZoneBegin.context, ctx->GetId()); - - Profiler::QueueSerialFinish(); + WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast(srcLocation)); } tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, bool active) -#ifdef TRACY_ON_DEMAND - : m_active(active&& GetProfiler().IsConnected()) -#else - : m_active(active) -#endif + : D3D12ZoneScope(ctx, cmdList, active) { if (!m_active) return; - m_ctx = ctx; - m_cmdList = cmdList; - - m_queryId = ctx->NextQueryId(); - cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId); - const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); auto* item = Profiler::QueueSerial(); - MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial); - MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime()); - MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation); - MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle()); - MemWrite(&item->gpuZoneBegin.queryId, static_cast(m_queryId)); - MemWrite(&item->gpuZoneBegin.context, ctx->GetId()); - - Profiler::QueueSerialFinish(); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); } tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int depth, bool active) -#ifdef TRACY_ON_DEMAND - : m_active(active&& GetProfiler().IsConnected()) -#else - : m_active(active) -#endif + : D3D12ZoneScope(ctx, cmdList, active) { if (!m_active) return; - m_ctx = ctx; - m_cmdList = cmdList; - - m_queryId = ctx->NextQueryId(); - cmdList->EndQuery(ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId); - const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); - MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial); - MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime()); - MemWrite(&item->gpuZoneBegin.srcloc, sourceLocation); - MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle()); - MemWrite(&item->gpuZoneBegin.queryId, static_cast(m_queryId)); - MemWrite(&item->gpuZoneBegin.context, ctx->GetId()); - - Profiler::QueueSerialFinish(); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation); } tracy_force_inline ~D3D12ZoneScope() @@ -440,7 +399,6 @@ namespace tracy MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle()); MemWrite(&item->gpuZoneEnd.queryId, static_cast(queryId)); MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId()); - Profiler::QueueSerialFinish(); m_cmdList->ResolveQueryData(m_ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId, 2, m_ctx->m_readbackBuffer.Get(), m_queryId * sizeof(uint64_t)); From cf38d6a102a809559190ddccdf455b83242e2bae Mon Sep 17 00:00:00 2001 From: Marcos Slomp Date: Mon, 11 Sep 2023 11:56:52 -0700 Subject: [PATCH 2/8] reworking context id initialization --- public/tracy/TracyD3D12.hpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/public/tracy/TracyD3D12.hpp b/public/tracy/TracyD3D12.hpp index 153f436a..bca11809 100644 --- a/public/tracy/TracyD3D12.hpp +++ b/public/tracy/TracyD3D12.hpp @@ -59,11 +59,9 @@ namespace tracy static constexpr uint32_t MaxQueries = 64 * 1024; // Queries are begin and end markers, so we can store half as many total time durations. Must be even! - bool m_initialized = false; - ID3D12Device* m_device = nullptr; ID3D12CommandQueue* m_queue = nullptr; - uint8_t m_context; + uint8_t m_contextId = 255; // TODO: apparently, 255 means "invalid id"; is this documented somewhere? Microsoft::WRL::ComPtr m_queryHeap; Microsoft::WRL::ComPtr m_readbackBuffer; @@ -91,7 +89,6 @@ namespace tracy D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue) : m_device(device) , m_queue(queue) - , m_context(GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed)) { // Verify we support timestamp queries on this queue. @@ -166,18 +163,19 @@ namespace tracy assert(false && "Failed to create payload fence."); } + // all checked: ready to roll + m_contextId = GetGpuCtxCounter().fetch_add(1); + auto* item = Profiler::QueueSerial(); MemWrite(&item->hdr.type, QueueType::GpuNewContext); MemWrite(&item->gpuNewContext.cpuTime, cpuTimestamp); MemWrite(&item->gpuNewContext.gpuTime, gpuTimestamp); memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread)); MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast(timestampFrequency)); - MemWrite(&item->gpuNewContext.context, m_context); + MemWrite(&item->gpuNewContext.context, GetId()); MemWrite(&item->gpuNewContext.flags, GpuContextCalibration); MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12); SubmitQueueItem(item); - - m_initialized = true; } void NewFrame() @@ -201,7 +199,7 @@ namespace tracy auto item = Profiler::QueueSerial(); MemWrite( &item->hdr.type, QueueType::GpuContextName ); - MemWrite( &item->gpuContextNameFat.context, m_context ); + MemWrite( &item->gpuContextNameFat.context, GetId()); MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); MemWrite( &item->gpuContextNameFat.size, len ); SubmitQueueItem(item); @@ -255,7 +253,7 @@ namespace tracy MemWrite(&item->hdr.type, QueueType::GpuTime); MemWrite(&item->gpuTime.gpuTime, timestamp); MemWrite(&item->gpuTime.queryId, static_cast(queryId)); - MemWrite(&item->gpuTime.context, m_context); + MemWrite(&item->gpuTime.context, GetId()); Profiler::QueueSerialFinish(); } @@ -288,7 +286,7 @@ namespace tracy MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp); MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp); MemWrite(&item->gpuCalibration.cpuDelta, cpuDelta); - MemWrite(&item->gpuCalibration.context, m_context); + MemWrite(&item->gpuCalibration.context, GetId()); Profiler::QueueSerialFinish(); } @@ -307,7 +305,7 @@ namespace tracy tracy_force_inline uint8_t GetId() const { - return m_context; + return m_contextId; } }; From 6454b0bd65f5ec968cd4c8c2156278ec9cdc3cba Mon Sep 17 00:00:00 2001 From: Marcos Slomp Date: Mon, 11 Sep 2023 12:01:21 -0700 Subject: [PATCH 3/8] removing windows/com header bloat --- public/tracy/TracyD3D12.hpp | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/public/tracy/TracyD3D12.hpp b/public/tracy/TracyD3D12.hpp index bca11809..74102b04 100644 --- a/public/tracy/TracyD3D12.hpp +++ b/public/tracy/TracyD3D12.hpp @@ -40,7 +40,6 @@ using TracyD3D12Ctx = void*; #include #include #include -#include #include namespace tracy @@ -62,8 +61,8 @@ namespace tracy ID3D12Device* m_device = nullptr; ID3D12CommandQueue* m_queue = nullptr; uint8_t m_contextId = 255; // TODO: apparently, 255 means "invalid id"; is this documented somewhere? - Microsoft::WRL::ComPtr m_queryHeap; - Microsoft::WRL::ComPtr m_readbackBuffer; + ID3D12QueryHeap* m_queryHeap = nullptr; + ID3D12Resource* m_readbackBuffer = nullptr; // In-progress payload. uint32_t m_queryLimit = MaxQueries; @@ -71,7 +70,7 @@ namespace tracy uint32_t m_previousQueryCounter = 0; uint32_t m_activePayload = 0; - Microsoft::WRL::ComPtr m_payloadFence; + ID3D12Fence* m_payloadFence = nullptr; std::queue m_payloadQueue; int64_t m_prevCalibration = 0; @@ -178,6 +177,14 @@ namespace tracy SubmitQueueItem(item); } + ~D3D12QueueCtx() + { + m_payloadFence->Release(); + m_readbackBuffer->Release(); + m_queryHeap->Release(); + } + + void NewFrame() { uint32_t queryCounter = m_queryCounter.exchange(0); @@ -189,7 +196,7 @@ namespace tracy m_previousQueryCounter -= m_queryLimit; } - m_queue->Signal(m_payloadFence.Get(), ++m_activePayload); + m_queue->Signal(m_payloadFence, ++m_activePayload); } void Name( const char* name, uint16_t len ) @@ -340,7 +347,7 @@ namespace tracy m_cmdList = cmdList; m_queryId = m_ctx->NextQueryId(); - m_cmdList->EndQuery(m_ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId); + m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId); } public: @@ -389,7 +396,7 @@ namespace tracy if (!m_active) return; const auto queryId = m_queryId + 1; // Our end query slot is immediately after the begin slot. - m_cmdList->EndQuery(m_ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, queryId); + m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, queryId); auto* item = Profiler::QueueSerial(); MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial); @@ -399,7 +406,7 @@ namespace tracy MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId()); Profiler::QueueSerialFinish(); - m_cmdList->ResolveQueryData(m_ctx->m_queryHeap.Get(), D3D12_QUERY_TYPE_TIMESTAMP, m_queryId, 2, m_ctx->m_readbackBuffer.Get(), m_queryId * sizeof(uint64_t)); + m_cmdList->ResolveQueryData(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId, 2, m_ctx->m_readbackBuffer, m_queryId * sizeof(uint64_t)); } }; From f9d36060df722bf842cb7593be753a9ea819baaf Mon Sep 17 00:00:00 2001 From: Marcos Slomp Date: Mon, 11 Sep 2023 12:08:15 -0700 Subject: [PATCH 4/8] reworking clock calibration --- public/tracy/TracyD3D12.hpp | 111 ++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 50 deletions(-) diff --git a/public/tracy/TracyD3D12.hpp b/public/tracy/TracyD3D12.hpp index 74102b04..e9adaa43 100644 --- a/public/tracy/TracyD3D12.hpp +++ b/public/tracy/TracyD3D12.hpp @@ -73,8 +73,37 @@ namespace tracy ID3D12Fence* m_payloadFence = nullptr; std::queue m_payloadQueue; - int64_t m_prevCalibration = 0; - int64_t m_qpcToNs = int64_t{ 1000000000 / GetFrequencyQpc() }; + UINT64 m_prevCalibrationTicksCPU = 0; + + void RecalibrateClocks() + { + UINT64 cpuTimestamp; + UINT64 gpuTimestamp; + if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp))) + { + assert(false && "failed to obtain queue clock calibration counters."); + return; + } + + int64_t cpuDeltaTicks = cpuTimestamp - m_prevCalibrationTicksCPU; + if (cpuDeltaTicks > 0) + { + static const int64_t nanosecodsPerTick = int64_t(1000000000) / GetFrequencyQpc(); + int64_t cpuDeltaNS = cpuDeltaTicks * nanosecodsPerTick; + // Save the device cpu timestamp, not the Tracy profiler timestamp: + m_prevCalibrationTicksCPU = cpuTimestamp; + + cpuTimestamp = Profiler::GetTime(); + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuCalibration); + MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp); + MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp); + MemWrite(&item->gpuCalibration.cpuDelta, cpuDeltaNS); + MemWrite(&item->gpuCalibration.context, GetId()); + SubmitQueueItem(item); + } + } tracy_force_inline void SubmitQueueItem(tracy::QueueItem* item) { @@ -99,26 +128,6 @@ namespace tracy assert(Success && featureData.CopyQueueTimestampQueriesSupported && "Platform does not support profiling of copy queues."); } - uint64_t timestampFrequency; - - if (FAILED(queue->GetTimestampFrequency(×tampFrequency))) - { - assert(false && "Failed to get timestamp frequency."); - } - - uint64_t cpuTimestamp; - uint64_t gpuTimestamp; - - if (FAILED(queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp))) - { - assert(false && "Failed to get queue clock calibration."); - } - - // Save the device cpu timestamp, not the profiler's timestamp. - m_prevCalibration = cpuTimestamp * m_qpcToNs; - - cpuTimestamp = Profiler::GetTime(); - D3D12_QUERY_HEAP_DESC heapDesc{}; heapDesc.Type = queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY ? D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP : D3D12_QUERY_HEAP_TYPE_TIMESTAMP; heapDesc.Count = m_queryLimit; @@ -162,6 +171,33 @@ namespace tracy assert(false && "Failed to create payload fence."); } + float period = [queue]() + { + uint64_t timestampFrequency; + if (FAILED(queue->GetTimestampFrequency(×tampFrequency))) + { + return 0.0f; + } + return static_cast( 1E+09 / static_cast(timestampFrequency) ); + }(); + + if (period == 0.0f) + { + assert(false && "Failed to get timestamp frequency."); + } + + uint64_t cpuTimestamp; + uint64_t gpuTimestamp; + if (FAILED(queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp))) + { + assert(false && "Failed to get queue clock calibration."); + } + + // Save the device cpu timestamp, not the profiler's timestamp. + m_prevCalibrationTicksCPU = cpuTimestamp; + + cpuTimestamp = Profiler::GetTime(); + // all checked: ready to roll m_contextId = GetGpuCtxCounter().fetch_add(1); @@ -169,8 +205,8 @@ namespace tracy MemWrite(&item->hdr.type, QueueType::GpuNewContext); MemWrite(&item->gpuNewContext.cpuTime, cpuTimestamp); MemWrite(&item->gpuNewContext.gpuTime, gpuTimestamp); - memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread)); - MemWrite(&item->gpuNewContext.period, 1E+09f / static_cast(timestampFrequency)); + MemWrite(&item->gpuNewContext.thread, decltype(item->gpuNewContext.thread)(0)); // #TODO: why 0 instead of GetThreadHandle()? + MemWrite(&item->gpuNewContext.period, period); MemWrite(&item->gpuNewContext.context, GetId()); MemWrite(&item->gpuNewContext.flags, GpuContextCalibration); MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12); @@ -271,32 +307,7 @@ namespace tracy m_readbackBuffer->Unmap(0, nullptr); // Recalibrate to account for drift. - - uint64_t cpuTimestamp; - uint64_t gpuTimestamp; - - if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp))) - { - assert(false && "Failed to get queue clock calibration."); - } - - cpuTimestamp *= m_qpcToNs; - - const auto cpuDelta = cpuTimestamp - m_prevCalibration; - if (cpuDelta > 0) - { - m_prevCalibration = cpuTimestamp; - cpuTimestamp = Profiler::GetTime(); - - auto* item = Profiler::QueueSerial(); - MemWrite(&item->hdr.type, QueueType::GpuCalibration); - MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp); - MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp); - MemWrite(&item->gpuCalibration.cpuDelta, cpuDelta); - MemWrite(&item->gpuCalibration.context, GetId()); - - Profiler::QueueSerialFinish(); - } + RecalibrateClocks(); } private: From 2e3ae9588254eb641639230c0cbe019b5124487c Mon Sep 17 00:00:00 2001 From: Marcos Slomp Date: Mon, 11 Sep 2023 12:28:07 -0700 Subject: [PATCH 5/8] reworking TracyD3D12 macros --- public/tracy/TracyD3D12.hpp | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/public/tracy/TracyD3D12.hpp b/public/tracy/TracyD3D12.hpp index e9adaa43..ed250e20 100644 --- a/public/tracy/TracyD3D12.hpp +++ b/public/tracy/TracyD3D12.hpp @@ -445,25 +445,29 @@ using TracyD3D12Ctx = tracy::D3D12QueueCtx*; #define TracyD3D12NewFrame(ctx) ctx->NewFrame(); +#define TracyD3D12UnnamedZone ___tracy_gpu_d3d12_zone +#define TracyD3D12SrcLocSymbol TracyConcat(__tracy_d3d12_source_location,TracyLine) +#define TracyD3D12SrcLocObject(name, color) static constexpr tracy::SourceLocationData TracyD3D12SrcLocSymbol { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; + #if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZoneS(ctx, ___tracy_gpu_zone, cmdList, name, TRACY_CALLSTACK, true) -# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneCS(ctx, ___tracy_gpu_zone, cmdList, name, color, TRACY_CALLSTACK, true) -# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, TracyLine), TRACY_CALLSTACK, active }; -# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, TracyLine), TRACY_CALLSTACK, active }; +# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, TRACY_CALLSTACK, true) +# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, TRACY_CALLSTACK, true) +# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active }; +# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active }; # define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active) #else -# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZone(ctx, ___tracy_gpu_zone, cmdList, name, true) -# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneC(ctx, ___tracy_gpu_zone, cmdList, name, color, true) -# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, TracyLine), active }; -# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, TracyLine), active }; +# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZone(ctx, TracyD3D12UnnamedZone, cmdList, name, true) +# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneC(ctx, TracyD3D12UnnamedZone, cmdList, name, color, true) +# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active }; +# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active }; # define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, active }; #endif #ifdef TRACY_HAS_CALLSTACK -# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12NamedZoneS(ctx, ___tracy_gpu_zone, cmdList, name, depth, true) -# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12NamedZoneCS(ctx, ___tracy_gpu_zone, cmdList, name, color, depth, true) -# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, TracyLine), depth, active }; -# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location, TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyConcat(__tracy_gpu_source_location, TracyLine), depth, active }; +# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, depth, true) +# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, depth, true) +# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active }; +# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active }; # define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, depth, active }; #else # define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12Zone(ctx, cmdList, name) From 42b088d085b35cd3f02afec3db8bcb83285a478c Mon Sep 17 00:00:00 2001 From: Marcos Slomp Date: Mon, 11 Sep 2023 12:46:03 -0700 Subject: [PATCH 6/8] Collect pending timestamps upon context destruction --- public/tracy/TracyD3D12.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/public/tracy/TracyD3D12.hpp b/public/tracy/TracyD3D12.hpp index ed250e20..c9414f27 100644 --- a/public/tracy/TracyD3D12.hpp +++ b/public/tracy/TracyD3D12.hpp @@ -215,6 +215,11 @@ namespace tracy ~D3D12QueueCtx() { + ZoneScopedC(Color::Red4); + // collect all pending timestamps + while (m_payloadFence->GetCompletedValue() != m_activePayload) + /* busy-wait ... */; + Collect(); m_payloadFence->Release(); m_readbackBuffer->Release(); m_queryHeap->Release(); From 7b2acd2c6d22faf5cf855ccfacd04ad567c4f2e5 Mon Sep 17 00:00:00 2001 From: Marcos Slomp Date: Mon, 11 Sep 2023 12:53:42 -0700 Subject: [PATCH 7/8] improved error reporting --- public/tracy/TracyD3D12.hpp | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/public/tracy/TracyD3D12.hpp b/public/tracy/TracyD3D12.hpp index c9414f27..b375b2fe 100644 --- a/public/tracy/TracyD3D12.hpp +++ b/public/tracy/TracyD3D12.hpp @@ -42,6 +42,8 @@ using TracyD3D12Ctx = void*; #include #include +#define TracyD3D12Panic(msg, ...) do { assert(false && "TracyD3D12: " msg); TracyMessageLC("TracyD3D12: " msg, tracy::Color::Red4); __VA_ARGS__; } while(false); + namespace tracy { @@ -81,8 +83,7 @@ namespace tracy UINT64 gpuTimestamp; if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp))) { - assert(false && "failed to obtain queue clock calibration counters."); - return; + TracyD3D12Panic("failed to obtain queue clock calibration counters.", return); } int64_t cpuDeltaTicks = cpuTimestamp - m_prevCalibrationTicksCPU; @@ -124,8 +125,11 @@ namespace tracy { D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureData{}; - bool Success = SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &featureData, sizeof(featureData))); - assert(Success && featureData.CopyQueueTimestampQueriesSupported && "Platform does not support profiling of copy queues."); + HRESULT hr = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &featureData, sizeof(featureData)); + if (FAILED(hr) || (featureData.CopyQueueTimestampQueriesSupported == FALSE)) + { + TracyD3D12Panic("Platform does not support profiling of copy queues.", return); + } } D3D12_QUERY_HEAP_DESC heapDesc{}; @@ -163,12 +167,12 @@ namespace tracy if (FAILED(device->CreateCommittedResource(&readbackHeapProps, D3D12_HEAP_FLAG_NONE, &readbackBufferDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_readbackBuffer)))) { - assert(false && "Failed to create query readback buffer."); + TracyD3D12Panic("Failed to create query readback buffer.", return); } if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_payloadFence)))) { - assert(false && "Failed to create payload fence."); + TracyD3D12Panic("Failed to create payload fence.", return); } float period = [queue]() @@ -183,14 +187,14 @@ namespace tracy if (period == 0.0f) { - assert(false && "Failed to get timestamp frequency."); + TracyD3D12Panic("Failed to get timestamp frequency.", return); } uint64_t cpuTimestamp; uint64_t gpuTimestamp; if (FAILED(queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp))) { - assert(false && "Failed to get queue clock calibration."); + TracyD3D12Panic("Failed to get queue clock calibration.", return); } // Save the device cpu timestamp, not the profiler's timestamp. @@ -282,7 +286,7 @@ namespace tracy if (FAILED(m_readbackBuffer->Map(0, &mapRange, &readbackBufferMapping))) { - assert(false && "Failed to map readback buffer."); + TracyD3D12Panic("Failed to map readback buffer.", return); } auto* timestampData = static_cast(readbackBufferMapping); @@ -319,7 +323,11 @@ namespace tracy tracy_force_inline uint32_t NextQueryId() { uint32_t queryCounter = m_queryCounter.fetch_add(2); - assert(queryCounter < m_queryLimit && "Submitted too many GPU queries! Consider increasing MaxQueries."); + if (queryCounter >= m_queryLimit) + { + TracyD3D12Panic("Submitted too many GPU queries! Consider increasing MaxQueries."); + // #TODO: consider returning an invalid id or sentinel value here + } const uint32_t id = (m_previousQueryCounter + queryCounter) % m_queryLimit; @@ -442,6 +450,8 @@ namespace tracy } +#undef TracyD3D12Panic + using TracyD3D12Ctx = tracy::D3D12QueueCtx*; #define TracyD3D12Context(device, queue) tracy::CreateD3D12Context(device, queue); From 996987b966a136ac832a142690833f4697b76446 Mon Sep 17 00:00:00 2001 From: Marcos Slomp Date: Mon, 11 Sep 2023 12:59:48 -0700 Subject: [PATCH 8/8] scoping --- public/tracy/TracyD3D12.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/public/tracy/TracyD3D12.hpp b/public/tracy/TracyD3D12.hpp index b375b2fe..41567937 100644 --- a/public/tracy/TracyD3D12.hpp +++ b/public/tracy/TracyD3D12.hpp @@ -58,8 +58,6 @@ namespace tracy { friend class D3D12ZoneScope; - static constexpr uint32_t MaxQueries = 64 * 1024; // Queries are begin and end markers, so we can store half as many total time durations. Must be even! - ID3D12Device* m_device = nullptr; ID3D12CommandQueue* m_queue = nullptr; uint8_t m_contextId = 255; // TODO: apparently, 255 means "invalid id"; is this documented somewhere? @@ -67,7 +65,7 @@ namespace tracy ID3D12Resource* m_readbackBuffer = nullptr; // In-progress payload. - uint32_t m_queryLimit = MaxQueries; + uint32_t m_queryLimit = 0; std::atomic m_queryCounter = 0; uint32_t m_previousQueryCounter = 0; @@ -132,6 +130,9 @@ namespace tracy } } + static constexpr uint32_t MaxQueries = 64 * 1024; // Must be even, because queries are (begin, end) pairs + m_queryLimit = MaxQueries; + D3D12_QUERY_HEAP_DESC heapDesc{}; heapDesc.Type = queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY ? D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP : D3D12_QUERY_HEAP_TYPE_TIMESTAMP; heapDesc.Count = m_queryLimit;