From 12969ee4972c335c0845ff608d7a846938b663c8 Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Fri, 2 Aug 2019 20:18:08 +0200 Subject: [PATCH] Track thread context. This change exploits the fact that events are processed in batches originating from a single thread. A single message changing thread context is enough to handle multiple messages, as opposed to inclusion of thread identifier in each message. --- client/TracyProfiler.cpp | 157 +++++++++++++++++---------------------- client/TracyProfiler.hpp | 2 + common/TracyProtocol.hpp | 2 +- common/TracyQueue.hpp | 20 ++--- server/TracyWorker.cpp | 70 +++++++++-------- server/TracyWorker.hpp | 3 + 6 files changed, 120 insertions(+), 134 deletions(-) diff --git a/client/TracyProfiler.cpp b/client/TracyProfiler.cpp index ca4f7c19..86df7500 100644 --- a/client/TracyProfiler.cpp +++ b/client/TracyProfiler.cpp @@ -1324,6 +1324,8 @@ void Profiler::Worker() LZ4_resetStream( (LZ4_stream_t*)m_stream ); m_sock->Send( &welcome, sizeof( welcome ) ); + m_threadCtx = 0; + #ifdef TRACY_ON_DEMAND OnDemandPayloadMessage onDemand; onDemand.frames = m_frameCount.load( std::memory_order_relaxed ); @@ -1671,103 +1673,78 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) const auto sz = GetQueue().try_dequeue_bulk_single( token, m_itemBuf, BulkSize, threadId ); if( sz > 0 ) { + if( threadId != m_threadCtx ) + { + QueueItem item; + MemWrite( &item.hdr.type, QueueType::ThreadContext ); + MemWrite( &item.threadCtx.thread, threadId ); + if( !AppendData( &item, QueueDataSize[(int)QueueType::ThreadContext] ) ) return DequeueStatus::ConnectionLost; + m_threadCtx = threadId; + } + auto end = m_itemBuf + sz; auto item = m_itemBuf; while( item != end ) { uint64_t ptr; const auto idx = MemRead( &item->hdr.idx ); - switch( (QueueType)idx ) + if( idx < (int)QueueType::Terminate ) { - case QueueType::ZoneText: - case QueueType::ZoneName: - MemWrite( &item->zoneText.thread, threadId ); - ptr = MemRead( &item->zoneText.text ); - SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); - tracy_free( (void*)ptr ); - break; - case QueueType::Message: - case QueueType::MessageColor: - MemWrite( &item->message.thread, threadId ); - ptr = MemRead( &item->message.text ); - SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); - tracy_free( (void*)ptr ); - break; - case QueueType::MessageAppInfo: - MemWrite( &item->message.thread, threadId ); - ptr = MemRead( &item->message.text ); - SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); -#ifndef TRACY_ON_DEMAND - tracy_free( (void*)ptr ); -#endif - break; - case QueueType::ZoneBeginAllocSrcLoc: - case QueueType::ZoneBeginAllocSrcLocCallstack: - MemWrite( &item->zoneBegin.thread, threadId ); - ptr = MemRead( &item->zoneBegin.srcloc ); - SendSourceLocationPayload( ptr ); - tracy_free( (void*)ptr ); - break; - case QueueType::Callstack: - MemWrite( &item->callstack.thread, threadId ); - ptr = MemRead( &item->callstack.ptr ); - SendCallstackPayload( ptr ); - tracy_free( (void*)ptr ); - break; - case QueueType::CallstackAlloc: - MemWrite( &item->callstackAlloc.thread, threadId ); - ptr = MemRead( &item->callstackAlloc.nativePtr ); - CutCallstack( (void*)ptr, "lua_pcall" ); - SendCallstackPayload( ptr ); - tracy_free( (void*)ptr ); - ptr = MemRead( &item->callstackAlloc.ptr ); - SendCallstackAlloc( ptr ); - tracy_free( (void*)ptr ); - break; - case QueueType::FrameImage: - { - ptr = MemRead( &item->frameImage.image ); - const auto w = MemRead( &item->frameImage.w ); - const auto h = MemRead( &item->frameImage.h ); - const auto csz = size_t( w * h / 2 ); - SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData ); - tracy_free( (void*)ptr ); - break; - } - case QueueType::CrashReport: - MemWrite( &item->crashReport.thread, threadId ); - break; - case QueueType::ZoneBegin: - case QueueType::ZoneBeginCallstack: - MemWrite( &item->zoneBegin.thread, threadId ); - break; - case QueueType::ZoneEnd: - MemWrite( &item->zoneEnd.thread, threadId ); - break; - case QueueType::ZoneValidation: - MemWrite( &item->zoneValidation.thread, threadId ); - break; - case QueueType::LockWait: - case QueueType::LockSharedWait: - MemWrite( &item->lockWait.thread, threadId ); - break; - case QueueType::LockObtain: - case QueueType::LockSharedObtain: - MemWrite( &item->lockObtain.thread, threadId ); - break; - case QueueType::LockRelease: - case QueueType::LockSharedRelease: - MemWrite( &item->lockRelease.thread, threadId ); - break; - case QueueType::LockMark: - MemWrite( &item->lockMark.thread, threadId ); - break; - case QueueType::MessageLiteral: - case QueueType::MessageLiteralColor: - MemWrite( &item->message.thread, threadId ); - break; - default: - break; + switch( (QueueType)idx ) + { + case QueueType::ZoneText: + case QueueType::ZoneName: + ptr = MemRead( &item->zoneText.text ); + SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); + tracy_free( (void*)ptr ); + break; + case QueueType::Message: + case QueueType::MessageColor: + ptr = MemRead( &item->message.text ); + SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); + tracy_free( (void*)ptr ); + break; + case QueueType::MessageAppInfo: + ptr = MemRead( &item->message.text ); + SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); + #ifndef TRACY_ON_DEMAND + tracy_free( (void*)ptr ); + #endif + break; + case QueueType::ZoneBeginAllocSrcLoc: + case QueueType::ZoneBeginAllocSrcLocCallstack: + ptr = MemRead( &item->zoneBegin.srcloc ); + SendSourceLocationPayload( ptr ); + tracy_free( (void*)ptr ); + break; + case QueueType::Callstack: + ptr = MemRead( &item->callstack.ptr ); + SendCallstackPayload( ptr ); + tracy_free( (void*)ptr ); + break; + case QueueType::CallstackAlloc: + ptr = MemRead( &item->callstackAlloc.nativePtr ); + CutCallstack( (void*)ptr, "lua_pcall" ); + SendCallstackPayload( ptr ); + tracy_free( (void*)ptr ); + ptr = MemRead( &item->callstackAlloc.ptr ); + SendCallstackAlloc( ptr ); + tracy_free( (void*)ptr ); + break; + case QueueType::FrameImage: + { + ptr = MemRead( &item->frameImage.image ); + const auto w = MemRead( &item->frameImage.w ); + const auto h = MemRead( &item->frameImage.h ); + const auto csz = size_t( w * h / 2 ); + SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData ); + tracy_free( (void*)ptr ); + break; + } + default: + assert( false ); + break; + } } if( !AppendData( item, QueueDataSize[idx] ) ) return DequeueStatus::ConnectionLost; item++; diff --git a/client/TracyProfiler.hpp b/client/TracyProfiler.hpp index 1fbdef53..e9334165 100644 --- a/client/TracyProfiler.hpp +++ b/client/TracyProfiler.hpp @@ -550,6 +550,8 @@ private: bool m_noExit; std::atomic m_zoneId; + uint64_t m_threadCtx; + void* m_stream; // LZ4_stream_t* char* m_buffer; int m_bufferOffset; diff --git a/common/TracyProtocol.hpp b/common/TracyProtocol.hpp index 2a6db75f..652f5cc9 100644 --- a/common/TracyProtocol.hpp +++ b/common/TracyProtocol.hpp @@ -9,7 +9,7 @@ namespace tracy { -enum : uint32_t { ProtocolVersion = 13 }; +enum : uint32_t { ProtocolVersion = 14 }; enum : uint32_t { BroadcastVersion = 0 }; using lz4sz_t = uint32_t; diff --git a/common/TracyQueue.hpp b/common/TracyQueue.hpp index 08299c22..8f4cb4f5 100644 --- a/common/TracyQueue.hpp +++ b/common/TracyQueue.hpp @@ -21,6 +21,7 @@ enum class QueueType : uint8_t FrameImage, Terminate, KeepAlive, + ThreadContext, Crash, CrashReport, ZoneBegin, @@ -69,10 +70,14 @@ enum class QueueType : uint8_t #pragma pack( 1 ) +struct QueueThreadContext +{ + uint64_t thread; +}; + struct QueueZoneBegin { int64_t time; - uint64_t thread; uint64_t srcloc; // ptr uint32_t cpu; }; @@ -80,13 +85,11 @@ struct QueueZoneBegin struct QueueZoneEnd { int64_t time; - uint64_t thread; uint32_t cpu; }; struct QueueZoneValidation { - uint64_t thread; uint32_t id; }; @@ -123,7 +126,6 @@ struct QueueSourceLocation struct QueueZoneText { - uint64_t thread; uint64_t text; // ptr }; @@ -152,7 +154,6 @@ struct QueueLockWait { uint32_t id; int64_t time; - uint64_t thread; LockType type; }; @@ -160,20 +161,17 @@ struct QueueLockObtain { uint32_t id; int64_t time; - uint64_t thread; }; struct QueueLockRelease { uint32_t id; int64_t time; - uint64_t thread; }; struct QueueLockMark { uint32_t id; - uint64_t thread; uint64_t srcloc; // ptr }; @@ -200,7 +198,6 @@ struct QueuePlotData struct QueueMessage { int64_t time; - uint64_t thread; uint64_t text; // ptr }; @@ -267,14 +264,12 @@ struct QueueCallstackMemory struct QueueCallstack { uint64_t ptr; - uint64_t thread; }; struct QueueCallstackAlloc { uint64_t ptr; uint64_t nativePtr; - uint64_t thread; }; struct QueueCallstackFrameSize @@ -293,7 +288,6 @@ struct QueueCallstackFrame struct QueueCrashReport { int64_t time; - uint64_t thread; uint64_t text; // ptr }; @@ -317,6 +311,7 @@ struct QueueItem QueueHeader hdr; union { + QueueThreadContext threadCtx; QueueZoneBegin zoneBegin; QueueZoneEnd zoneEnd; QueueZoneValidation zoneValidation; @@ -369,6 +364,7 @@ static const size_t QueueDataSize[] = { // above items must be first sizeof( QueueHeader ), // terminate sizeof( QueueHeader ), // keep alive + sizeof( QueueHeader ) + sizeof( QueueThreadContext ), sizeof( QueueHeader ), // crash sizeof( QueueHeader ) + sizeof( QueueCrashReport ), sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index de242e54..e675646e 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -2631,6 +2631,9 @@ bool Worker::Process( const QueueItem& ev ) { switch( ev.hdr.type ) { + case QueueType::ThreadContext: + ProcessThreadContext( ev.threadCtx ); + break; case QueueType::ZoneBegin: ProcessZoneBegin( ev.zoneBegin ); break; @@ -2781,6 +2784,11 @@ bool Worker::Process( const QueueItem& ev ) return m_failure == Failure::None; } +void Worker::ProcessThreadContext( const QueueThreadContext& ev ) +{ + m_threadCtx = ev.thread; +} + void Worker::ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev ) { CheckSourceLocation( ev.srcloc ); @@ -2795,7 +2803,7 @@ void Worker::ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev ) m_data.lastTime = std::max( m_data.lastTime, zone->start ); - NewZone( zone, ev.thread ); + NewZone( zone, m_threadCtx ); } void Worker::ProcessZoneBegin( const QueueZoneBegin& ev ) @@ -2809,7 +2817,7 @@ void Worker::ProcessZoneBeginCallstack( const QueueZoneBegin& ev ) auto zone = m_slab.AllocInit(); ProcessZoneBeginImpl( zone, ev ); - auto& next = m_nextCallstack[ev.thread]; + auto& next = m_nextCallstack[m_threadCtx]; next.type = NextCallstackType::Zone; next.zone = zone; } @@ -2829,7 +2837,7 @@ void Worker::ProcessZoneBeginAllocSrcLocImpl( ZoneEvent* zone, const QueueZoneBe m_data.lastTime = std::max( m_data.lastTime, zone->start ); - NewZone( zone, ev.thread ); + NewZone( zone, m_threadCtx ); m_pendingSourceLocationPayload.erase( it ); } @@ -2845,17 +2853,17 @@ void Worker::ProcessZoneBeginAllocSrcLocCallstack( const QueueZoneBegin& ev ) auto zone = m_slab.AllocInit(); ProcessZoneBeginAllocSrcLocImpl( zone, ev ); - auto& next = m_nextCallstack[ev.thread]; + auto& next = m_nextCallstack[m_threadCtx]; next.type = NextCallstackType::Zone; next.zone = zone; } void Worker::ProcessZoneEnd( const QueueZoneEnd& ev ) { - auto tit = m_threadMap.find( ev.thread ); + auto tit = m_threadMap.find( m_threadCtx ); if( tit == m_threadMap.end() || tit->second->zoneIdStack.empty() ) { - ZoneEndFailure( ev.thread ); + ZoneEndFailure( m_threadCtx ); return; } @@ -2863,7 +2871,7 @@ void Worker::ProcessZoneEnd( const QueueZoneEnd& ev ) auto zoneId = td->zoneIdStack.back_and_pop(); if( zoneId != td->nextZoneId ) { - ZoneStackFailure( ev.thread, td->stack.back() ); + ZoneStackFailure( m_threadCtx, td->stack.back() ); return; } td->nextZoneId = 0; @@ -2977,7 +2985,7 @@ void Worker::FrameImageTwiceFailure() void Worker::ProcessZoneValidation( const QueueZoneValidation& ev ) { - auto td = NoticeThread( ev.thread ); + auto td = NoticeThread( m_threadCtx ); td->nextZoneId = ev.id; } @@ -3098,10 +3106,10 @@ void Worker::ProcessFrameImage( const QueueFrameImage& ev ) void Worker::ProcessZoneText( const QueueZoneText& ev ) { - auto tit = m_threadMap.find( ev.thread ); + auto tit = m_threadMap.find( m_threadCtx ); if( tit == m_threadMap.end() || tit->second->stack.empty() || tit->second->nextZoneId != tit->second->zoneIdStack.back() ) { - ZoneTextFailure( ev.thread ); + ZoneTextFailure( m_threadCtx ); return; } @@ -3117,10 +3125,10 @@ void Worker::ProcessZoneText( const QueueZoneText& ev ) void Worker::ProcessZoneName( const QueueZoneText& ev ) { - auto tit = m_threadMap.find( ev.thread ); + auto tit = m_threadMap.find( m_threadCtx ); if( tit == m_threadMap.end() || tit->second->stack.empty() || tit->second->nextZoneId != tit->second->zoneIdStack.back() ) { - ZoneNameFailure( ev.thread ); + ZoneNameFailure( m_threadCtx ); return; } @@ -3197,7 +3205,7 @@ void Worker::ProcessLockWait( const QueueLockWait& ev ) lev->type = LockEvent::Type::Wait; lev->srcloc = 0; - InsertLockEvent( *it->second, lev, ev.thread ); + InsertLockEvent( *it->second, lev, m_threadCtx ); } void Worker::ProcessLockObtain( const QueueLockObtain& ev ) @@ -3211,7 +3219,7 @@ void Worker::ProcessLockObtain( const QueueLockObtain& ev ) lev->type = LockEvent::Type::Obtain; lev->srcloc = 0; - InsertLockEvent( lock, lev, ev.thread ); + InsertLockEvent( lock, lev, m_threadCtx ); } void Worker::ProcessLockRelease( const QueueLockRelease& ev ) @@ -3225,7 +3233,7 @@ void Worker::ProcessLockRelease( const QueueLockRelease& ev ) lev->type = LockEvent::Type::Release; lev->srcloc = 0; - InsertLockEvent( lock, lev, ev.thread ); + InsertLockEvent( lock, lev, m_threadCtx ); } void Worker::ProcessLockSharedWait( const QueueLockWait& ev ) @@ -3246,7 +3254,7 @@ void Worker::ProcessLockSharedWait( const QueueLockWait& ev ) lev->type = LockEvent::Type::WaitShared; lev->srcloc = 0; - InsertLockEvent( *it->second, lev, ev.thread ); + InsertLockEvent( *it->second, lev, m_threadCtx ); } void Worker::ProcessLockSharedObtain( const QueueLockObtain& ev ) @@ -3261,7 +3269,7 @@ void Worker::ProcessLockSharedObtain( const QueueLockObtain& ev ) lev->type = LockEvent::Type::ObtainShared; lev->srcloc = 0; - InsertLockEvent( lock, lev, ev.thread ); + InsertLockEvent( lock, lev, m_threadCtx ); } void Worker::ProcessLockSharedRelease( const QueueLockRelease& ev ) @@ -3276,7 +3284,7 @@ void Worker::ProcessLockSharedRelease( const QueueLockRelease& ev ) lev->type = LockEvent::Type::ReleaseShared; lev->srcloc = 0; - InsertLockEvent( lock, lev, ev.thread ); + InsertLockEvent( lock, lev, m_threadCtx ); } void Worker::ProcessLockMark( const QueueLockMark& ev ) @@ -3285,7 +3293,7 @@ void Worker::ProcessLockMark( const QueueLockMark& ev ) auto lit = m_data.lockMap.find( ev.id ); assert( lit != m_data.lockMap.end() ); auto& lockmap = *lit->second; - auto tid = lockmap.threadMap.find( ev.thread ); + auto tid = lockmap.threadMap.find( m_threadCtx ); assert( tid != lockmap.threadMap.end() ); const auto thread = tid->second; auto it = lockmap.timeline.end(); @@ -3346,10 +3354,10 @@ void Worker::ProcessMessage( const QueueMessage& ev ) auto msg = m_slab.Alloc(); msg->time = TscTime( ev.time ); msg->ref = StringRef( StringRef::Type::Idx, it->second.idx ); - msg->thread = ev.thread; + msg->thread = m_threadCtx; msg->color = 0xFFFFFFFF; m_data.lastTime = std::max( m_data.lastTime, msg->time ); - InsertMessageData( msg, ev.thread ); + InsertMessageData( msg, m_threadCtx ); m_pendingCustomStrings.erase( it ); } @@ -3359,10 +3367,10 @@ void Worker::ProcessMessageLiteral( const QueueMessage& ev ) auto msg = m_slab.Alloc(); msg->time = TscTime( ev.time ); msg->ref = StringRef( StringRef::Type::Ptr, ev.text ); - msg->thread = ev.thread; + msg->thread = m_threadCtx; msg->color = 0xFFFFFFFF; m_data.lastTime = std::max( m_data.lastTime, msg->time ); - InsertMessageData( msg, ev.thread ); + InsertMessageData( msg, m_threadCtx ); } void Worker::ProcessMessageColor( const QueueMessageColor& ev ) @@ -3372,10 +3380,10 @@ void Worker::ProcessMessageColor( const QueueMessageColor& ev ) auto msg = m_slab.Alloc(); msg->time = TscTime( ev.time ); msg->ref = StringRef( StringRef::Type::Idx, it->second.idx ); - msg->thread = ev.thread; + msg->thread = m_threadCtx; msg->color = 0xFF000000 | ( ev.r << 16 ) | ( ev.g << 8 ) | ev.b; m_data.lastTime = std::max( m_data.lastTime, msg->time ); - InsertMessageData( msg, ev.thread ); + InsertMessageData( msg, m_threadCtx ); m_pendingCustomStrings.erase( it ); } @@ -3385,10 +3393,10 @@ void Worker::ProcessMessageLiteralColor( const QueueMessageColor& ev ) auto msg = m_slab.Alloc(); msg->time = TscTime( ev.time ); msg->ref = StringRef( StringRef::Type::Ptr, ev.text ); - msg->thread = ev.thread; + msg->thread = m_threadCtx; msg->color = 0xFF000000 | ( ev.r << 16 ) | ( ev.g << 8 ) | ev.b; m_data.lastTime = std::max( m_data.lastTime, msg->time ); - InsertMessageData( msg, ev.thread ); + InsertMessageData( msg, m_threadCtx ); } void Worker::ProcessMessageAppInfo( const QueueMessage& ev ) @@ -3654,7 +3662,7 @@ void Worker::ProcessCallstack( const QueueCallstack& ev ) assert( m_pendingCallstackPtr == ev.ptr ); m_pendingCallstackPtr = 0; - auto nit = m_nextCallstack.find( ev.thread ); + auto nit = m_nextCallstack.find( m_threadCtx ); assert( nit != m_nextCallstack.end() ); auto& next = nit->second; @@ -3680,7 +3688,7 @@ void Worker::ProcessCallstackAlloc( const QueueCallstackAlloc& ev ) assert( m_pendingCallstackPtr == ev.ptr ); m_pendingCallstackPtr = 0; - auto nit = m_nextCallstack.find( ev.thread ); + auto nit = m_nextCallstack.find( m_threadCtx ); assert( nit != m_nextCallstack.end() ); auto& next = nit->second; @@ -3758,10 +3766,10 @@ void Worker::ProcessCrashReport( const QueueCrashReport& ev ) { CheckString( ev.text ); - auto& next = m_nextCallstack[ev.thread]; + auto& next = m_nextCallstack[m_threadCtx]; next.type = NextCallstackType::Crash; - m_data.crashEvent.thread = ev.thread; + m_data.crashEvent.thread = m_threadCtx; m_data.crashEvent.time = TscTime( ev.time ); m_data.crashEvent.message = ev.text; m_data.crashEvent.callstack = 0; diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index 4b1e44ef..2915c0de 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -361,6 +361,7 @@ private: tracy_force_inline bool DispatchProcess( const QueueItem& ev, char*& ptr ); tracy_force_inline bool Process( const QueueItem& ev ); + tracy_force_inline void ProcessThreadContext( const QueueThreadContext& ev ); tracy_force_inline void ProcessZoneBegin( const QueueZoneBegin& ev ); tracy_force_inline void ProcessZoneBeginCallstack( const QueueZoneBegin& ev ); tracy_force_inline void ProcessZoneBeginAllocSrcLoc( const QueueZoneBegin& ev ); @@ -551,6 +552,8 @@ private: flat_hash_map m_frameImageStaging; char* m_frameImageBuffer = nullptr; size_t m_frameImageBufferSize = 0; + + uint64_t m_threadCtx = 0; }; }