From 3a934b2ba3a3e7610733675ba57f9d3c24bc8c5d Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Sun, 22 Jul 2018 16:05:50 +0200 Subject: [PATCH] Store children vectors in a separate data collection. This reduces the per-zone memory cost by 9 bytes if a zone has no children and increases it by 4 bytes if it has children. This is universally a better solution, as the following data shows: +++ /home/wolf/desktop/tracy-old/android.tracy +++ Vectors: 2794480 Size 0: 2373070 (84.92%) Size 1: 70237 (2.51%) Size 2+: 351173 (12.57%) +++ /home/wolf/desktop/tracy-old/asset-new.tracy +++ Vectors: 1799227 Size 0: 1482691 (82.41%) Size 1: 93272 (5.18%) Size 2+: 223264 (12.41%) +++ /home/wolf/desktop/tracy-old/asset-new-id.tracy +++ Vectors: 1977996 Size 0: 1640817 (82.95%) Size 1: 97198 (4.91%) Size 2+: 239981 (12.13%) +++ /home/wolf/desktop/tracy-old/asset-old.tracy +++ Vectors: 1782395 Size 0: 1471437 (82.55%) Size 1: 88813 (4.98%) Size 2+: 222145 (12.46%) +++ /home/wolf/desktop/tracy-old/big.tracy +++ Vectors: 180794047 Size 0: 172696094 (95.52%) Size 1: 2799772 (1.55%) Size 2+: 5298181 (2.93%) +++ /home/wolf/desktop/tracy-old/darkrl.tracy +++ Vectors: 12014129 Size 0: 11611324 (96.65%) Size 1: 134980 (1.12%) Size 2+: 267825 (2.23%) +++ /home/wolf/desktop/tracy-old/mem.tracy +++ Vectors: 383097 Size 0: 321932 (84.03%) Size 1: 854 (0.22%) Size 2+: 60311 (15.74%) +++ /home/wolf/desktop/tracy-old/new.tracy +++ Vectors: 77536 Size 0: 63035 (81.30%) Size 1: 8886 (11.46%) Size 2+: 5615 (7.24%) +++ /home/wolf/desktop/tracy-old/selfprofile.tracy +++ Vectors: 22940871 Size 0: 22704868 (98.97%) Size 1: 73000 (0.32%) Size 2+: 163003 (0.71%) +++ /home/wolf/desktop/tracy-old/tbrowser.tracy +++ Vectors: 962682 Size 0: 695380 (72.23%) Size 1: 43007 (4.47%) Size 2+: 224295 (23.30%) +++ /home/wolf/desktop/tracy-old/virtualfile_hc.tracy +++ Vectors: 529170 Size 0: 449386 (84.92%) Size 1: 15694 (2.97%) Size 2+: 64090 (12.11%) +++ /home/wolf/desktop/tracy-old/zfile_hc.tracy +++ Vectors: 264849 
Size 0: 220589 (83.29%) Size 1: 9386 (3.54%) Size 2+: 34874 (13.17%) --- server/TracyEvent.hpp | 2 +- server/TracyView.cpp | 41 +++++++++---------- server/TracyWorker.cpp | 90 +++++++++++++++++++++++++++++++----------- server/TracyWorker.hpp | 10 +++-- 4 files changed, 95 insertions(+), 48 deletions(-) diff --git a/server/TracyEvent.hpp b/server/TracyEvent.hpp index 157b82e4..dbb2fcf5 100644 --- a/server/TracyEvent.hpp +++ b/server/TracyEvent.hpp @@ -84,7 +84,7 @@ struct ZoneEvent StringIdx name; // This must be last. All above is read/saved as-is. - Vector child; + int32_t child; }; enum { ZoneEventSize = sizeof( ZoneEvent ) }; diff --git a/server/TracyView.cpp b/server/TracyView.cpp index cf57e3b5..a796d6fb 100644 --- a/server/TracyView.cpp +++ b/server/TracyView.cpp @@ -1370,9 +1370,9 @@ int View::DrawZoneLevel( const Vector& vec, bool hover, double pxns, m_lastCpu = ev.cpu_start; } - if( !ev.child.empty() ) + if( ev.child >= 0 ) { - const auto d = DispatchZoneLevel( ev.child, hover, pxns, wpos, _offset, depth, yMin, yMax ); + const auto d = DispatchZoneLevel( m_worker.GetZoneChildren( ev.child ), hover, pxns, wpos, _offset, depth, yMin, yMax ); if( d > maxdepth ) maxdepth = d; } @@ -1491,9 +1491,9 @@ int View::SkipZoneLevel( const Vector& vec, bool hover, double pxns, { m_lastCpu = ev.cpu_start; - if( !ev.child.empty() ) + if( ev.child >= 0 ) { - const auto d = DispatchZoneLevel( ev.child, hover, pxns, wpos, _offset, depth, yMin, yMax ); + const auto d = DispatchZoneLevel( m_worker.GetZoneChildren( ev.child ), hover, pxns, wpos, _offset, depth, yMin, yMax ); if( d > maxdepth ) maxdepth = d; } @@ -3187,26 +3187,27 @@ void View::DrawZoneInfoWindow() } } ); - if( !ev.child.empty() ) + if( ev.child >= 0 ) { + const auto& children = m_worker.GetZoneChildren( ev.child ); bool expand = ImGui::TreeNode( "Child zones" ); ImGui::SameLine(); - ImGui::TextDisabled( "(%s)", RealToString( ev.child.size(), true ) ); + ImGui::TextDisabled( "(%s)", RealToString( 
children.size(), true ) ); if( expand ) { - auto ctt = std::make_unique( ev.child.size() ); - auto cti = std::make_unique( ev.child.size() ); + auto ctt = std::make_unique( children.size() ); + auto cti = std::make_unique( children.size() ); uint64_t ctime = 0; - for( size_t i=0; istart; + const auto cend = m_worker.GetZoneEnd( *children[i] ); + const auto ct = cend - children[i]->start; ctime += ct; ctt[i] = ct; cti[i] = uint32_t( i ); } - pdqsort_branchless( cti.get(), cti.get() + ev.child.size(), [&ctt] ( const auto& lhs, const auto& rhs ) { return ctt[lhs] > ctt[rhs]; } ); + pdqsort_branchless( cti.get(), cti.get() + children.size(), [&ctt] ( const auto& lhs, const auto& rhs ) { return ctt[lhs] > ctt[rhs]; } ); const auto ty = ImGui::GetTextLineHeight(); ImGui::Columns( 2 ); @@ -3216,9 +3217,9 @@ void View::DrawZoneInfoWindow() sprintf( buf, "%s (%.2f%%)", TimeToString( ztime - ctime ), double( ztime - ctime ) / ztime * 100 ); ImGui::ProgressBar( double( ztime - ctime ) / ztime, ImVec2( -1, ty ), buf ); ImGui::NextColumn(); - for( size_t i=0; ibegin() ) --it; if( zone.end >= 0 && (*it)->start > zone.end ) break; if( *it == &zone ) return parent; - if( (*it)->child.empty() ) break; + if( (*it)->child < 0 ) break; parent = *it; - timeline = &parent->child; + timeline = &m_worker.GetZoneChildren( parent->child ); } } return nullptr; @@ -6248,8 +6249,8 @@ uint64_t View::GetZoneThread( const ZoneEvent& zone ) const if( it != timeline->begin() ) --it; if( zone.end >= 0 && (*it)->start > zone.end ) break; if( *it == &zone ) return thread->id; - if( (*it)->child.empty() ) break; - timeline = &(*it)->child; + if( (*it)->child < 0 ) break; + timeline = &m_worker.GetZoneChildren( (*it)->child ); } } return 0; @@ -6323,8 +6324,8 @@ const ZoneEvent* View::FindZoneAtTime( uint64_t thread, int64_t time ) const if( it != timeline->begin() ) --it; if( (*it)->start > time || ( (*it)->end >= 0 && (*it)->end < time ) ) return ret; ret = *it; - if( (*it)->child.empty() ) return 
ret; - timeline = &(*it)->child; + if( (*it)->child < 0 ) return ret; + timeline = &m_worker.GetZoneChildren( (*it)->child ); } } diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index 8fec69c1..31b7a53b 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -468,13 +468,15 @@ Worker::Worker( FileRead& f, EventType::Type eventMask ) f.Read( tid ); td->id = tid; f.Read( td->count ); + uint64_t tsz; + f.Read( tsz ); if( fileVer <= FileVersion( 0, 3, 2 ) ) { - ReadTimelinePre033( f, td->timeline, CompressThread( tid ), fileVer ); + ReadTimelinePre033( f, td->timeline, CompressThread( tid ), tsz, fileVer ); } else { - ReadTimeline( f, td->timeline, CompressThread( tid ) ); + ReadTimeline( f, td->timeline, CompressThread( tid ), tsz ); } uint64_t msz; f.Read( msz ); @@ -708,7 +710,6 @@ Worker::~Worker() for( auto& v : m_data.threads ) { - ZoneCleanup( v->timeline ); v->messages.~Vector(); } for( auto& v : m_data.gpuData ) @@ -783,8 +784,8 @@ int64_t Worker::GetZoneEnd( const ZoneEvent& ev ) for(;;) { if( ptr->end >= 0 ) return ptr->end; - if( ptr->child.empty() ) return ptr->start; - ptr = ptr->child.back(); + if( ptr->child < 0 ) return ptr->start; + ptr = GetZoneChildren( ptr->child ).back(); } } @@ -1267,7 +1268,16 @@ void Worker::NewZone( ZoneEvent* zone, uint64_t thread ) } else { - td->stack.back()->child.push_back( zone ); + auto back = td->stack.back(); + if( back->child < 0 ) + { + back->child = int32_t( m_data.m_zoneChildren.size() ); + m_data.m_zoneChildren.push_back( Vector( zone ) ); + } + else + { + m_data.m_zoneChildren[back->child].push_back( zone ); + } td->stack.push_back_non_empty( zone ); } } @@ -1683,6 +1693,7 @@ void Worker::ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev ) assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits::max() ); zone->cpu_start = ev.cpu == 0xFFFFFFFF ? 
-1 : (int8_t)ev.cpu; zone->callstack = 0; + zone->child = -1; m_data.lastTime = std::max( m_data.lastTime, zone->start ); @@ -1718,6 +1729,7 @@ void Worker::ProcessZoneBeginAllocSrcLoc( const QueueZoneBegin& ev ) assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits::max() ); zone->cpu_start = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu; zone->callstack = 0; + zone->child = -1; m_data.lastTime = std::max( m_data.lastTime, zone->start ); @@ -1752,10 +1764,13 @@ void Worker::ProcessZoneEnd( const QueueZoneEnd& ev ) it->second.min = std::min( it->second.min, timeSpan ); it->second.max = std::max( it->second.max, timeSpan ); it->second.total += timeSpan; - for( auto& v : zone->child ) + if( zone->child >= 0 ) { - const auto childSpan = std::max( int64_t( 0 ), v->end - v->start ); - timeSpan -= childSpan; + for( auto& v : GetZoneChildren( zone->child ) ) + { + const auto childSpan = std::max( int64_t( 0 ), v->end - v->start ); + timeSpan -= childSpan; + } } it->second.selfTotal += timeSpan; } @@ -2431,23 +2446,42 @@ void Worker::ReconstructMemAllocPlot() m_data.memory.plot = plot; } -void Worker::ReadTimeline( FileRead& f, Vector& vec, uint16_t thread ) +void Worker::ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread ) { uint64_t sz; f.Read( sz ); - if( sz != 0 ) + if( sz == 0 ) { - ReadTimeline( f, vec, thread, sz ); + zone->child = -1; + } + else + { + zone->child = m_data.m_zoneChildren.size(); + // Put placeholder to have proper size of zone children in nested calls + m_data.m_zoneChildren.push_back( Vector() ); + // Real data buffer. Can't use placeholder, as the vector can be reallocated + // and the buffer address will change, but the reference won't. 
+ Vector tmp; + ReadTimeline( f, tmp, thread, sz ); + m_data.m_zoneChildren[zone->child] = std::move( tmp ); } } -void Worker::ReadTimelinePre033( FileRead& f, Vector& vec, uint16_t thread, int fileVer ) +void Worker::ReadTimelinePre033( FileRead& f, ZoneEvent* zone, uint16_t thread, int fileVer ) { uint64_t sz; f.Read( sz ); - if( sz != 0 ) + if( sz == 0 ) { - ReadTimelinePre033( f, vec, thread, sz, fileVer ); + zone->child = -1; + } + else + { + zone->child = m_data.m_zoneChildren.size(); + m_data.m_zoneChildren.push_back( Vector() ); + Vector tmp; + ReadTimelinePre033( f, tmp, thread, sz, fileVer ); + m_data.m_zoneChildren[zone->child] = std::move( tmp ); } } @@ -2488,10 +2522,13 @@ void Worker::ReadTimelineUpdateStatistics( ZoneEvent* zone, uint16_t thread ) it->second.min = std::min( it->second.min, timeSpan ); it->second.max = std::max( it->second.max, timeSpan ); it->second.total += timeSpan; - for( auto& v : zone->child ) + if( zone->child >= 0 ) { - const auto childSpan = std::max( int64_t( 0 ), v->end - v->start ); - timeSpan -= childSpan; + for( auto& v : GetZoneChildren( zone->child ) ) + { + const auto childSpan = std::max( int64_t( 0 ), v->end - v->start ); + timeSpan -= childSpan; + } } it->second.selfTotal += timeSpan; } @@ -2509,10 +2546,8 @@ void Worker::ReadTimeline( FileRead& f, Vector& vec, uint16_t thread { auto zone = m_slab.Alloc(); vec.push_back_no_space_check( zone ); - new( &zone->child ) decltype( zone->child ); - f.Read( zone, sizeof( ZoneEvent ) - sizeof( ZoneEvent::child ) ); - ReadTimeline( f, zone->child, thread ); + ReadTimeline( f, zone, thread ); ReadTimelineUpdateStatistics( zone, thread ); } } @@ -2527,7 +2562,6 @@ void Worker::ReadTimelinePre033( FileRead& f, Vector& vec, uint16_t { auto zone = m_slab.Alloc(); vec.push_back_no_space_check( zone ); - new( &zone->child ) decltype( zone->child ); if( fileVer <= FileVersion( 0, 3, 1 ) ) { @@ -2541,7 +2575,7 @@ void Worker::ReadTimelinePre033( FileRead& f, Vector& vec, uint16_t 
f.Read( zone, 30 ); zone->name.__data = 0; } - ReadTimelinePre033( f, zone->child, thread, fileVer ); + ReadTimelinePre033( f, zone, thread, fileVer ); ReadTimelineUpdateStatistics( zone, thread ); } } @@ -2763,7 +2797,15 @@ void Worker::WriteTimeline( FileWrite& f, const Vector& vec ) for( auto& v : vec ) { f.Write( v, sizeof( ZoneEvent ) - sizeof( ZoneEvent::child ) ); - WriteTimeline( f, v->child ); + if( v->child < 0 ) + { + sz = 0; + f.Write( &sz, sizeof( sz ) ); + } + else + { + WriteTimeline( f, GetZoneChildren( v->child ) ); + } } } diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index 1fb5ef4f..10aec31c 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -111,6 +111,8 @@ private: flat_hash_map> threadMap; Vector threadExpand; std::pair threadLast; + + std::vector> m_zoneChildren; }; struct MbpsBlock @@ -173,7 +175,7 @@ public: // GetZoneEnd() will try to infer the end time by looking at child zones (parent zone can't end // before its children have ended). // GetZoneEndDirect() will only return zone's direct timing data, without looking at children. - static int64_t GetZoneEnd( const ZoneEvent& ev ); + int64_t GetZoneEnd( const ZoneEvent& ev ); static int64_t GetZoneEnd( const GpuEvent& ev ); static tracy_force_inline int64_t GetZoneEndDirect( const ZoneEvent& ev ) { return ev.end >= 0 ? ev.end : ev.start; } static tracy_force_inline int64_t GetZoneEndDirect( const GpuEvent& ev ) { return ev.gpuEnd >= 0 ? 
ev.gpuEnd : ev.gpuStart; } @@ -189,6 +191,8 @@ public: const char* GetZoneName( const GpuEvent& ev ) const; const char* GetZoneName( const GpuEvent& ev, const SourceLocation& srcloc ) const; + tracy_force_inline const Vector& GetZoneChildren( int32_t idx ) const { return m_data.m_zoneChildren[idx]; } + std::vector GetMatchingSourceLocation( const char* query ) const; #ifndef TRACY_NO_STATISTICS @@ -294,8 +298,8 @@ private: uint16_t CompressThreadReal( uint64_t thread ); uint16_t CompressThreadNew( uint64_t thread ); - tracy_force_inline void ReadTimeline( FileRead& f, Vector& vec, uint16_t thread ); - tracy_force_inline void ReadTimelinePre033( FileRead& f, Vector& vec, uint16_t thread, int fileVer ); + tracy_force_inline void ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread ); + tracy_force_inline void ReadTimelinePre033( FileRead& f, ZoneEvent* zone, uint16_t thread, int fileVer ); tracy_force_inline void ReadTimeline( FileRead& f, Vector& vec ); tracy_force_inline void ReadTimelinePre032( FileRead& f, Vector& vec );