mirror of
https://github.com/wolfpld/tracy.git
synced 2025-03-20 07:40:02 +08:00
Store children vectors in a separate data collection.
This reduces the per-zone memory cost by 9 bytes if a zone has no children and increases it by 4 bytes if it has children. This is universally a better solution, because in every trace measured the large majority of child vectors (72-99%) are empty, as the following data shows:

+++ /home/wolf/desktop/tracy-old/android.tracy +++
Vectors: 2794480 | Size 0: 2373070 (84.92%) | Size 1: 70237 (2.51%) | Size 2+: 351173 (12.57%)

+++ /home/wolf/desktop/tracy-old/asset-new.tracy +++
Vectors: 1799227 | Size 0: 1482691 (82.41%) | Size 1: 93272 (5.18%) | Size 2+: 223264 (12.41%)

+++ /home/wolf/desktop/tracy-old/asset-new-id.tracy +++
Vectors: 1977996 | Size 0: 1640817 (82.95%) | Size 1: 97198 (4.91%) | Size 2+: 239981 (12.13%)

+++ /home/wolf/desktop/tracy-old/asset-old.tracy +++
Vectors: 1782395 | Size 0: 1471437 (82.55%) | Size 1: 88813 (4.98%) | Size 2+: 222145 (12.46%)

+++ /home/wolf/desktop/tracy-old/big.tracy +++
Vectors: 180794047 | Size 0: 172696094 (95.52%) | Size 1: 2799772 (1.55%) | Size 2+: 5298181 (2.93%)

+++ /home/wolf/desktop/tracy-old/darkrl.tracy +++
Vectors: 12014129 | Size 0: 11611324 (96.65%) | Size 1: 134980 (1.12%) | Size 2+: 267825 (2.23%)

+++ /home/wolf/desktop/tracy-old/mem.tracy +++
Vectors: 383097 | Size 0: 321932 (84.03%) | Size 1: 854 (0.22%) | Size 2+: 60311 (15.74%)

+++ /home/wolf/desktop/tracy-old/new.tracy +++
Vectors: 77536 | Size 0: 63035 (81.30%) | Size 1: 8886 (11.46%) | Size 2+: 5615 (7.24%)

+++ /home/wolf/desktop/tracy-old/selfprofile.tracy +++
Vectors: 22940871 | Size 0: 22704868 (98.97%) | Size 1: 73000 (0.32%) | Size 2+: 163003 (0.71%)

+++ /home/wolf/desktop/tracy-old/tbrowser.tracy +++
Vectors: 962682 | Size 0: 695380 (72.23%) | Size 1: 43007 (4.47%) | Size 2+: 224295 (23.30%)

+++ /home/wolf/desktop/tracy-old/virtualfile_hc.tracy +++
Vectors: 529170 | Size 0: 449386 (84.92%) | Size 1: 15694 (2.97%) | Size 2+: 64090 (12.11%)

+++ /home/wolf/desktop/tracy-old/zfile_hc.tracy +++
Vectors: 264849 | Size 0: 220589 (83.29%) | Size 1: 9386 (3.54%) | Size 2+: 34874 (13.17%)
This commit is contained in:
parent
eb1475ebd4
commit
3a934b2ba3
@ -84,7 +84,7 @@ struct ZoneEvent
|
||||
StringIdx name;
|
||||
|
||||
// This must be last. All above is read/saved as-is.
|
||||
Vector<ZoneEvent*> child;
|
||||
int32_t child;
|
||||
};
|
||||
|
||||
enum { ZoneEventSize = sizeof( ZoneEvent ) };
|
||||
|
||||
@ -1370,9 +1370,9 @@ int View::DrawZoneLevel( const Vector<ZoneEvent*>& vec, bool hover, double pxns,
|
||||
m_lastCpu = ev.cpu_start;
|
||||
}
|
||||
|
||||
if( !ev.child.empty() )
|
||||
if( ev.child >= 0 )
|
||||
{
|
||||
const auto d = DispatchZoneLevel( ev.child, hover, pxns, wpos, _offset, depth, yMin, yMax );
|
||||
const auto d = DispatchZoneLevel( m_worker.GetZoneChildren( ev.child ), hover, pxns, wpos, _offset, depth, yMin, yMax );
|
||||
if( d > maxdepth ) maxdepth = d;
|
||||
}
|
||||
|
||||
@ -1491,9 +1491,9 @@ int View::SkipZoneLevel( const Vector<ZoneEvent*>& vec, bool hover, double pxns,
|
||||
{
|
||||
m_lastCpu = ev.cpu_start;
|
||||
|
||||
if( !ev.child.empty() )
|
||||
if( ev.child >= 0 )
|
||||
{
|
||||
const auto d = DispatchZoneLevel( ev.child, hover, pxns, wpos, _offset, depth, yMin, yMax );
|
||||
const auto d = DispatchZoneLevel( m_worker.GetZoneChildren( ev.child ), hover, pxns, wpos, _offset, depth, yMin, yMax );
|
||||
if( d > maxdepth ) maxdepth = d;
|
||||
}
|
||||
|
||||
@ -3187,26 +3187,27 @@ void View::DrawZoneInfoWindow()
|
||||
}
|
||||
} );
|
||||
|
||||
if( !ev.child.empty() )
|
||||
if( ev.child >= 0 )
|
||||
{
|
||||
const auto& children = m_worker.GetZoneChildren( ev.child );
|
||||
bool expand = ImGui::TreeNode( "Child zones" );
|
||||
ImGui::SameLine();
|
||||
ImGui::TextDisabled( "(%s)", RealToString( ev.child.size(), true ) );
|
||||
ImGui::TextDisabled( "(%s)", RealToString( children.size(), true ) );
|
||||
if( expand )
|
||||
{
|
||||
auto ctt = std::make_unique<uint64_t[]>( ev.child.size() );
|
||||
auto cti = std::make_unique<uint32_t[]>( ev.child.size() );
|
||||
auto ctt = std::make_unique<uint64_t[]>( children.size() );
|
||||
auto cti = std::make_unique<uint32_t[]>( children.size() );
|
||||
uint64_t ctime = 0;
|
||||
for( size_t i=0; i<ev.child.size(); i++ )
|
||||
for( size_t i=0; i<children.size(); i++ )
|
||||
{
|
||||
const auto cend = m_worker.GetZoneEnd( *ev.child[i] );
|
||||
const auto ct = cend - ev.child[i]->start;
|
||||
const auto cend = m_worker.GetZoneEnd( *children[i] );
|
||||
const auto ct = cend - children[i]->start;
|
||||
ctime += ct;
|
||||
ctt[i] = ct;
|
||||
cti[i] = uint32_t( i );
|
||||
}
|
||||
|
||||
pdqsort_branchless( cti.get(), cti.get() + ev.child.size(), [&ctt] ( const auto& lhs, const auto& rhs ) { return ctt[lhs] > ctt[rhs]; } );
|
||||
pdqsort_branchless( cti.get(), cti.get() + children.size(), [&ctt] ( const auto& lhs, const auto& rhs ) { return ctt[lhs] > ctt[rhs]; } );
|
||||
|
||||
const auto ty = ImGui::GetTextLineHeight();
|
||||
ImGui::Columns( 2 );
|
||||
@ -3216,9 +3217,9 @@ void View::DrawZoneInfoWindow()
|
||||
sprintf( buf, "%s (%.2f%%)", TimeToString( ztime - ctime ), double( ztime - ctime ) / ztime * 100 );
|
||||
ImGui::ProgressBar( double( ztime - ctime ) / ztime, ImVec2( -1, ty ), buf );
|
||||
ImGui::NextColumn();
|
||||
for( size_t i=0; i<ev.child.size(); i++ )
|
||||
for( size_t i=0; i<children.size(); i++ )
|
||||
{
|
||||
auto& cev = *ev.child[cti[i]];
|
||||
auto& cev = *children[cti[i]];
|
||||
const auto txt = m_worker.GetZoneName( cev );
|
||||
bool b = false;
|
||||
ImGui::PushID( (int)i );
|
||||
@ -6207,9 +6208,9 @@ const ZoneEvent* View::GetZoneParent( const ZoneEvent& zone ) const
|
||||
if( it != timeline->begin() ) --it;
|
||||
if( zone.end >= 0 && (*it)->start > zone.end ) break;
|
||||
if( *it == &zone ) return parent;
|
||||
if( (*it)->child.empty() ) break;
|
||||
if( (*it)->child < 0 ) break;
|
||||
parent = *it;
|
||||
timeline = &parent->child;
|
||||
timeline = &m_worker.GetZoneChildren( parent->child );
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
@ -6248,8 +6249,8 @@ uint64_t View::GetZoneThread( const ZoneEvent& zone ) const
|
||||
if( it != timeline->begin() ) --it;
|
||||
if( zone.end >= 0 && (*it)->start > zone.end ) break;
|
||||
if( *it == &zone ) return thread->id;
|
||||
if( (*it)->child.empty() ) break;
|
||||
timeline = &(*it)->child;
|
||||
if( (*it)->child < 0 ) break;
|
||||
timeline = &m_worker.GetZoneChildren( (*it)->child );
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
@ -6323,8 +6324,8 @@ const ZoneEvent* View::FindZoneAtTime( uint64_t thread, int64_t time ) const
|
||||
if( it != timeline->begin() ) --it;
|
||||
if( (*it)->start > time || ( (*it)->end >= 0 && (*it)->end < time ) ) return ret;
|
||||
ret = *it;
|
||||
if( (*it)->child.empty() ) return ret;
|
||||
timeline = &(*it)->child;
|
||||
if( (*it)->child < 0 ) return ret;
|
||||
timeline = &m_worker.GetZoneChildren( (*it)->child );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -468,13 +468,15 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
|
||||
f.Read( tid );
|
||||
td->id = tid;
|
||||
f.Read( td->count );
|
||||
uint64_t tsz;
|
||||
f.Read( tsz );
|
||||
if( fileVer <= FileVersion( 0, 3, 2 ) )
|
||||
{
|
||||
ReadTimelinePre033( f, td->timeline, CompressThread( tid ), fileVer );
|
||||
ReadTimelinePre033( f, td->timeline, CompressThread( tid ), tsz, fileVer );
|
||||
}
|
||||
else
|
||||
{
|
||||
ReadTimeline( f, td->timeline, CompressThread( tid ) );
|
||||
ReadTimeline( f, td->timeline, CompressThread( tid ), tsz );
|
||||
}
|
||||
uint64_t msz;
|
||||
f.Read( msz );
|
||||
@ -708,7 +710,6 @@ Worker::~Worker()
|
||||
|
||||
for( auto& v : m_data.threads )
|
||||
{
|
||||
ZoneCleanup( v->timeline );
|
||||
v->messages.~Vector();
|
||||
}
|
||||
for( auto& v : m_data.gpuData )
|
||||
@ -783,8 +784,8 @@ int64_t Worker::GetZoneEnd( const ZoneEvent& ev )
|
||||
for(;;)
|
||||
{
|
||||
if( ptr->end >= 0 ) return ptr->end;
|
||||
if( ptr->child.empty() ) return ptr->start;
|
||||
ptr = ptr->child.back();
|
||||
if( ptr->child < 0 ) return ptr->start;
|
||||
ptr = GetZoneChildren( ptr->child ).back();
|
||||
}
|
||||
}
|
||||
|
||||
@ -1267,7 +1268,16 @@ void Worker::NewZone( ZoneEvent* zone, uint64_t thread )
|
||||
}
|
||||
else
|
||||
{
|
||||
td->stack.back()->child.push_back( zone );
|
||||
auto back = td->stack.back();
|
||||
if( back->child < 0 )
|
||||
{
|
||||
back->child = int32_t( m_data.m_zoneChildren.size() );
|
||||
m_data.m_zoneChildren.push_back( Vector<ZoneEvent*>( zone ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
m_data.m_zoneChildren[back->child].push_back( zone );
|
||||
}
|
||||
td->stack.push_back_non_empty( zone );
|
||||
}
|
||||
}
|
||||
@ -1683,6 +1693,7 @@ void Worker::ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev )
|
||||
assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
|
||||
zone->cpu_start = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
|
||||
zone->callstack = 0;
|
||||
zone->child = -1;
|
||||
|
||||
m_data.lastTime = std::max( m_data.lastTime, zone->start );
|
||||
|
||||
@ -1718,6 +1729,7 @@ void Worker::ProcessZoneBeginAllocSrcLoc( const QueueZoneBegin& ev )
|
||||
assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
|
||||
zone->cpu_start = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
|
||||
zone->callstack = 0;
|
||||
zone->child = -1;
|
||||
|
||||
m_data.lastTime = std::max( m_data.lastTime, zone->start );
|
||||
|
||||
@ -1752,11 +1764,14 @@ void Worker::ProcessZoneEnd( const QueueZoneEnd& ev )
|
||||
it->second.min = std::min( it->second.min, timeSpan );
|
||||
it->second.max = std::max( it->second.max, timeSpan );
|
||||
it->second.total += timeSpan;
|
||||
for( auto& v : zone->child )
|
||||
if( zone->child >= 0 )
|
||||
{
|
||||
for( auto& v : GetZoneChildren( zone->child ) )
|
||||
{
|
||||
const auto childSpan = std::max( int64_t( 0 ), v->end - v->start );
|
||||
timeSpan -= childSpan;
|
||||
}
|
||||
}
|
||||
it->second.selfTotal += timeSpan;
|
||||
}
|
||||
#endif
|
||||
@ -2431,23 +2446,42 @@ void Worker::ReconstructMemAllocPlot()
|
||||
m_data.memory.plot = plot;
|
||||
}
|
||||
|
||||
void Worker::ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread )
|
||||
void Worker::ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread )
|
||||
{
|
||||
uint64_t sz;
|
||||
f.Read( sz );
|
||||
if( sz != 0 )
|
||||
if( sz == 0 )
|
||||
{
|
||||
ReadTimeline( f, vec, thread, sz );
|
||||
zone->child = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
zone->child = m_data.m_zoneChildren.size();
|
||||
// Put placeholder to have proper size of zone children in nested calls
|
||||
m_data.m_zoneChildren.push_back( Vector<ZoneEvent*>() );
|
||||
// Real data buffer. Can't use placeholder, as the vector can be reallocated
|
||||
// and the buffer address will change, but the reference won't.
|
||||
Vector<ZoneEvent*> tmp;
|
||||
ReadTimeline( f, tmp, thread, sz );
|
||||
m_data.m_zoneChildren[zone->child] = std::move( tmp );
|
||||
}
|
||||
}
|
||||
|
||||
void Worker::ReadTimelinePre033( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, int fileVer )
|
||||
void Worker::ReadTimelinePre033( FileRead& f, ZoneEvent* zone, uint16_t thread, int fileVer )
|
||||
{
|
||||
uint64_t sz;
|
||||
f.Read( sz );
|
||||
if( sz != 0 )
|
||||
if( sz == 0 )
|
||||
{
|
||||
ReadTimelinePre033( f, vec, thread, sz, fileVer );
|
||||
zone->child = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
zone->child = m_data.m_zoneChildren.size();
|
||||
m_data.m_zoneChildren.push_back( Vector<ZoneEvent*>() );
|
||||
Vector<ZoneEvent*> tmp;
|
||||
ReadTimelinePre033( f, tmp, thread, sz, fileVer );
|
||||
m_data.m_zoneChildren[zone->child] = std::move( tmp );
|
||||
}
|
||||
}
|
||||
|
||||
@ -2488,11 +2522,14 @@ void Worker::ReadTimelineUpdateStatistics( ZoneEvent* zone, uint16_t thread )
|
||||
it->second.min = std::min( it->second.min, timeSpan );
|
||||
it->second.max = std::max( it->second.max, timeSpan );
|
||||
it->second.total += timeSpan;
|
||||
for( auto& v : zone->child )
|
||||
if( zone->child >= 0 )
|
||||
{
|
||||
for( auto& v : GetZoneChildren( zone->child ) )
|
||||
{
|
||||
const auto childSpan = std::max( int64_t( 0 ), v->end - v->start );
|
||||
timeSpan -= childSpan;
|
||||
}
|
||||
}
|
||||
it->second.selfTotal += timeSpan;
|
||||
}
|
||||
}
|
||||
@ -2509,10 +2546,8 @@ void Worker::ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread
|
||||
{
|
||||
auto zone = m_slab.Alloc<ZoneEvent>();
|
||||
vec.push_back_no_space_check( zone );
|
||||
new( &zone->child ) decltype( zone->child );
|
||||
|
||||
f.Read( zone, sizeof( ZoneEvent ) - sizeof( ZoneEvent::child ) );
|
||||
ReadTimeline( f, zone->child, thread );
|
||||
ReadTimeline( f, zone, thread );
|
||||
ReadTimelineUpdateStatistics( zone, thread );
|
||||
}
|
||||
}
|
||||
@ -2527,7 +2562,6 @@ void Worker::ReadTimelinePre033( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t
|
||||
{
|
||||
auto zone = m_slab.Alloc<ZoneEvent>();
|
||||
vec.push_back_no_space_check( zone );
|
||||
new( &zone->child ) decltype( zone->child );
|
||||
|
||||
if( fileVer <= FileVersion( 0, 3, 1 ) )
|
||||
{
|
||||
@ -2541,7 +2575,7 @@ void Worker::ReadTimelinePre033( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t
|
||||
f.Read( zone, 30 );
|
||||
zone->name.__data = 0;
|
||||
}
|
||||
ReadTimelinePre033( f, zone->child, thread, fileVer );
|
||||
ReadTimelinePre033( f, zone, thread, fileVer );
|
||||
ReadTimelineUpdateStatistics( zone, thread );
|
||||
}
|
||||
}
|
||||
@ -2763,7 +2797,15 @@ void Worker::WriteTimeline( FileWrite& f, const Vector<ZoneEvent*>& vec )
|
||||
for( auto& v : vec )
|
||||
{
|
||||
f.Write( v, sizeof( ZoneEvent ) - sizeof( ZoneEvent::child ) );
|
||||
WriteTimeline( f, v->child );
|
||||
if( v->child < 0 )
|
||||
{
|
||||
sz = 0;
|
||||
f.Write( &sz, sizeof( sz ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
WriteTimeline( f, GetZoneChildren( v->child ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -111,6 +111,8 @@ private:
|
||||
flat_hash_map<uint64_t, uint16_t, nohash<uint64_t>> threadMap;
|
||||
Vector<uint64_t> threadExpand;
|
||||
std::pair<uint64_t, uint16_t> threadLast;
|
||||
|
||||
std::vector<Vector<ZoneEvent*>> m_zoneChildren;
|
||||
};
|
||||
|
||||
struct MbpsBlock
|
||||
@ -173,7 +175,7 @@ public:
|
||||
// GetZoneEnd() will try to infer the end time by looking at child zones (parent zone can't end
|
||||
// before its children have ended).
|
||||
// GetZoneEndDirect() will only return zone's direct timing data, without looking at children.
|
||||
static int64_t GetZoneEnd( const ZoneEvent& ev );
|
||||
int64_t GetZoneEnd( const ZoneEvent& ev );
|
||||
static int64_t GetZoneEnd( const GpuEvent& ev );
|
||||
static tracy_force_inline int64_t GetZoneEndDirect( const ZoneEvent& ev ) { return ev.end >= 0 ? ev.end : ev.start; }
|
||||
static tracy_force_inline int64_t GetZoneEndDirect( const GpuEvent& ev ) { return ev.gpuEnd >= 0 ? ev.gpuEnd : ev.gpuStart; }
|
||||
@ -189,6 +191,8 @@ public:
|
||||
const char* GetZoneName( const GpuEvent& ev ) const;
|
||||
const char* GetZoneName( const GpuEvent& ev, const SourceLocation& srcloc ) const;
|
||||
|
||||
tracy_force_inline const Vector<ZoneEvent*>& GetZoneChildren( int32_t idx ) const { return m_data.m_zoneChildren[idx]; }
|
||||
|
||||
std::vector<int32_t> GetMatchingSourceLocation( const char* query ) const;
|
||||
|
||||
#ifndef TRACY_NO_STATISTICS
|
||||
@ -294,8 +298,8 @@ private:
|
||||
uint16_t CompressThreadReal( uint64_t thread );
|
||||
uint16_t CompressThreadNew( uint64_t thread );
|
||||
|
||||
tracy_force_inline void ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread );
|
||||
tracy_force_inline void ReadTimelinePre033( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, int fileVer );
|
||||
tracy_force_inline void ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread );
|
||||
tracy_force_inline void ReadTimelinePre033( FileRead& f, ZoneEvent* zone, uint16_t thread, int fileVer );
|
||||
tracy_force_inline void ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec );
|
||||
tracy_force_inline void ReadTimelinePre032( FileRead& f, Vector<GpuEvent*>& vec );
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user