1
0
mirror of https://github.com/wolfpld/tracy.git synced 2025-03-20 07:40:02 +08:00

Store children vectors in a separate data collection.

This reduces the per-zone memory cost by 9 bytes if a zone has no children
and increases it by 4 bytes if it has children. This is universally a better
solution, because the large majority of children vectors are empty, as the
following data shows:

+++ /home/wolf/desktop/tracy-old/android.tracy +++
Vectors: 2794480
Size 0: 2373070 (84.92%)
Size 1: 70237 (2.51%)
Size 2+: 351173 (12.57%)
+++ /home/wolf/desktop/tracy-old/asset-new.tracy +++
Vectors: 1799227
Size 0: 1482691 (82.41%)
Size 1: 93272 (5.18%)
Size 2+: 223264 (12.41%)
+++ /home/wolf/desktop/tracy-old/asset-new-id.tracy +++
Vectors: 1977996
Size 0: 1640817 (82.95%)
Size 1: 97198 (4.91%)
Size 2+: 239981 (12.13%)
+++ /home/wolf/desktop/tracy-old/asset-old.tracy +++
Vectors: 1782395
Size 0: 1471437 (82.55%)
Size 1: 88813 (4.98%)
Size 2+: 222145 (12.46%)
+++ /home/wolf/desktop/tracy-old/big.tracy +++
Vectors: 180794047
Size 0: 172696094 (95.52%)
Size 1: 2799772 (1.55%)
Size 2+: 5298181 (2.93%)
+++ /home/wolf/desktop/tracy-old/darkrl.tracy +++
Vectors: 12014129
Size 0: 11611324 (96.65%)
Size 1: 134980 (1.12%)
Size 2+: 267825 (2.23%)
+++ /home/wolf/desktop/tracy-old/mem.tracy +++
Vectors: 383097
Size 0: 321932 (84.03%)
Size 1: 854 (0.22%)
Size 2+: 60311 (15.74%)
+++ /home/wolf/desktop/tracy-old/new.tracy +++
Vectors: 77536
Size 0: 63035 (81.30%)
Size 1: 8886 (11.46%)
Size 2+: 5615 (7.24%)
+++ /home/wolf/desktop/tracy-old/selfprofile.tracy +++
Vectors: 22940871
Size 0: 22704868 (98.97%)
Size 1: 73000 (0.32%)
Size 2+: 163003 (0.71%)
+++ /home/wolf/desktop/tracy-old/tbrowser.tracy +++
Vectors: 962682
Size 0: 695380 (72.23%)
Size 1: 43007 (4.47%)
Size 2+: 224295 (23.30%)
+++ /home/wolf/desktop/tracy-old/virtualfile_hc.tracy +++
Vectors: 529170
Size 0: 449386 (84.92%)
Size 1: 15694 (2.97%)
Size 2+: 64090 (12.11%)
+++ /home/wolf/desktop/tracy-old/zfile_hc.tracy +++
Vectors: 264849
Size 0: 220589 (83.29%)
Size 1: 9386 (3.54%)
Size 2+: 34874 (13.17%)
This commit is contained in:
Bartosz Taudul 2018-07-22 16:05:50 +02:00
parent eb1475ebd4
commit 3a934b2ba3
4 changed files with 95 additions and 48 deletions

View File

@ -84,7 +84,7 @@ struct ZoneEvent
StringIdx name;
// This must be last. All above is read/saved as-is.
Vector<ZoneEvent*> child;
int32_t child;
};
enum { ZoneEventSize = sizeof( ZoneEvent ) };

View File

@ -1370,9 +1370,9 @@ int View::DrawZoneLevel( const Vector<ZoneEvent*>& vec, bool hover, double pxns,
m_lastCpu = ev.cpu_start;
}
if( !ev.child.empty() )
if( ev.child >= 0 )
{
const auto d = DispatchZoneLevel( ev.child, hover, pxns, wpos, _offset, depth, yMin, yMax );
const auto d = DispatchZoneLevel( m_worker.GetZoneChildren( ev.child ), hover, pxns, wpos, _offset, depth, yMin, yMax );
if( d > maxdepth ) maxdepth = d;
}
@ -1491,9 +1491,9 @@ int View::SkipZoneLevel( const Vector<ZoneEvent*>& vec, bool hover, double pxns,
{
m_lastCpu = ev.cpu_start;
if( !ev.child.empty() )
if( ev.child >= 0 )
{
const auto d = DispatchZoneLevel( ev.child, hover, pxns, wpos, _offset, depth, yMin, yMax );
const auto d = DispatchZoneLevel( m_worker.GetZoneChildren( ev.child ), hover, pxns, wpos, _offset, depth, yMin, yMax );
if( d > maxdepth ) maxdepth = d;
}
@ -3187,26 +3187,27 @@ void View::DrawZoneInfoWindow()
}
} );
if( !ev.child.empty() )
if( ev.child >= 0 )
{
const auto& children = m_worker.GetZoneChildren( ev.child );
bool expand = ImGui::TreeNode( "Child zones" );
ImGui::SameLine();
ImGui::TextDisabled( "(%s)", RealToString( ev.child.size(), true ) );
ImGui::TextDisabled( "(%s)", RealToString( children.size(), true ) );
if( expand )
{
auto ctt = std::make_unique<uint64_t[]>( ev.child.size() );
auto cti = std::make_unique<uint32_t[]>( ev.child.size() );
auto ctt = std::make_unique<uint64_t[]>( children.size() );
auto cti = std::make_unique<uint32_t[]>( children.size() );
uint64_t ctime = 0;
for( size_t i=0; i<ev.child.size(); i++ )
for( size_t i=0; i<children.size(); i++ )
{
const auto cend = m_worker.GetZoneEnd( *ev.child[i] );
const auto ct = cend - ev.child[i]->start;
const auto cend = m_worker.GetZoneEnd( *children[i] );
const auto ct = cend - children[i]->start;
ctime += ct;
ctt[i] = ct;
cti[i] = uint32_t( i );
}
pdqsort_branchless( cti.get(), cti.get() + ev.child.size(), [&ctt] ( const auto& lhs, const auto& rhs ) { return ctt[lhs] > ctt[rhs]; } );
pdqsort_branchless( cti.get(), cti.get() + children.size(), [&ctt] ( const auto& lhs, const auto& rhs ) { return ctt[lhs] > ctt[rhs]; } );
const auto ty = ImGui::GetTextLineHeight();
ImGui::Columns( 2 );
@ -3216,9 +3217,9 @@ void View::DrawZoneInfoWindow()
sprintf( buf, "%s (%.2f%%)", TimeToString( ztime - ctime ), double( ztime - ctime ) / ztime * 100 );
ImGui::ProgressBar( double( ztime - ctime ) / ztime, ImVec2( -1, ty ), buf );
ImGui::NextColumn();
for( size_t i=0; i<ev.child.size(); i++ )
for( size_t i=0; i<children.size(); i++ )
{
auto& cev = *ev.child[cti[i]];
auto& cev = *children[cti[i]];
const auto txt = m_worker.GetZoneName( cev );
bool b = false;
ImGui::PushID( (int)i );
@ -6207,9 +6208,9 @@ const ZoneEvent* View::GetZoneParent( const ZoneEvent& zone ) const
if( it != timeline->begin() ) --it;
if( zone.end >= 0 && (*it)->start > zone.end ) break;
if( *it == &zone ) return parent;
if( (*it)->child.empty() ) break;
if( (*it)->child < 0 ) break;
parent = *it;
timeline = &parent->child;
timeline = &m_worker.GetZoneChildren( parent->child );
}
}
return nullptr;
@ -6248,8 +6249,8 @@ uint64_t View::GetZoneThread( const ZoneEvent& zone ) const
if( it != timeline->begin() ) --it;
if( zone.end >= 0 && (*it)->start > zone.end ) break;
if( *it == &zone ) return thread->id;
if( (*it)->child.empty() ) break;
timeline = &(*it)->child;
if( (*it)->child < 0 ) break;
timeline = &m_worker.GetZoneChildren( (*it)->child );
}
}
return 0;
@ -6323,8 +6324,8 @@ const ZoneEvent* View::FindZoneAtTime( uint64_t thread, int64_t time ) const
if( it != timeline->begin() ) --it;
if( (*it)->start > time || ( (*it)->end >= 0 && (*it)->end < time ) ) return ret;
ret = *it;
if( (*it)->child.empty() ) return ret;
timeline = &(*it)->child;
if( (*it)->child < 0 ) return ret;
timeline = &m_worker.GetZoneChildren( (*it)->child );
}
}

View File

@ -468,13 +468,15 @@ Worker::Worker( FileRead& f, EventType::Type eventMask )
f.Read( tid );
td->id = tid;
f.Read( td->count );
uint64_t tsz;
f.Read( tsz );
if( fileVer <= FileVersion( 0, 3, 2 ) )
{
ReadTimelinePre033( f, td->timeline, CompressThread( tid ), fileVer );
ReadTimelinePre033( f, td->timeline, CompressThread( tid ), tsz, fileVer );
}
else
{
ReadTimeline( f, td->timeline, CompressThread( tid ) );
ReadTimeline( f, td->timeline, CompressThread( tid ), tsz );
}
uint64_t msz;
f.Read( msz );
@ -708,7 +710,6 @@ Worker::~Worker()
for( auto& v : m_data.threads )
{
ZoneCleanup( v->timeline );
v->messages.~Vector();
}
for( auto& v : m_data.gpuData )
@ -783,8 +784,8 @@ int64_t Worker::GetZoneEnd( const ZoneEvent& ev )
for(;;)
{
if( ptr->end >= 0 ) return ptr->end;
if( ptr->child.empty() ) return ptr->start;
ptr = ptr->child.back();
if( ptr->child < 0 ) return ptr->start;
ptr = GetZoneChildren( ptr->child ).back();
}
}
@ -1267,7 +1268,16 @@ void Worker::NewZone( ZoneEvent* zone, uint64_t thread )
}
else
{
td->stack.back()->child.push_back( zone );
auto back = td->stack.back();
if( back->child < 0 )
{
back->child = int32_t( m_data.m_zoneChildren.size() );
m_data.m_zoneChildren.push_back( Vector<ZoneEvent*>( zone ) );
}
else
{
m_data.m_zoneChildren[back->child].push_back( zone );
}
td->stack.push_back_non_empty( zone );
}
}
@ -1683,6 +1693,7 @@ void Worker::ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev )
assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
zone->cpu_start = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
zone->callstack = 0;
zone->child = -1;
m_data.lastTime = std::max( m_data.lastTime, zone->start );
@ -1718,6 +1729,7 @@ void Worker::ProcessZoneBeginAllocSrcLoc( const QueueZoneBegin& ev )
assert( ev.cpu == 0xFFFFFFFF || ev.cpu <= std::numeric_limits<int8_t>::max() );
zone->cpu_start = ev.cpu == 0xFFFFFFFF ? -1 : (int8_t)ev.cpu;
zone->callstack = 0;
zone->child = -1;
m_data.lastTime = std::max( m_data.lastTime, zone->start );
@ -1752,11 +1764,14 @@ void Worker::ProcessZoneEnd( const QueueZoneEnd& ev )
it->second.min = std::min( it->second.min, timeSpan );
it->second.max = std::max( it->second.max, timeSpan );
it->second.total += timeSpan;
for( auto& v : zone->child )
if( zone->child >= 0 )
{
for( auto& v : GetZoneChildren( zone->child ) )
{
const auto childSpan = std::max( int64_t( 0 ), v->end - v->start );
timeSpan -= childSpan;
}
}
it->second.selfTotal += timeSpan;
}
#endif
@ -2431,23 +2446,42 @@ void Worker::ReconstructMemAllocPlot()
m_data.memory.plot = plot;
}
void Worker::ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread )
void Worker::ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread )
{
uint64_t sz;
f.Read( sz );
if( sz != 0 )
if( sz == 0 )
{
ReadTimeline( f, vec, thread, sz );
zone->child = -1;
}
else
{
zone->child = m_data.m_zoneChildren.size();
// Push a placeholder now, so that nested calls see the correct size of m_zoneChildren
m_data.m_zoneChildren.push_back( Vector<ZoneEvent*>() );
// Read into a separate buffer. A reference to the placeholder can't be used, as nested
// calls may reallocate m_zoneChildren, which would leave that reference dangling.
Vector<ZoneEvent*> tmp;
ReadTimeline( f, tmp, thread, sz );
m_data.m_zoneChildren[zone->child] = std::move( tmp );
}
}
void Worker::ReadTimelinePre033( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, int fileVer )
void Worker::ReadTimelinePre033( FileRead& f, ZoneEvent* zone, uint16_t thread, int fileVer )
{
uint64_t sz;
f.Read( sz );
if( sz != 0 )
if( sz == 0 )
{
ReadTimelinePre033( f, vec, thread, sz, fileVer );
zone->child = -1;
}
else
{
zone->child = m_data.m_zoneChildren.size();
m_data.m_zoneChildren.push_back( Vector<ZoneEvent*>() );
Vector<ZoneEvent*> tmp;
ReadTimelinePre033( f, tmp, thread, sz, fileVer );
m_data.m_zoneChildren[zone->child] = std::move( tmp );
}
}
@ -2488,11 +2522,14 @@ void Worker::ReadTimelineUpdateStatistics( ZoneEvent* zone, uint16_t thread )
it->second.min = std::min( it->second.min, timeSpan );
it->second.max = std::max( it->second.max, timeSpan );
it->second.total += timeSpan;
for( auto& v : zone->child )
if( zone->child >= 0 )
{
for( auto& v : GetZoneChildren( zone->child ) )
{
const auto childSpan = std::max( int64_t( 0 ), v->end - v->start );
timeSpan -= childSpan;
}
}
it->second.selfTotal += timeSpan;
}
}
@ -2509,10 +2546,8 @@ void Worker::ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread
{
auto zone = m_slab.Alloc<ZoneEvent>();
vec.push_back_no_space_check( zone );
new( &zone->child ) decltype( zone->child );
f.Read( zone, sizeof( ZoneEvent ) - sizeof( ZoneEvent::child ) );
ReadTimeline( f, zone->child, thread );
ReadTimeline( f, zone, thread );
ReadTimelineUpdateStatistics( zone, thread );
}
}
@ -2527,7 +2562,6 @@ void Worker::ReadTimelinePre033( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t
{
auto zone = m_slab.Alloc<ZoneEvent>();
vec.push_back_no_space_check( zone );
new( &zone->child ) decltype( zone->child );
if( fileVer <= FileVersion( 0, 3, 1 ) )
{
@ -2541,7 +2575,7 @@ void Worker::ReadTimelinePre033( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t
f.Read( zone, 30 );
zone->name.__data = 0;
}
ReadTimelinePre033( f, zone->child, thread, fileVer );
ReadTimelinePre033( f, zone, thread, fileVer );
ReadTimelineUpdateStatistics( zone, thread );
}
}
@ -2763,7 +2797,15 @@ void Worker::WriteTimeline( FileWrite& f, const Vector<ZoneEvent*>& vec )
for( auto& v : vec )
{
f.Write( v, sizeof( ZoneEvent ) - sizeof( ZoneEvent::child ) );
WriteTimeline( f, v->child );
if( v->child < 0 )
{
sz = 0;
f.Write( &sz, sizeof( sz ) );
}
else
{
WriteTimeline( f, GetZoneChildren( v->child ) );
}
}
}

View File

@ -111,6 +111,8 @@ private:
flat_hash_map<uint64_t, uint16_t, nohash<uint64_t>> threadMap;
Vector<uint64_t> threadExpand;
std::pair<uint64_t, uint16_t> threadLast;
std::vector<Vector<ZoneEvent*>> m_zoneChildren;
};
struct MbpsBlock
@ -173,7 +175,7 @@ public:
// GetZoneEnd() will try to infer the end time by looking at child zones (parent zone can't end
// before its children have ended).
// GetZoneEndDirect() will only return zone's direct timing data, without looking at children.
static int64_t GetZoneEnd( const ZoneEvent& ev );
int64_t GetZoneEnd( const ZoneEvent& ev );
static int64_t GetZoneEnd( const GpuEvent& ev );
static tracy_force_inline int64_t GetZoneEndDirect( const ZoneEvent& ev ) { return ev.end >= 0 ? ev.end : ev.start; }
static tracy_force_inline int64_t GetZoneEndDirect( const GpuEvent& ev ) { return ev.gpuEnd >= 0 ? ev.gpuEnd : ev.gpuStart; }
@ -189,6 +191,8 @@ public:
const char* GetZoneName( const GpuEvent& ev ) const;
const char* GetZoneName( const GpuEvent& ev, const SourceLocation& srcloc ) const;
tracy_force_inline const Vector<ZoneEvent*>& GetZoneChildren( int32_t idx ) const { return m_data.m_zoneChildren[idx]; }
std::vector<int32_t> GetMatchingSourceLocation( const char* query ) const;
#ifndef TRACY_NO_STATISTICS
@ -294,8 +298,8 @@ private:
uint16_t CompressThreadReal( uint64_t thread );
uint16_t CompressThreadNew( uint64_t thread );
tracy_force_inline void ReadTimeline( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread );
tracy_force_inline void ReadTimelinePre033( FileRead& f, Vector<ZoneEvent*>& vec, uint16_t thread, int fileVer );
tracy_force_inline void ReadTimeline( FileRead& f, ZoneEvent* zone, uint16_t thread );
tracy_force_inline void ReadTimelinePre033( FileRead& f, ZoneEvent* zone, uint16_t thread, int fileVer );
tracy_force_inline void ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec );
tracy_force_inline void ReadTimelinePre032( FileRead& f, Vector<GpuEvent*>& vec );