mirror of
https://github.com/wolfpld/tracy.git
synced 2025-03-20 07:40:02 +08:00
Pack child into GPU start/end in GpuEvent (saves 4 bytes).
long 5152 MB -> 5061 MB
This commit is contained in:
parent
7319293081
commit
25b610a36f
@ -222,17 +222,22 @@ struct GpuEvent
|
||||
// Stores the CPU start time in bytes 2..7 of _cpuStart_srcloc. The first two
// bytes of that field are left untouched (SetSrcLoc() writes them).
// Only the first six bytes (memory order) of cpuStart are copied, so the value
// has to fit in 48 bits; the assert checks the upper bound only.
// NOTE(review): the byte-offset copy assumes a little-endian host -- confirm.
tracy_force_inline void SetCpuStart( int64_t cpuStart )
{
    assert( cpuStart < (int64_t)( 1ull << 47 ) );
    char* dst = reinterpret_cast<char*>( &_cpuStart_srcloc );
    const char* src = reinterpret_cast<const char*>( &cpuStart );
    memcpy( dst + 2, src, 4 );
    memcpy( dst + 6, src + 4, 2 );
}
|
||||
// Returns the CPU end time kept in the upper 48 bits of _cpuEnd_thread.
// The field is reinterpreted as signed before shifting so that negative
// values come back negative (relies on the signed right shift being arithmetic).
tracy_force_inline int64_t CpuEnd() const
{
    const auto packed = static_cast<int64_t>( _cpuEnd_thread );
    return packed >> 16;
}
|
||||
// Stores the CPU end time in bytes 2..7 of _cpuEnd_thread. The first two bytes
// of that field are left untouched (SetThread() writes them).
// Only the first six bytes (memory order) of cpuEnd are copied, so the value
// has to fit in 48 bits; the assert checks the upper bound only.
// NOTE(review): the byte-offset copy matches the `>> 16` in CpuEnd() only on a
// little-endian host -- confirm.
tracy_force_inline void SetCpuEnd( int64_t cpuEnd )
{
    assert( cpuEnd < (int64_t)( 1ull << 47 ) );
    char* dst = reinterpret_cast<char*>( &_cpuEnd_thread );
    const char* src = reinterpret_cast<const char*>( &cpuEnd );
    memcpy( dst + 2, src, 4 );
    memcpy( dst + 6, src + 4, 2 );
}
|
||||
// Returns the GPU start time kept in the upper 48 bits of _gpuStart_child1.
// The field is reinterpreted as signed before shifting so that negative
// values come back negative (relies on the signed right shift being arithmetic).
tracy_force_inline int64_t GpuStart() const
{
    const auto packed = static_cast<int64_t>( _gpuStart_child1 );
    return packed >> 16;
}
|
||||
// Stores the GPU start time in bytes 2..7 of _gpuStart_child1. The first two
// bytes of that field are left untouched (SetChild() writes them).
// Only the first six bytes (memory order) of gpuStart are copied, so the value
// has to fit in 48 bits; the assert checks the upper bound only.
// NOTE(review): the byte-offset copy matches the `>> 16` in GpuStart() only on
// a little-endian host -- confirm.
tracy_force_inline void SetGpuStart( int64_t gpuStart )
{
    assert( gpuStart < (int64_t)( 1ull << 47 ) );
    char* dst = reinterpret_cast<char*>( &_gpuStart_child1 );
    const char* src = reinterpret_cast<const char*>( &gpuStart );
    memcpy( dst + 2, src, 4 );
    memcpy( dst + 6, src + 4, 2 );
}
|
||||
// Returns the GPU end time kept in the upper 48 bits of _gpuEnd_child2.
// The field is reinterpreted as signed before shifting so that negative
// values come back negative (relies on the signed right shift being arithmetic).
tracy_force_inline int64_t GpuEnd() const
{
    const auto packed = static_cast<int64_t>( _gpuEnd_child2 );
    return packed >> 16;
}
|
||||
// Stores the GPU end time in bytes 2..7 of _gpuEnd_child2. The first two bytes
// of that field are left untouched (SetChild() writes them).
// Only the first six bytes (memory order) of gpuEnd are copied, so the value
// has to fit in 48 bits; the assert checks the upper bound only.
// NOTE(review): the byte-offset copy matches the `>> 16` in GpuEnd() only on a
// little-endian host -- confirm.
tracy_force_inline void SetGpuEnd( int64_t gpuEnd )
{
    assert( gpuEnd < (int64_t)( 1ull << 47 ) );
    char* dst = reinterpret_cast<char*>( &_gpuEnd_child2 );
    const char* src = reinterpret_cast<const char*>( &gpuEnd );
    memcpy( dst + 2, src, 4 );
    memcpy( dst + 6, src + 4, 2 );
}
|
||||
// Returns the source location value held in the low 16 bits of
// _cpuStart_srcloc, converted to int16_t.
tracy_force_inline int16_t SrcLoc() const
{
    const auto low16 = _cpuStart_srcloc & 0xFFFF;
    return int16_t( low16 );
}
|
||||
// Writes srcloc over the first two bytes of _cpuStart_srcloc; the remaining
// six bytes of the field are not modified.
// NOTE(review): these are the bits read by SrcLoc() (`& 0xFFFF`) only on a
// little-endian host -- confirm.
tracy_force_inline void SetSrcLoc( int16_t srcloc )
{
    memcpy( &_cpuStart_srcloc, &srcloc, sizeof( srcloc ) );
}
|
||||
// Returns the 16-bit thread value held in the low 16 bits of _cpuEnd_thread.
tracy_force_inline uint16_t Thread() const
{
    const auto low16 = _cpuEnd_thread & 0xFFFF;
    return uint16_t( low16 );
}
|
||||
// Writes thread over the first two bytes of _cpuEnd_thread; the remaining six
// bytes of the field are not modified.
// NOTE(review): these are the bits read by Thread() (`& 0xFFFF`) only on a
// little-endian host -- confirm.
tracy_force_inline void SetThread( uint16_t thread )
{
    memcpy( &_cpuEnd_thread, &thread, sizeof( thread ) );
}
|
||||
// Reassembles the 32-bit child value that is split across two fields:
// the low 16 bits of _gpuStart_child1 supply bits 0..15 and the low 16 bits
// of _gpuEnd_child2 supply bits 16..31.
tracy_force_inline int32_t Child() const
{
    const auto lowHalf = uint32_t( _gpuStart_child1 & 0xFFFF );
    const auto highHalf = uint32_t( _gpuEnd_child2 & 0xFFFF );
    return int32_t( lowHalf | ( highHalf << 16 ) );
}
|
||||
// Splits child across two fields: its first two bytes (memory order) go to the
// first two bytes of _gpuStart_child1, its last two bytes to the first two
// bytes of _gpuEnd_child2. The other six bytes of each field are not modified.
// NOTE(review): this matches the bit layout read by Child() only on a
// little-endian host -- confirm.
tracy_force_inline void SetChild( int32_t child )
{
    const char* src = reinterpret_cast<const char*>( &child );
    memcpy( &_gpuStart_child1, src, 2 );
    memcpy( &_gpuEnd_child2, src + 2, 2 );
}
|
||||
|
||||
uint64_t _cpuStart_srcloc;
|
||||
uint64_t _cpuEnd_thread;
|
||||
int64_t gpuStart;
|
||||
int64_t gpuEnd;
|
||||
uint64_t _gpuStart_child1;
|
||||
uint64_t _gpuEnd_child2;
|
||||
Int24 callstack;
|
||||
int32_t child;
|
||||
};
|
||||
|
||||
enum { GpuEventSize = sizeof( GpuEvent ) };
|
||||
|
||||
@ -2106,9 +2106,9 @@ void View::DrawZones()
|
||||
for( auto& td : v->threadData )
|
||||
{
|
||||
assert( !td.second.timeline.empty() );
|
||||
if( td.second.timeline.front()->gpuStart >= 0 )
|
||||
if( td.second.timeline.front()->GpuStart() >= 0 )
|
||||
{
|
||||
const auto begin = td.second.timeline.front()->gpuStart;
|
||||
const auto begin = td.second.timeline.front()->GpuStart();
|
||||
const auto drift = GpuDrift( v );
|
||||
if( !singleThread ) offset += sstep;
|
||||
const auto partDepth = DispatchGpuZoneLevel( td.second.timeline, hover, pxns, int64_t( nspx ), wpos, offset, 0, v->thread, yMin, yMax, begin, drift );
|
||||
@ -2176,7 +2176,7 @@ void View::DrawZones()
|
||||
int64_t t1 = std::numeric_limits<int64_t>::min();
|
||||
for( auto& td : v->threadData )
|
||||
{
|
||||
const auto _t0 = td.second.timeline.front()->gpuStart;
|
||||
const auto _t0 = td.second.timeline.front()->GpuStart();
|
||||
if( _t0 >= 0 )
|
||||
{
|
||||
// FIXME
|
||||
@ -2241,7 +2241,7 @@ void View::DrawZones()
|
||||
int64_t t0 = std::numeric_limits<int64_t>::max();
|
||||
for( auto& td : v->threadData )
|
||||
{
|
||||
const auto _t0 = td.second.timeline.front()->gpuStart;
|
||||
const auto _t0 = td.second.timeline.front()->GpuStart();
|
||||
if( _t0 >= 0 )
|
||||
{
|
||||
t0 = std::min( t0, _t0 );
|
||||
@ -3318,10 +3318,10 @@ int View::DrawGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
|
||||
const auto delay = m_worker.GetDelay();
|
||||
const auto resolution = m_worker.GetResolution();
|
||||
// cast to uint64_t, so that unended zones (end = -1) are still drawn
|
||||
auto it = std::lower_bound( vec.begin(), vec.end(), std::max<int64_t>( 0, m_vd.zvStart - delay ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuEnd, begin, drift ) < (uint64_t)r; } );
|
||||
auto it = std::lower_bound( vec.begin(), vec.end(), std::max<int64_t>( 0, m_vd.zvStart - delay ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuEnd(), begin, drift ) < (uint64_t)r; } );
|
||||
if( it == vec.end() ) return depth;
|
||||
|
||||
const auto zitend = std::lower_bound( it, vec.end(), std::max<int64_t>( 0, m_vd.zvEnd + resolution ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuStart, begin, drift ) < (uint64_t)r; } );
|
||||
const auto zitend = std::lower_bound( it, vec.end(), std::max<int64_t>( 0, m_vd.zvEnd + resolution ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuStart(), begin, drift ) < (uint64_t)r; } );
|
||||
if( it == zitend ) return depth;
|
||||
|
||||
const auto w = ImGui::GetWindowContentRegionWidth() - 1;
|
||||
@ -3339,7 +3339,7 @@ int View::DrawGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
|
||||
const auto color = GetZoneColor( ev );
|
||||
auto end = m_worker.GetZoneEnd( ev );
|
||||
if( end == std::numeric_limits<int64_t>::max() ) break;
|
||||
const auto start = AdjustGpuTime( ev.gpuStart, begin, drift );
|
||||
const auto start = AdjustGpuTime( ev.GpuStart(), begin, drift );
|
||||
end = AdjustGpuTime( end, begin, drift );
|
||||
const auto zsz = std::max( ( end - start ) * pxns, pxns * 0.5 );
|
||||
if( zsz < MinVisSize )
|
||||
@ -3352,7 +3352,7 @@ int View::DrawGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
|
||||
for(;;)
|
||||
{
|
||||
const auto prevIt = it;
|
||||
it = std::lower_bound( it, zitend, std::max<int64_t>( 0, nextTime ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuEnd, begin, drift ) < (uint64_t)r; } );
|
||||
it = std::lower_bound( it, zitend, std::max<int64_t>( 0, nextTime ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuEnd(), begin, drift ) < (uint64_t)r; } );
|
||||
if( it == prevIt ) ++it;
|
||||
num += std::distance( prevIt, it );
|
||||
if( it == zitend ) break;
|
||||
@ -3409,9 +3409,9 @@ int View::DrawGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
|
||||
}
|
||||
else
|
||||
{
|
||||
if( ev.child >= 0 )
|
||||
if( ev.Child() >= 0 )
|
||||
{
|
||||
const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.child ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift );
|
||||
const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.Child() ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift );
|
||||
if( d > maxdepth ) maxdepth = d;
|
||||
}
|
||||
|
||||
@ -3433,7 +3433,7 @@ int View::DrawGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
|
||||
DrawTextContrast( draw, wpos + ImVec2( std::max( std::max( 0., px0 ), std::min( double( w - tsz.x ), x ) ), offset ), 0xFFFFFFFF, zoneName );
|
||||
ImGui::PopClipRect();
|
||||
}
|
||||
else if( ev.gpuStart == ev.gpuEnd )
|
||||
else if( ev.GpuStart() == ev.GpuEnd() )
|
||||
{
|
||||
DrawTextContrast( draw, wpos + ImVec2( px0 + ( px1 - px0 - tsz.x ) * 0.5, offset ), 0xFFFFFFFF, zoneName );
|
||||
}
|
||||
@ -3478,10 +3478,10 @@ int View::SkipGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
|
||||
const auto delay = m_worker.GetDelay();
|
||||
const auto resolution = m_worker.GetResolution();
|
||||
// cast to uint64_t, so that unended zones (end = -1) are still drawn
|
||||
auto it = std::lower_bound( vec.begin(), vec.end(), std::max<int64_t>( 0, m_vd.zvStart - delay ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuEnd, begin, drift ) < (uint64_t)r; } );
|
||||
auto it = std::lower_bound( vec.begin(), vec.end(), std::max<int64_t>( 0, m_vd.zvStart - delay ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuEnd(), begin, drift ) < (uint64_t)r; } );
|
||||
if( it == vec.end() ) return depth;
|
||||
|
||||
const auto zitend = std::lower_bound( it, vec.end(), std::max<int64_t>( 0, m_vd.zvEnd + resolution ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuStart, begin, drift ) < (uint64_t)r; } );
|
||||
const auto zitend = std::lower_bound( it, vec.end(), std::max<int64_t>( 0, m_vd.zvEnd + resolution ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuStart(), begin, drift ) < (uint64_t)r; } );
|
||||
if( it == zitend ) return depth;
|
||||
|
||||
depth++;
|
||||
@ -3492,7 +3492,7 @@ int View::SkipGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
|
||||
auto& ev = **it;
|
||||
auto end = m_worker.GetZoneEnd( ev );
|
||||
if( end == std::numeric_limits<int64_t>::max() ) break;
|
||||
const auto start = AdjustGpuTime( ev.gpuStart, begin, drift );
|
||||
const auto start = AdjustGpuTime( ev.GpuStart(), begin, drift );
|
||||
end = AdjustGpuTime( end, begin, drift );
|
||||
const auto zsz = std::max( ( end - start ) * pxns, pxns * 0.5 );
|
||||
if( zsz < MinVisSize )
|
||||
@ -3502,7 +3502,7 @@ int View::SkipGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
|
||||
for(;;)
|
||||
{
|
||||
const auto prevIt = it;
|
||||
it = std::lower_bound( it, zitend, nextTime, [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuEnd, begin, drift ) < (uint64_t)r; } );
|
||||
it = std::lower_bound( it, zitend, nextTime, [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuEnd(), begin, drift ) < (uint64_t)r; } );
|
||||
if( it == prevIt ) ++it;
|
||||
if( it == zitend ) break;
|
||||
const auto nend = AdjustGpuTime( m_worker.GetZoneEnd( **it ), begin, drift );
|
||||
@ -3514,9 +3514,9 @@ int View::SkipGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
|
||||
}
|
||||
else
|
||||
{
|
||||
if( ev.child >= 0 )
|
||||
if( ev.Child() >= 0 )
|
||||
{
|
||||
const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.child ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift );
|
||||
const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.Child() ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift );
|
||||
if( d > maxdepth ) maxdepth = d;
|
||||
}
|
||||
++it;
|
||||
@ -6489,9 +6489,9 @@ void View::DrawGpuInfoWindow()
|
||||
ImGui::BeginChild( "##gpuinfo" );
|
||||
|
||||
const auto end = m_worker.GetZoneEnd( ev );
|
||||
const auto ztime = end - ev.gpuStart;
|
||||
const auto ztime = end - ev.GpuStart();
|
||||
const auto selftime = GetZoneSelfTime( ev );
|
||||
TextFocused( "Time from start of program:", TimeToString( ev.gpuStart ) );
|
||||
TextFocused( "Time from start of program:", TimeToString( ev.GpuStart() ) );
|
||||
TextFocused( "GPU execution time:", TimeToString( ztime ) );
|
||||
TextFocused( "GPU self time:", TimeToString( selftime ) );
|
||||
if( ztime != 0 )
|
||||
@ -6503,15 +6503,15 @@ void View::DrawGpuInfoWindow()
|
||||
auto ctx = GetZoneCtx( ev );
|
||||
if( !ctx )
|
||||
{
|
||||
TextFocused( "Delay to execution:", TimeToString( ev.gpuStart - ev.CpuStart() ) );
|
||||
TextFocused( "Delay to execution:", TimeToString( ev.GpuStart() - ev.CpuStart() ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) );
|
||||
assert( td != ctx->threadData.end() );
|
||||
const auto begin = td->second.timeline.front()->gpuStart;
|
||||
const auto begin = td->second.timeline.front()->GpuStart();
|
||||
const auto drift = GpuDrift( ctx );
|
||||
TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.CpuStart() ) );
|
||||
TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.GpuStart(), begin, drift ) - ev.CpuStart() ) );
|
||||
}
|
||||
|
||||
ImGui::Separator();
|
||||
@ -6542,7 +6542,7 @@ void View::DrawGpuInfoWindow()
|
||||
{
|
||||
ImGui::SameLine();
|
||||
}
|
||||
ImGui::TextDisabled( "(%s) %s:%i", TimeToString( m_worker.GetZoneEnd( *v ) - v->gpuStart ), fileName, srcloc.line );
|
||||
ImGui::TextDisabled( "(%s) %s:%i", TimeToString( m_worker.GetZoneEnd( *v ) - v->GpuStart() ), fileName, srcloc.line );
|
||||
ImGui::PopID();
|
||||
if( ImGui::IsItemClicked( 1 ) )
|
||||
{
|
||||
@ -6570,9 +6570,9 @@ void View::DrawGpuInfoWindow()
|
||||
}
|
||||
} );
|
||||
|
||||
if( ev.child >= 0 )
|
||||
if( ev.Child() >= 0 )
|
||||
{
|
||||
const auto& children = m_worker.GetGpuChildren( ev.child );
|
||||
const auto& children = m_worker.GetGpuChildren( ev.Child() );
|
||||
bool expand = ImGui::TreeNode( "Child zones" );
|
||||
ImGui::SameLine();
|
||||
ImGui::TextDisabled( "(%s)", RealToString( children.size(), true ) );
|
||||
@ -6599,7 +6599,7 @@ void View::DrawGpuInfoWindow()
|
||||
{
|
||||
const auto& child = *children[i];
|
||||
const auto cend = m_worker.GetZoneEnd( child );
|
||||
const auto ct = cend - child.gpuStart;
|
||||
const auto ct = cend - child.GpuStart();
|
||||
const auto srcloc = child.SrcLoc();
|
||||
ctime += ct;
|
||||
|
||||
@ -6689,7 +6689,7 @@ void View::DrawGpuInfoWindow()
|
||||
{
|
||||
const auto& child = *children[cgr.v[i]];
|
||||
const auto cend = m_worker.GetZoneEnd( child );
|
||||
const auto ct = cend - child.gpuStart;
|
||||
const auto ct = cend - child.GpuStart();
|
||||
ctt[i] = ct;
|
||||
cti[i] = uint32_t( i );
|
||||
}
|
||||
@ -6739,7 +6739,7 @@ void View::DrawGpuInfoWindow()
|
||||
{
|
||||
const auto& child = *children[i];
|
||||
const auto cend = m_worker.GetZoneEnd( child );
|
||||
const auto ct = cend - child.gpuStart;
|
||||
const auto ct = cend - child.GpuStart();
|
||||
ctime += ct;
|
||||
ctt[i] = ct;
|
||||
cti[i] = uint32_t( i );
|
||||
@ -6902,7 +6902,7 @@ void View::DrawOptions()
|
||||
size_t lastidx = 0;
|
||||
for( size_t j=timeline.size()-1; j > 0; j-- )
|
||||
{
|
||||
if( timeline[j]->gpuEnd >= 0 )
|
||||
if( timeline[j]->GpuEnd() >= 0 )
|
||||
{
|
||||
lastidx = j;
|
||||
break;
|
||||
@ -6921,7 +6921,7 @@ void View::DrawOptions()
|
||||
const auto p1 = dist( gen );
|
||||
if( p0 != p1 )
|
||||
{
|
||||
slopes[idx++] = float( 1.0 - double( timeline[p1]->gpuStart - timeline[p0]->gpuStart ) / double( timeline[p1]->CpuStart() - timeline[p0]->CpuStart() ) );
|
||||
slopes[idx++] = float( 1.0 - double( timeline[p1]->GpuStart() - timeline[p0]->GpuStart() ) / double( timeline[p1]->CpuStart() - timeline[p0]->CpuStart() ) );
|
||||
}
|
||||
}
|
||||
while( idx < NumSlopes );
|
||||
@ -12903,19 +12903,19 @@ void View::ZoomToZone( const ZoneEvent& ev )
|
||||
void View::ZoomToZone( const GpuEvent& ev )
|
||||
{
|
||||
const auto end = m_worker.GetZoneEnd( ev );
|
||||
if( end - ev.gpuStart <= 0 ) return;
|
||||
if( end - ev.GpuStart() <= 0 ) return;
|
||||
auto ctx = GetZoneCtx( ev );
|
||||
if( !ctx )
|
||||
{
|
||||
ZoomToRange( ev.gpuStart, end );
|
||||
ZoomToRange( ev.GpuStart(), end );
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) );
|
||||
assert( td != ctx->threadData.end() );
|
||||
const auto begin = td->second.timeline.front()->gpuStart;
|
||||
const auto begin = td->second.timeline.front()->GpuStart();
|
||||
const auto drift = GpuDrift( ctx );
|
||||
ZoomToRange( AdjustGpuTime( ev.gpuStart, begin, drift ), AdjustGpuTime( end, begin, drift ) );
|
||||
ZoomToRange( AdjustGpuTime( ev.GpuStart(), begin, drift ), AdjustGpuTime( end, begin, drift ) );
|
||||
}
|
||||
}
|
||||
|
||||
@ -13096,7 +13096,7 @@ void View::ZoneTooltip( const GpuEvent& ev )
|
||||
const auto tid = GetZoneThread( ev );
|
||||
const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() );
|
||||
const auto end = m_worker.GetZoneEnd( ev );
|
||||
const auto ztime = end - ev.gpuStart;
|
||||
const auto ztime = end - ev.GpuStart();
|
||||
const auto selftime = GetZoneSelfTime( ev );
|
||||
|
||||
ImGui::BeginTooltip();
|
||||
@ -13121,15 +13121,15 @@ void View::ZoneTooltip( const GpuEvent& ev )
|
||||
auto ctx = GetZoneCtx( ev );
|
||||
if( !ctx )
|
||||
{
|
||||
TextFocused( "Delay to execution:", TimeToString( ev.gpuStart - ev.CpuStart() ) );
|
||||
TextFocused( "Delay to execution:", TimeToString( ev.GpuStart() - ev.CpuStart() ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) );
|
||||
assert( td != ctx->threadData.end() );
|
||||
const auto begin = td->second.timeline.front()->gpuStart;
|
||||
const auto begin = td->second.timeline.front()->GpuStart();
|
||||
const auto drift = GpuDrift( ctx );
|
||||
TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.CpuStart() ) );
|
||||
TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.GpuStart(), begin, drift ) - ev.CpuStart() ) );
|
||||
}
|
||||
|
||||
ImGui::EndTooltip();
|
||||
@ -13248,13 +13248,13 @@ const GpuEvent* View::GetZoneParent( const GpuEvent& zone ) const
|
||||
if( timeline->empty() ) continue;
|
||||
for(;;)
|
||||
{
|
||||
auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.gpuStart, [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->gpuStart; } );
|
||||
auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->GpuStart(); } );
|
||||
if( it != timeline->begin() ) --it;
|
||||
if( zone.gpuEnd >= 0 && (*it)->gpuStart > zone.gpuEnd ) break;
|
||||
if( zone.GpuEnd() >= 0 && (*it)->GpuStart() > zone.GpuEnd() ) break;
|
||||
if( *it == &zone ) return parent;
|
||||
if( (*it)->child < 0 ) break;
|
||||
if( (*it)->Child() < 0 ) break;
|
||||
parent = *it;
|
||||
timeline = &m_worker.GetGpuChildren( parent->child );
|
||||
timeline = &m_worker.GetGpuChildren( parent->Child() );
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -13297,12 +13297,12 @@ uint64_t View::GetZoneThread( const GpuEvent& zone ) const
|
||||
if( timeline->empty() ) continue;
|
||||
for(;;)
|
||||
{
|
||||
auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.gpuStart, [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->gpuStart; } );
|
||||
auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->GpuStart(); } );
|
||||
if( it != timeline->begin() ) --it;
|
||||
if( zone.gpuEnd >= 0 && (*it)->gpuStart > zone.gpuEnd ) break;
|
||||
if( zone.GpuEnd() >= 0 && (*it)->GpuStart() > zone.GpuEnd() ) break;
|
||||
if( *it == &zone ) return ctx->thread;
|
||||
if( (*it)->child < 0 ) break;
|
||||
timeline = &m_worker.GetGpuChildren( (*it)->child );
|
||||
if( (*it)->Child() < 0 ) break;
|
||||
timeline = &m_worker.GetGpuChildren( (*it)->Child() );
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
@ -13323,12 +13323,12 @@ const GpuCtxData* View::GetZoneCtx( const GpuEvent& zone ) const
|
||||
if( timeline->empty() ) continue;
|
||||
for(;;)
|
||||
{
|
||||
auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.gpuStart, [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->gpuStart; } );
|
||||
auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->GpuStart(); } );
|
||||
if( it != timeline->begin() ) --it;
|
||||
if( zone.gpuEnd >= 0 && (*it)->gpuStart > zone.gpuEnd ) break;
|
||||
if( zone.GpuEnd() >= 0 && (*it)->GpuStart() > zone.GpuEnd() ) break;
|
||||
if( *it == &zone ) return ctx;
|
||||
if( (*it)->child < 0 ) break;
|
||||
timeline = &m_worker.GetGpuChildren( (*it)->child );
|
||||
if( (*it)->Child() < 0 ) break;
|
||||
timeline = &m_worker.GetGpuChildren( (*it)->Child() );
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -13476,11 +13476,11 @@ int64_t View::GetZoneChildTime( const ZoneEvent& zone )
|
||||
int64_t View::GetZoneChildTime( const GpuEvent& zone )
|
||||
{
|
||||
int64_t time = 0;
|
||||
if( zone.child >= 0 )
|
||||
if( zone.Child() >= 0 )
|
||||
{
|
||||
for( auto& v : m_worker.GetGpuChildren( zone.child ) )
|
||||
for( auto& v : m_worker.GetGpuChildren( zone.Child() ) )
|
||||
{
|
||||
const auto childSpan = std::max( int64_t( 0 ), v->gpuEnd - v->gpuStart );
|
||||
const auto childSpan = std::max( int64_t( 0 ), v->GpuEnd() - v->GpuStart() );
|
||||
time += childSpan;
|
||||
}
|
||||
}
|
||||
@ -13519,9 +13519,9 @@ int64_t View::GetZoneSelfTime( const GpuEvent& zone )
|
||||
{
|
||||
if( m_cache.gpuSelfTime.first == &zone ) return m_cache.gpuSelfTime.second;
|
||||
if( m_cache.gpuSelfTime2.first == &zone ) return m_cache.gpuSelfTime2.second;
|
||||
const auto ztime = m_worker.GetZoneEnd( zone ) - zone.gpuStart;
|
||||
const auto ztime = m_worker.GetZoneEnd( zone ) - zone.GpuStart();
|
||||
const auto selftime = ztime - GetZoneChildTime( zone );
|
||||
if( zone.gpuEnd >= 0 )
|
||||
if( zone.GpuEnd() >= 0 )
|
||||
{
|
||||
m_cache.gpuSelfTime2 = m_cache.gpuSelfTime;
|
||||
m_cache.gpuSelfTime = std::make_pair( &zone, selftime );
|
||||
|
||||
@ -2059,9 +2059,9 @@ int64_t Worker::GetZoneEnd( const GpuEvent& ev )
|
||||
auto ptr = &ev;
|
||||
for(;;)
|
||||
{
|
||||
if( ptr->gpuEnd >= 0 ) return ptr->gpuEnd;
|
||||
if( ptr->child < 0 ) return ptr->gpuStart >= 0 ? ptr->gpuStart : m_data.lastTime;
|
||||
ptr = GetGpuChildren( ptr->child ).back();
|
||||
if( ptr->GpuEnd() >= 0 ) return ptr->GpuEnd();
|
||||
if( ptr->Child() < 0 ) return ptr->GpuStart() >= 0 ? ptr->GpuStart() : m_data.lastTime;
|
||||
ptr = GetGpuChildren( ptr->Child() ).back();
|
||||
}
|
||||
}
|
||||
|
||||
@ -4198,11 +4198,11 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e
|
||||
const auto time = TscTime( cpuTime - m_data.baseTime );
|
||||
zone->SetCpuStart( time );
|
||||
zone->SetCpuEnd( -1 );
|
||||
zone->gpuStart = -1;
|
||||
zone->gpuEnd = -1;
|
||||
zone->SetGpuStart( -1 );
|
||||
zone->SetGpuEnd( -1 );
|
||||
zone->SetSrcLoc( ShrinkSourceLocation( ev.srcloc ) );
|
||||
zone->callstack.SetVal( 0 );
|
||||
zone->child = -1;
|
||||
zone->SetChild( -1 );
|
||||
|
||||
uint64_t ztid;
|
||||
if( ctx->thread == 0 )
|
||||
@ -4231,12 +4231,12 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e
|
||||
if( !stack.empty() )
|
||||
{
|
||||
auto back = stack.back();
|
||||
if( back->child < 0 )
|
||||
if( back->Child() < 0 )
|
||||
{
|
||||
back->child = int32_t( m_data.gpuChildren.size() );
|
||||
back->SetChild( int32_t( m_data.gpuChildren.size() ) );
|
||||
m_data.gpuChildren.push_back( Vector<GpuEvent*>() );
|
||||
}
|
||||
timeline = &m_data.gpuChildren[back->child];
|
||||
timeline = &m_data.gpuChildren[back->Child()];
|
||||
}
|
||||
|
||||
timeline->push_back( zone );
|
||||
@ -4315,23 +4315,24 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev )
|
||||
assert( zone );
|
||||
ctx->query[ev.queryId] = nullptr;
|
||||
|
||||
if( zone->gpuStart < 0 )
|
||||
if( zone->GpuStart() < 0 )
|
||||
{
|
||||
const auto time = ctx->timeDiff + gpuTime;
|
||||
zone->gpuStart = time;
|
||||
zone->SetGpuStart( time );
|
||||
if( m_data.lastTime < time ) m_data.lastTime = time;
|
||||
ctx->count++;
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto time = ctx->timeDiff + gpuTime;
|
||||
zone->gpuEnd = time;
|
||||
if( m_data.lastTime < time ) m_data.lastTime = time;
|
||||
|
||||
if( zone->gpuEnd < zone->gpuStart )
|
||||
auto time = ctx->timeDiff + gpuTime;
|
||||
if( time < zone->GpuStart() )
|
||||
{
|
||||
std::swap( zone->gpuEnd, zone->gpuStart );
|
||||
auto tmp = zone->GpuStart();
|
||||
std::swap( time, tmp );
|
||||
zone->SetGpuStart( tmp );
|
||||
}
|
||||
zone->SetGpuEnd( time );
|
||||
if( m_data.lastTime < time ) m_data.lastTime = time;
|
||||
}
|
||||
}
|
||||
|
||||
@ -4908,15 +4909,15 @@ void Worker::ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_
|
||||
f.Read( sz );
|
||||
if( sz == 0 )
|
||||
{
|
||||
zone->child = -1;
|
||||
zone->SetChild( -1 );
|
||||
}
|
||||
else
|
||||
{
|
||||
zone->child = m_data.gpuChildren.size();
|
||||
zone->SetChild( m_data.gpuChildren.size() );
|
||||
m_data.gpuChildren.push_back( Vector<GpuEvent*>() );
|
||||
Vector<GpuEvent*> tmp;
|
||||
ReadTimeline( f, tmp, sz, refTime, refGpuTime );
|
||||
m_data.gpuChildren[zone->child] = std::move( tmp );
|
||||
m_data.gpuChildren[zone->Child()] = std::move( tmp );
|
||||
}
|
||||
}
|
||||
|
||||
@ -4926,15 +4927,15 @@ void Worker::ReadTimelinePre059( FileRead& f, GpuEvent* zone, int64_t& refTime,
|
||||
f.Read( sz );
|
||||
if( sz == 0 )
|
||||
{
|
||||
zone->child = -1;
|
||||
zone->SetChild( -1 );
|
||||
}
|
||||
else
|
||||
{
|
||||
zone->child = m_data.gpuChildren.size();
|
||||
zone->SetChild( m_data.gpuChildren.size() );
|
||||
m_data.gpuChildren.push_back( Vector<GpuEvent*>() );
|
||||
Vector<GpuEvent*> tmp;
|
||||
ReadTimelinePre059( f, tmp, sz, refTime, refGpuTime, fileVer );
|
||||
m_data.gpuChildren[zone->child] = std::move( tmp );
|
||||
m_data.gpuChildren[zone->Child()] = std::move( tmp );
|
||||
}
|
||||
}
|
||||
|
||||
@ -5159,9 +5160,8 @@ void Worker::ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, i
|
||||
{
|
||||
s_loadProgress.subProgress.fetch_add( 1, std::memory_order_relaxed );
|
||||
|
||||
// Use zone->gpuStart as scratch buffer for CPU zone start time offset.
|
||||
// Use zone->gpuEnd as scratch buffer for GPU zone start time offset.
|
||||
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) );
|
||||
int64_t tcpu, tgpu;
|
||||
f.Read2( tcpu, tgpu );
|
||||
int16_t srcloc;
|
||||
f.Read( srcloc );
|
||||
zone->SetSrcLoc( srcloc );
|
||||
@ -5169,15 +5169,18 @@ void Worker::ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, i
|
||||
uint16_t thread;
|
||||
f.Read( thread );
|
||||
zone->SetThread( thread );
|
||||
refTime += zone->gpuStart;
|
||||
refGpuTime += zone->gpuEnd;
|
||||
refTime += tcpu;
|
||||
refGpuTime += tgpu;
|
||||
zone->SetCpuStart( refTime );
|
||||
zone->gpuStart = refGpuTime;
|
||||
zone->SetGpuStart( refGpuTime );
|
||||
|
||||
ReadTimeline( f, zone, refTime, refGpuTime );
|
||||
|
||||
zone->SetCpuEnd( ReadTimeOffset( f, refTime ) );
|
||||
zone->gpuEnd = ReadTimeOffset( f, refGpuTime );
|
||||
f.Read2( tcpu, tgpu );
|
||||
refTime += tcpu;
|
||||
refGpuTime += tgpu;
|
||||
zone->SetCpuEnd( refTime );
|
||||
zone->SetGpuEnd( refGpuTime );
|
||||
}
|
||||
while( ++zone != zptr );
|
||||
}
|
||||
@ -5202,9 +5205,12 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
|
||||
if( cpuEnd >= 0 ) cpuEnd -= m_data.baseTime;
|
||||
zone->SetCpuStart( cpuStart );
|
||||
zone->SetCpuEnd( cpuEnd );
|
||||
f.Read( &zone->gpuStart, sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) );
|
||||
if( zone->gpuStart != std::numeric_limits<int64_t>::max() ) zone->gpuStart -= m_data.baseTime;
|
||||
if( zone->gpuEnd >= 0 ) zone->gpuEnd -= m_data.baseTime;
|
||||
int64_t gpuStart, gpuEnd;
|
||||
f.Read2( gpuStart, gpuEnd );
|
||||
if( gpuStart != std::numeric_limits<int64_t>::max() ) gpuStart -= m_data.baseTime;
|
||||
if( gpuEnd >= 0 ) gpuEnd -= m_data.baseTime;
|
||||
zone->SetGpuStart( gpuStart );
|
||||
zone->SetGpuEnd( gpuEnd );
|
||||
int16_t srcloc;
|
||||
f.Read( srcloc );
|
||||
zone->SetSrcLoc( srcloc );
|
||||
@ -5224,18 +5230,20 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
|
||||
}
|
||||
else if( fileVer <= FileVersion( 0, 4, 3 ) )
|
||||
{
|
||||
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) );
|
||||
int64_t tcpu, tgpu;
|
||||
f.Read2( tcpu, tgpu );
|
||||
int16_t srcloc;
|
||||
f.Read( srcloc );
|
||||
zone->SetSrcLoc( srcloc );
|
||||
f.Skip( 2 );
|
||||
f.Read( zone->callstack );
|
||||
f.Skip( 1 );
|
||||
refTime += zone->gpuStart;
|
||||
refGpuTime += zone->gpuEnd;
|
||||
refTime += tcpu;
|
||||
refGpuTime += tgpu;
|
||||
tgpu = refGpuTime;
|
||||
if( tgpu != std::numeric_limits<int64_t>::max() ) tgpu -= m_data.baseTime;
|
||||
zone->SetCpuStart( refTime - m_data.baseTime );
|
||||
zone->gpuStart = refGpuTime;
|
||||
if( zone->gpuStart != std::numeric_limits<int64_t>::max() ) zone->gpuStart -= m_data.baseTime;
|
||||
zone->SetGpuStart( tgpu );
|
||||
|
||||
uint64_t thread;
|
||||
f.Read( thread );
|
||||
@ -5250,9 +5258,8 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
|
||||
}
|
||||
else if( fileVer <= FileVersion( 0, 5, 1 ) )
|
||||
{
|
||||
// Use zone->gpuStart as scratch buffer for CPU zone start time offset.
|
||||
// Use zone->gpuEnd as scratch buffer for GPU zone start time offset.
|
||||
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) );
|
||||
int64_t tcpu, tgpu;
|
||||
f.Read2( tcpu, tgpu );
|
||||
int16_t srcloc;
|
||||
f.Read( srcloc );
|
||||
zone->SetSrcLoc( srcloc );
|
||||
@ -5262,17 +5269,17 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
|
||||
uint16_t thread;
|
||||
f.Read( thread );
|
||||
zone->SetThread( thread );
|
||||
refTime += zone->gpuStart;
|
||||
refGpuTime += zone->gpuEnd;
|
||||
refTime += tcpu;
|
||||
refGpuTime += tgpu;
|
||||
tgpu = refGpuTime;
|
||||
if( tgpu != std::numeric_limits<int64_t>::max() ) tgpu -= m_data.baseTime;
|
||||
zone->SetCpuStart( refTime - m_data.baseTime );
|
||||
zone->gpuStart = refGpuTime;
|
||||
if( zone->gpuStart != std::numeric_limits<int64_t>::max() ) zone->gpuStart -= m_data.baseTime;
|
||||
zone->SetGpuStart( tgpu );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Use zone->gpuStart as scratch buffer for CPU zone start time offset.
|
||||
// Use zone->gpuEnd as scratch buffer for GPU zone start time offset.
|
||||
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) );
|
||||
int64_t tcpu, tgpu;
|
||||
f.Read2( tcpu, tgpu );
|
||||
int16_t srcloc;
|
||||
f.Read( srcloc );
|
||||
zone->SetSrcLoc( srcloc );
|
||||
@ -5281,10 +5288,10 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
|
||||
uint16_t thread;
|
||||
f.Read( thread );
|
||||
zone->SetThread( thread );
|
||||
refTime += zone->gpuStart;
|
||||
refGpuTime += zone->gpuEnd;
|
||||
refTime += tcpu;
|
||||
refGpuTime += tgpu;
|
||||
zone->SetCpuStart( refTime );
|
||||
zone->gpuStart = refGpuTime;
|
||||
zone->SetGpuStart( refGpuTime );
|
||||
}
|
||||
ReadTimelinePre059( f, zone, refTime, refGpuTime, fileVer );
|
||||
if( fileVer > FileVersion( 0, 4, 1 ) )
|
||||
@ -5292,8 +5299,9 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
|
||||
int64_t cpuEnd = ReadTimeOffset( f, refTime );
|
||||
if( cpuEnd > 0 ) cpuEnd -= m_data.baseTime;
|
||||
zone->SetCpuEnd( cpuEnd );
|
||||
zone->gpuEnd = ReadTimeOffset( f, refGpuTime );
|
||||
if( zone->gpuEnd > 0 ) zone->gpuEnd -= m_data.baseTime;
|
||||
int64_t gpuEnd = ReadTimeOffset( f, refGpuTime );
|
||||
if( gpuEnd > 0 ) gpuEnd -= m_data.baseTime;
|
||||
zone->SetGpuEnd( gpuEnd );
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -5721,25 +5729,25 @@ void Worker::WriteTimeline( FileWrite& f, const Vector<GpuEvent*>& vec, int64_t&
|
||||
for( auto& v : vec )
|
||||
{
|
||||
WriteTimeOffset( f, refTime, v->CpuStart() );
|
||||
WriteTimeOffset( f, refGpuTime, v->gpuStart );
|
||||
WriteTimeOffset( f, refGpuTime, v->GpuStart() );
|
||||
const int16_t srcloc = v->SrcLoc();
|
||||
f.Write( &srcloc, sizeof( srcloc ) );
|
||||
f.Write( &v->callstack, sizeof( v->callstack ) );
|
||||
const uint16_t thread = v->Thread();
|
||||
f.Write( &thread, sizeof( thread ) );
|
||||
|
||||
if( v->child < 0 )
|
||||
if( v->Child() < 0 )
|
||||
{
|
||||
sz = 0;
|
||||
f.Write( &sz, sizeof( sz ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
WriteTimeline( f, GetGpuChildren( v->child ), refTime, refGpuTime );
|
||||
WriteTimeline( f, GetGpuChildren( v->Child() ), refTime, refGpuTime );
|
||||
}
|
||||
|
||||
WriteTimeOffset( f, refTime, v->CpuEnd() );
|
||||
WriteTimeOffset( f, refGpuTime, v->gpuEnd );
|
||||
WriteTimeOffset( f, refGpuTime, v->GpuEnd() );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -347,7 +347,7 @@ public:
|
||||
int64_t GetZoneEnd( const ZoneEvent& ev );
|
||||
int64_t GetZoneEnd( const GpuEvent& ev );
|
||||
// Returns ev.End() when it is non-negative; otherwise falls back to ev.Start().
static tracy_force_inline int64_t GetZoneEndDirect( const ZoneEvent& ev )
{
    const auto zoneEnd = ev.End();
    if( zoneEnd >= 0 ) return zoneEnd;
    return ev.Start();
}
|
||||
static tracy_force_inline int64_t GetZoneEndDirect( const GpuEvent& ev ) { return ev.gpuEnd >= 0 ? ev.gpuEnd : ev.gpuStart; }
|
||||
// Returns ev.GpuEnd() when it is non-negative; otherwise falls back to
// ev.GpuStart().
static tracy_force_inline int64_t GetZoneEndDirect( const GpuEvent& ev )
{
    const auto gpuEnd = ev.GpuEnd();
    if( gpuEnd >= 0 ) return gpuEnd;
    return ev.GpuStart();
}
|
||||
|
||||
const char* GetString( uint64_t ptr ) const;
|
||||
const char* GetString( const StringRef& ref ) const;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user