1
0
mirror of https://github.com/wolfpld/tracy.git synced 2025-03-20 07:40:02 +08:00

Pack child into GPU start/end in GpuEvent (saves 4 bytes).

long    5152 MB -> 5061 MB
This commit is contained in:
Bartosz Taudul 2019-10-30 23:50:37 +01:00
parent 7319293081
commit 25b610a36f
4 changed files with 130 additions and 117 deletions

View File

@ -222,17 +222,22 @@ struct GpuEvent
tracy_force_inline void SetCpuStart( int64_t cpuStart ) { assert( cpuStart < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_cpuStart_srcloc)+2, &cpuStart, 4 ); memcpy( ((char*)&_cpuStart_srcloc)+6, ((char*)&cpuStart)+4, 2 ); }
tracy_force_inline int64_t CpuEnd() const { return int64_t( _cpuEnd_thread ) >> 16; }
tracy_force_inline void SetCpuEnd( int64_t cpuEnd ) { assert( cpuEnd < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_cpuEnd_thread)+2, &cpuEnd, 4 ); memcpy( ((char*)&_cpuEnd_thread)+6, ((char*)&cpuEnd)+4, 2 ); }
tracy_force_inline int64_t GpuStart() const { return int64_t( _gpuStart_child1 ) >> 16; }
tracy_force_inline void SetGpuStart( int64_t gpuStart ) { assert( gpuStart < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_gpuStart_child1)+2, &gpuStart, 4 ); memcpy( ((char*)&_gpuStart_child1)+6, ((char*)&gpuStart)+4, 2 ); }
tracy_force_inline int64_t GpuEnd() const { return int64_t( _gpuEnd_child2 ) >> 16; }
tracy_force_inline void SetGpuEnd( int64_t gpuEnd ) { assert( gpuEnd < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_gpuEnd_child2)+2, &gpuEnd, 4 ); memcpy( ((char*)&_gpuEnd_child2)+6, ((char*)&gpuEnd)+4, 2 ); }
tracy_force_inline int16_t SrcLoc() const { return int16_t( _cpuStart_srcloc & 0xFFFF ); }
tracy_force_inline void SetSrcLoc( int16_t srcloc ) { memcpy( &_cpuStart_srcloc, &srcloc, 2 ); }
tracy_force_inline uint16_t Thread() const { return uint16_t( _cpuEnd_thread & 0xFFFF ); }
tracy_force_inline void SetThread( uint16_t thread ) { memcpy( &_cpuEnd_thread, &thread, 2 ); }
tracy_force_inline int32_t Child() const { return int32_t( uint32_t( _gpuStart_child1 & 0xFFFF ) | ( uint32_t( _gpuEnd_child2 & 0xFFFF ) << 16 ) ); }
tracy_force_inline void SetChild( int32_t child ) { memcpy( &_gpuStart_child1, &child, 2 ); memcpy( &_gpuEnd_child2, ((char*)&child)+2, 2 ); }
uint64_t _cpuStart_srcloc;
uint64_t _cpuEnd_thread;
int64_t gpuStart;
int64_t gpuEnd;
uint64_t _gpuStart_child1;
uint64_t _gpuEnd_child2;
Int24 callstack;
int32_t child;
};
enum { GpuEventSize = sizeof( GpuEvent ) };

View File

@ -2106,9 +2106,9 @@ void View::DrawZones()
for( auto& td : v->threadData )
{
assert( !td.second.timeline.empty() );
if( td.second.timeline.front()->gpuStart >= 0 )
if( td.second.timeline.front()->GpuStart() >= 0 )
{
const auto begin = td.second.timeline.front()->gpuStart;
const auto begin = td.second.timeline.front()->GpuStart();
const auto drift = GpuDrift( v );
if( !singleThread ) offset += sstep;
const auto partDepth = DispatchGpuZoneLevel( td.second.timeline, hover, pxns, int64_t( nspx ), wpos, offset, 0, v->thread, yMin, yMax, begin, drift );
@ -2176,7 +2176,7 @@ void View::DrawZones()
int64_t t1 = std::numeric_limits<int64_t>::min();
for( auto& td : v->threadData )
{
const auto _t0 = td.second.timeline.front()->gpuStart;
const auto _t0 = td.second.timeline.front()->GpuStart();
if( _t0 >= 0 )
{
// FIXME
@ -2241,7 +2241,7 @@ void View::DrawZones()
int64_t t0 = std::numeric_limits<int64_t>::max();
for( auto& td : v->threadData )
{
const auto _t0 = td.second.timeline.front()->gpuStart;
const auto _t0 = td.second.timeline.front()->GpuStart();
if( _t0 >= 0 )
{
t0 = std::min( t0, _t0 );
@ -3318,10 +3318,10 @@ int View::DrawGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
const auto delay = m_worker.GetDelay();
const auto resolution = m_worker.GetResolution();
// cast to uint64_t, so that unended zones (end = -1) are still drawn
auto it = std::lower_bound( vec.begin(), vec.end(), std::max<int64_t>( 0, m_vd.zvStart - delay ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuEnd, begin, drift ) < (uint64_t)r; } );
auto it = std::lower_bound( vec.begin(), vec.end(), std::max<int64_t>( 0, m_vd.zvStart - delay ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuEnd(), begin, drift ) < (uint64_t)r; } );
if( it == vec.end() ) return depth;
const auto zitend = std::lower_bound( it, vec.end(), std::max<int64_t>( 0, m_vd.zvEnd + resolution ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuStart, begin, drift ) < (uint64_t)r; } );
const auto zitend = std::lower_bound( it, vec.end(), std::max<int64_t>( 0, m_vd.zvEnd + resolution ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuStart(), begin, drift ) < (uint64_t)r; } );
if( it == zitend ) return depth;
const auto w = ImGui::GetWindowContentRegionWidth() - 1;
@ -3339,7 +3339,7 @@ int View::DrawGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
const auto color = GetZoneColor( ev );
auto end = m_worker.GetZoneEnd( ev );
if( end == std::numeric_limits<int64_t>::max() ) break;
const auto start = AdjustGpuTime( ev.gpuStart, begin, drift );
const auto start = AdjustGpuTime( ev.GpuStart(), begin, drift );
end = AdjustGpuTime( end, begin, drift );
const auto zsz = std::max( ( end - start ) * pxns, pxns * 0.5 );
if( zsz < MinVisSize )
@ -3352,7 +3352,7 @@ int View::DrawGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
for(;;)
{
const auto prevIt = it;
it = std::lower_bound( it, zitend, std::max<int64_t>( 0, nextTime ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuEnd, begin, drift ) < (uint64_t)r; } );
it = std::lower_bound( it, zitend, std::max<int64_t>( 0, nextTime ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuEnd(), begin, drift ) < (uint64_t)r; } );
if( it == prevIt ) ++it;
num += std::distance( prevIt, it );
if( it == zitend ) break;
@ -3409,9 +3409,9 @@ int View::DrawGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
}
else
{
if( ev.child >= 0 )
if( ev.Child() >= 0 )
{
const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.child ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift );
const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.Child() ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift );
if( d > maxdepth ) maxdepth = d;
}
@ -3433,7 +3433,7 @@ int View::DrawGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
DrawTextContrast( draw, wpos + ImVec2( std::max( std::max( 0., px0 ), std::min( double( w - tsz.x ), x ) ), offset ), 0xFFFFFFFF, zoneName );
ImGui::PopClipRect();
}
else if( ev.gpuStart == ev.gpuEnd )
else if( ev.GpuStart() == ev.GpuEnd() )
{
DrawTextContrast( draw, wpos + ImVec2( px0 + ( px1 - px0 - tsz.x ) * 0.5, offset ), 0xFFFFFFFF, zoneName );
}
@ -3478,10 +3478,10 @@ int View::SkipGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
const auto delay = m_worker.GetDelay();
const auto resolution = m_worker.GetResolution();
// cast to uint64_t, so that unended zones (end = -1) are still drawn
auto it = std::lower_bound( vec.begin(), vec.end(), std::max<int64_t>( 0, m_vd.zvStart - delay ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuEnd, begin, drift ) < (uint64_t)r; } );
auto it = std::lower_bound( vec.begin(), vec.end(), std::max<int64_t>( 0, m_vd.zvStart - delay ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuEnd(), begin, drift ) < (uint64_t)r; } );
if( it == vec.end() ) return depth;
const auto zitend = std::lower_bound( it, vec.end(), std::max<int64_t>( 0, m_vd.zvEnd + resolution ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuStart, begin, drift ) < (uint64_t)r; } );
const auto zitend = std::lower_bound( it, vec.end(), std::max<int64_t>( 0, m_vd.zvEnd + resolution ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuStart(), begin, drift ) < (uint64_t)r; } );
if( it == zitend ) return depth;
depth++;
@ -3492,7 +3492,7 @@ int View::SkipGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
auto& ev = **it;
auto end = m_worker.GetZoneEnd( ev );
if( end == std::numeric_limits<int64_t>::max() ) break;
const auto start = AdjustGpuTime( ev.gpuStart, begin, drift );
const auto start = AdjustGpuTime( ev.GpuStart(), begin, drift );
end = AdjustGpuTime( end, begin, drift );
const auto zsz = std::max( ( end - start ) * pxns, pxns * 0.5 );
if( zsz < MinVisSize )
@ -3502,7 +3502,7 @@ int View::SkipGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
for(;;)
{
const auto prevIt = it;
it = std::lower_bound( it, zitend, nextTime, [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuEnd, begin, drift ) < (uint64_t)r; } );
it = std::lower_bound( it, zitend, nextTime, [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuEnd(), begin, drift ) < (uint64_t)r; } );
if( it == prevIt ) ++it;
if( it == zitend ) break;
const auto nend = AdjustGpuTime( m_worker.GetZoneEnd( **it ), begin, drift );
@ -3514,9 +3514,9 @@ int View::SkipGpuZoneLevel( const Vector<GpuEvent*>& vec, bool hover, double pxn
}
else
{
if( ev.child >= 0 )
if( ev.Child() >= 0 )
{
const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.child ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift );
const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.Child() ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift );
if( d > maxdepth ) maxdepth = d;
}
++it;
@ -6489,9 +6489,9 @@ void View::DrawGpuInfoWindow()
ImGui::BeginChild( "##gpuinfo" );
const auto end = m_worker.GetZoneEnd( ev );
const auto ztime = end - ev.gpuStart;
const auto ztime = end - ev.GpuStart();
const auto selftime = GetZoneSelfTime( ev );
TextFocused( "Time from start of program:", TimeToString( ev.gpuStart ) );
TextFocused( "Time from start of program:", TimeToString( ev.GpuStart() ) );
TextFocused( "GPU execution time:", TimeToString( ztime ) );
TextFocused( "GPU self time:", TimeToString( selftime ) );
if( ztime != 0 )
@ -6503,15 +6503,15 @@ void View::DrawGpuInfoWindow()
auto ctx = GetZoneCtx( ev );
if( !ctx )
{
TextFocused( "Delay to execution:", TimeToString( ev.gpuStart - ev.CpuStart() ) );
TextFocused( "Delay to execution:", TimeToString( ev.GpuStart() - ev.CpuStart() ) );
}
else
{
const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) );
assert( td != ctx->threadData.end() );
const auto begin = td->second.timeline.front()->gpuStart;
const auto begin = td->second.timeline.front()->GpuStart();
const auto drift = GpuDrift( ctx );
TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.CpuStart() ) );
TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.GpuStart(), begin, drift ) - ev.CpuStart() ) );
}
ImGui::Separator();
@ -6542,7 +6542,7 @@ void View::DrawGpuInfoWindow()
{
ImGui::SameLine();
}
ImGui::TextDisabled( "(%s) %s:%i", TimeToString( m_worker.GetZoneEnd( *v ) - v->gpuStart ), fileName, srcloc.line );
ImGui::TextDisabled( "(%s) %s:%i", TimeToString( m_worker.GetZoneEnd( *v ) - v->GpuStart() ), fileName, srcloc.line );
ImGui::PopID();
if( ImGui::IsItemClicked( 1 ) )
{
@ -6570,9 +6570,9 @@ void View::DrawGpuInfoWindow()
}
} );
if( ev.child >= 0 )
if( ev.Child() >= 0 )
{
const auto& children = m_worker.GetGpuChildren( ev.child );
const auto& children = m_worker.GetGpuChildren( ev.Child() );
bool expand = ImGui::TreeNode( "Child zones" );
ImGui::SameLine();
ImGui::TextDisabled( "(%s)", RealToString( children.size(), true ) );
@ -6599,7 +6599,7 @@ void View::DrawGpuInfoWindow()
{
const auto& child = *children[i];
const auto cend = m_worker.GetZoneEnd( child );
const auto ct = cend - child.gpuStart;
const auto ct = cend - child.GpuStart();
const auto srcloc = child.SrcLoc();
ctime += ct;
@ -6689,7 +6689,7 @@ void View::DrawGpuInfoWindow()
{
const auto& child = *children[cgr.v[i]];
const auto cend = m_worker.GetZoneEnd( child );
const auto ct = cend - child.gpuStart;
const auto ct = cend - child.GpuStart();
ctt[i] = ct;
cti[i] = uint32_t( i );
}
@ -6739,7 +6739,7 @@ void View::DrawGpuInfoWindow()
{
const auto& child = *children[i];
const auto cend = m_worker.GetZoneEnd( child );
const auto ct = cend - child.gpuStart;
const auto ct = cend - child.GpuStart();
ctime += ct;
ctt[i] = ct;
cti[i] = uint32_t( i );
@ -6902,7 +6902,7 @@ void View::DrawOptions()
size_t lastidx = 0;
for( size_t j=timeline.size()-1; j > 0; j-- )
{
if( timeline[j]->gpuEnd >= 0 )
if( timeline[j]->GpuEnd() >= 0 )
{
lastidx = j;
break;
@ -6921,7 +6921,7 @@ void View::DrawOptions()
const auto p1 = dist( gen );
if( p0 != p1 )
{
slopes[idx++] = float( 1.0 - double( timeline[p1]->gpuStart - timeline[p0]->gpuStart ) / double( timeline[p1]->CpuStart() - timeline[p0]->CpuStart() ) );
slopes[idx++] = float( 1.0 - double( timeline[p1]->GpuStart() - timeline[p0]->GpuStart() ) / double( timeline[p1]->CpuStart() - timeline[p0]->CpuStart() ) );
}
}
while( idx < NumSlopes );
@ -12903,19 +12903,19 @@ void View::ZoomToZone( const ZoneEvent& ev )
void View::ZoomToZone( const GpuEvent& ev )
{
const auto end = m_worker.GetZoneEnd( ev );
if( end - ev.gpuStart <= 0 ) return;
if( end - ev.GpuStart() <= 0 ) return;
auto ctx = GetZoneCtx( ev );
if( !ctx )
{
ZoomToRange( ev.gpuStart, end );
ZoomToRange( ev.GpuStart(), end );
}
else
{
const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) );
assert( td != ctx->threadData.end() );
const auto begin = td->second.timeline.front()->gpuStart;
const auto begin = td->second.timeline.front()->GpuStart();
const auto drift = GpuDrift( ctx );
ZoomToRange( AdjustGpuTime( ev.gpuStart, begin, drift ), AdjustGpuTime( end, begin, drift ) );
ZoomToRange( AdjustGpuTime( ev.GpuStart(), begin, drift ), AdjustGpuTime( end, begin, drift ) );
}
}
@ -13096,7 +13096,7 @@ void View::ZoneTooltip( const GpuEvent& ev )
const auto tid = GetZoneThread( ev );
const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() );
const auto end = m_worker.GetZoneEnd( ev );
const auto ztime = end - ev.gpuStart;
const auto ztime = end - ev.GpuStart();
const auto selftime = GetZoneSelfTime( ev );
ImGui::BeginTooltip();
@ -13121,15 +13121,15 @@ void View::ZoneTooltip( const GpuEvent& ev )
auto ctx = GetZoneCtx( ev );
if( !ctx )
{
TextFocused( "Delay to execution:", TimeToString( ev.gpuStart - ev.CpuStart() ) );
TextFocused( "Delay to execution:", TimeToString( ev.GpuStart() - ev.CpuStart() ) );
}
else
{
const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) );
assert( td != ctx->threadData.end() );
const auto begin = td->second.timeline.front()->gpuStart;
const auto begin = td->second.timeline.front()->GpuStart();
const auto drift = GpuDrift( ctx );
TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.CpuStart() ) );
TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.GpuStart(), begin, drift ) - ev.CpuStart() ) );
}
ImGui::EndTooltip();
@ -13248,13 +13248,13 @@ const GpuEvent* View::GetZoneParent( const GpuEvent& zone ) const
if( timeline->empty() ) continue;
for(;;)
{
auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.gpuStart, [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->gpuStart; } );
auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->GpuStart(); } );
if( it != timeline->begin() ) --it;
if( zone.gpuEnd >= 0 && (*it)->gpuStart > zone.gpuEnd ) break;
if( zone.GpuEnd() >= 0 && (*it)->GpuStart() > zone.GpuEnd() ) break;
if( *it == &zone ) return parent;
if( (*it)->child < 0 ) break;
if( (*it)->Child() < 0 ) break;
parent = *it;
timeline = &m_worker.GetGpuChildren( parent->child );
timeline = &m_worker.GetGpuChildren( parent->Child() );
}
}
}
@ -13297,12 +13297,12 @@ uint64_t View::GetZoneThread( const GpuEvent& zone ) const
if( timeline->empty() ) continue;
for(;;)
{
auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.gpuStart, [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->gpuStart; } );
auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->GpuStart(); } );
if( it != timeline->begin() ) --it;
if( zone.gpuEnd >= 0 && (*it)->gpuStart > zone.gpuEnd ) break;
if( zone.GpuEnd() >= 0 && (*it)->GpuStart() > zone.GpuEnd() ) break;
if( *it == &zone ) return ctx->thread;
if( (*it)->child < 0 ) break;
timeline = &m_worker.GetGpuChildren( (*it)->child );
if( (*it)->Child() < 0 ) break;
timeline = &m_worker.GetGpuChildren( (*it)->Child() );
}
}
return 0;
@ -13323,12 +13323,12 @@ const GpuCtxData* View::GetZoneCtx( const GpuEvent& zone ) const
if( timeline->empty() ) continue;
for(;;)
{
auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.gpuStart, [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->gpuStart; } );
auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->GpuStart(); } );
if( it != timeline->begin() ) --it;
if( zone.gpuEnd >= 0 && (*it)->gpuStart > zone.gpuEnd ) break;
if( zone.GpuEnd() >= 0 && (*it)->GpuStart() > zone.GpuEnd() ) break;
if( *it == &zone ) return ctx;
if( (*it)->child < 0 ) break;
timeline = &m_worker.GetGpuChildren( (*it)->child );
if( (*it)->Child() < 0 ) break;
timeline = &m_worker.GetGpuChildren( (*it)->Child() );
}
}
}
@ -13476,11 +13476,11 @@ int64_t View::GetZoneChildTime( const ZoneEvent& zone )
int64_t View::GetZoneChildTime( const GpuEvent& zone )
{
int64_t time = 0;
if( zone.child >= 0 )
if( zone.Child() >= 0 )
{
for( auto& v : m_worker.GetGpuChildren( zone.child ) )
for( auto& v : m_worker.GetGpuChildren( zone.Child() ) )
{
const auto childSpan = std::max( int64_t( 0 ), v->gpuEnd - v->gpuStart );
const auto childSpan = std::max( int64_t( 0 ), v->GpuEnd() - v->GpuStart() );
time += childSpan;
}
}
@ -13519,9 +13519,9 @@ int64_t View::GetZoneSelfTime( const GpuEvent& zone )
{
if( m_cache.gpuSelfTime.first == &zone ) return m_cache.gpuSelfTime.second;
if( m_cache.gpuSelfTime2.first == &zone ) return m_cache.gpuSelfTime2.second;
const auto ztime = m_worker.GetZoneEnd( zone ) - zone.gpuStart;
const auto ztime = m_worker.GetZoneEnd( zone ) - zone.GpuStart();
const auto selftime = ztime - GetZoneChildTime( zone );
if( zone.gpuEnd >= 0 )
if( zone.GpuEnd() >= 0 )
{
m_cache.gpuSelfTime2 = m_cache.gpuSelfTime;
m_cache.gpuSelfTime = std::make_pair( &zone, selftime );

View File

@ -2059,9 +2059,9 @@ int64_t Worker::GetZoneEnd( const GpuEvent& ev )
auto ptr = &ev;
for(;;)
{
if( ptr->gpuEnd >= 0 ) return ptr->gpuEnd;
if( ptr->child < 0 ) return ptr->gpuStart >= 0 ? ptr->gpuStart : m_data.lastTime;
ptr = GetGpuChildren( ptr->child ).back();
if( ptr->GpuEnd() >= 0 ) return ptr->GpuEnd();
if( ptr->Child() < 0 ) return ptr->GpuStart() >= 0 ? ptr->GpuStart() : m_data.lastTime;
ptr = GetGpuChildren( ptr->Child() ).back();
}
}
@ -4198,11 +4198,11 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e
const auto time = TscTime( cpuTime - m_data.baseTime );
zone->SetCpuStart( time );
zone->SetCpuEnd( -1 );
zone->gpuStart = -1;
zone->gpuEnd = -1;
zone->SetGpuStart( -1 );
zone->SetGpuEnd( -1 );
zone->SetSrcLoc( ShrinkSourceLocation( ev.srcloc ) );
zone->callstack.SetVal( 0 );
zone->child = -1;
zone->SetChild( -1 );
uint64_t ztid;
if( ctx->thread == 0 )
@ -4231,12 +4231,12 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e
if( !stack.empty() )
{
auto back = stack.back();
if( back->child < 0 )
if( back->Child() < 0 )
{
back->child = int32_t( m_data.gpuChildren.size() );
back->SetChild( int32_t( m_data.gpuChildren.size() ) );
m_data.gpuChildren.push_back( Vector<GpuEvent*>() );
}
timeline = &m_data.gpuChildren[back->child];
timeline = &m_data.gpuChildren[back->Child()];
}
timeline->push_back( zone );
@ -4315,23 +4315,24 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev )
assert( zone );
ctx->query[ev.queryId] = nullptr;
if( zone->gpuStart < 0 )
if( zone->GpuStart() < 0 )
{
const auto time = ctx->timeDiff + gpuTime;
zone->gpuStart = time;
zone->SetGpuStart( time );
if( m_data.lastTime < time ) m_data.lastTime = time;
ctx->count++;
}
else
{
const auto time = ctx->timeDiff + gpuTime;
zone->gpuEnd = time;
if( m_data.lastTime < time ) m_data.lastTime = time;
if( zone->gpuEnd < zone->gpuStart )
auto time = ctx->timeDiff + gpuTime;
if( time < zone->GpuStart() )
{
std::swap( zone->gpuEnd, zone->gpuStart );
auto tmp = zone->GpuStart();
std::swap( time, tmp );
zone->SetGpuStart( tmp );
}
zone->SetGpuEnd( time );
if( m_data.lastTime < time ) m_data.lastTime = time;
}
}
@ -4908,15 +4909,15 @@ void Worker::ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_
f.Read( sz );
if( sz == 0 )
{
zone->child = -1;
zone->SetChild( -1 );
}
else
{
zone->child = m_data.gpuChildren.size();
zone->SetChild( m_data.gpuChildren.size() );
m_data.gpuChildren.push_back( Vector<GpuEvent*>() );
Vector<GpuEvent*> tmp;
ReadTimeline( f, tmp, sz, refTime, refGpuTime );
m_data.gpuChildren[zone->child] = std::move( tmp );
m_data.gpuChildren[zone->Child()] = std::move( tmp );
}
}
@ -4926,15 +4927,15 @@ void Worker::ReadTimelinePre059( FileRead& f, GpuEvent* zone, int64_t& refTime,
f.Read( sz );
if( sz == 0 )
{
zone->child = -1;
zone->SetChild( -1 );
}
else
{
zone->child = m_data.gpuChildren.size();
zone->SetChild( m_data.gpuChildren.size() );
m_data.gpuChildren.push_back( Vector<GpuEvent*>() );
Vector<GpuEvent*> tmp;
ReadTimelinePre059( f, tmp, sz, refTime, refGpuTime, fileVer );
m_data.gpuChildren[zone->child] = std::move( tmp );
m_data.gpuChildren[zone->Child()] = std::move( tmp );
}
}
@ -5159,9 +5160,8 @@ void Worker::ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, i
{
s_loadProgress.subProgress.fetch_add( 1, std::memory_order_relaxed );
// Use zone->gpuStart as scratch buffer for CPU zone start time offset.
// Use zone->gpuEnd as scratch buffer for GPU zone start time offset.
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) );
int64_t tcpu, tgpu;
f.Read2( tcpu, tgpu );
int16_t srcloc;
f.Read( srcloc );
zone->SetSrcLoc( srcloc );
@ -5169,15 +5169,18 @@ void Worker::ReadTimeline( FileRead& f, Vector<GpuEvent*>& vec, uint64_t size, i
uint16_t thread;
f.Read( thread );
zone->SetThread( thread );
refTime += zone->gpuStart;
refGpuTime += zone->gpuEnd;
refTime += tcpu;
refGpuTime += tgpu;
zone->SetCpuStart( refTime );
zone->gpuStart = refGpuTime;
zone->SetGpuStart( refGpuTime );
ReadTimeline( f, zone, refTime, refGpuTime );
zone->SetCpuEnd( ReadTimeOffset( f, refTime ) );
zone->gpuEnd = ReadTimeOffset( f, refGpuTime );
f.Read2( tcpu, tgpu );
refTime += tcpu;
refGpuTime += tgpu;
zone->SetCpuEnd( refTime );
zone->SetGpuEnd( refGpuTime );
}
while( ++zone != zptr );
}
@ -5202,9 +5205,12 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
if( cpuEnd >= 0 ) cpuEnd -= m_data.baseTime;
zone->SetCpuStart( cpuStart );
zone->SetCpuEnd( cpuEnd );
f.Read( &zone->gpuStart, sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) );
if( zone->gpuStart != std::numeric_limits<int64_t>::max() ) zone->gpuStart -= m_data.baseTime;
if( zone->gpuEnd >= 0 ) zone->gpuEnd -= m_data.baseTime;
int64_t gpuStart, gpuEnd;
f.Read2( gpuStart, gpuEnd );
if( gpuStart != std::numeric_limits<int64_t>::max() ) gpuStart -= m_data.baseTime;
if( gpuEnd >= 0 ) gpuEnd -= m_data.baseTime;
zone->SetGpuStart( gpuStart );
zone->SetGpuEnd( gpuEnd );
int16_t srcloc;
f.Read( srcloc );
zone->SetSrcLoc( srcloc );
@ -5224,18 +5230,20 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
}
else if( fileVer <= FileVersion( 0, 4, 3 ) )
{
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) );
int64_t tcpu, tgpu;
f.Read2( tcpu, tgpu );
int16_t srcloc;
f.Read( srcloc );
zone->SetSrcLoc( srcloc );
f.Skip( 2 );
f.Read( zone->callstack );
f.Skip( 1 );
refTime += zone->gpuStart;
refGpuTime += zone->gpuEnd;
refTime += tcpu;
refGpuTime += tgpu;
tgpu = refGpuTime;
if( tgpu != std::numeric_limits<int64_t>::max() ) tgpu -= m_data.baseTime;
zone->SetCpuStart( refTime - m_data.baseTime );
zone->gpuStart = refGpuTime;
if( zone->gpuStart != std::numeric_limits<int64_t>::max() ) zone->gpuStart -= m_data.baseTime;
zone->SetGpuStart( tgpu );
uint64_t thread;
f.Read( thread );
@ -5250,9 +5258,8 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
}
else if( fileVer <= FileVersion( 0, 5, 1 ) )
{
// Use zone->gpuStart as scratch buffer for CPU zone start time offset.
// Use zone->gpuEnd as scratch buffer for GPU zone start time offset.
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) );
int64_t tcpu, tgpu;
f.Read2( tcpu, tgpu );
int16_t srcloc;
f.Read( srcloc );
zone->SetSrcLoc( srcloc );
@ -5262,17 +5269,17 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
uint16_t thread;
f.Read( thread );
zone->SetThread( thread );
refTime += zone->gpuStart;
refGpuTime += zone->gpuEnd;
refTime += tcpu;
refGpuTime += tgpu;
tgpu = refGpuTime;
if( tgpu != std::numeric_limits<int64_t>::max() ) tgpu -= m_data.baseTime;
zone->SetCpuStart( refTime - m_data.baseTime );
zone->gpuStart = refGpuTime;
if( zone->gpuStart != std::numeric_limits<int64_t>::max() ) zone->gpuStart -= m_data.baseTime;
zone->SetGpuStart( tgpu );
}
else
{
// Use zone->gpuStart as scratch buffer for CPU zone start time offset.
// Use zone->gpuEnd as scratch buffer for GPU zone start time offset.
f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) );
int64_t tcpu, tgpu;
f.Read2( tcpu, tgpu );
int16_t srcloc;
f.Read( srcloc );
zone->SetSrcLoc( srcloc );
@ -5281,10 +5288,10 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
uint16_t thread;
f.Read( thread );
zone->SetThread( thread );
refTime += zone->gpuStart;
refGpuTime += zone->gpuEnd;
refTime += tcpu;
refGpuTime += tgpu;
zone->SetCpuStart( refTime );
zone->gpuStart = refGpuTime;
zone->SetGpuStart( refGpuTime );
}
ReadTimelinePre059( f, zone, refTime, refGpuTime, fileVer );
if( fileVer > FileVersion( 0, 4, 1 ) )
@ -5292,8 +5299,9 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector<GpuEvent*>& vec, uint64_t s
int64_t cpuEnd = ReadTimeOffset( f, refTime );
if( cpuEnd > 0 ) cpuEnd -= m_data.baseTime;
zone->SetCpuEnd( cpuEnd );
zone->gpuEnd = ReadTimeOffset( f, refGpuTime );
if( zone->gpuEnd > 0 ) zone->gpuEnd -= m_data.baseTime;
int64_t gpuEnd = ReadTimeOffset( f, refGpuTime );
if( gpuEnd > 0 ) gpuEnd -= m_data.baseTime;
zone->SetGpuEnd( gpuEnd );
}
}
}
@ -5721,25 +5729,25 @@ void Worker::WriteTimeline( FileWrite& f, const Vector<GpuEvent*>& vec, int64_t&
for( auto& v : vec )
{
WriteTimeOffset( f, refTime, v->CpuStart() );
WriteTimeOffset( f, refGpuTime, v->gpuStart );
WriteTimeOffset( f, refGpuTime, v->GpuStart() );
const int16_t srcloc = v->SrcLoc();
f.Write( &srcloc, sizeof( srcloc ) );
f.Write( &v->callstack, sizeof( v->callstack ) );
const uint16_t thread = v->Thread();
f.Write( &thread, sizeof( thread ) );
if( v->child < 0 )
if( v->Child() < 0 )
{
sz = 0;
f.Write( &sz, sizeof( sz ) );
}
else
{
WriteTimeline( f, GetGpuChildren( v->child ), refTime, refGpuTime );
WriteTimeline( f, GetGpuChildren( v->Child() ), refTime, refGpuTime );
}
WriteTimeOffset( f, refTime, v->CpuEnd() );
WriteTimeOffset( f, refGpuTime, v->gpuEnd );
WriteTimeOffset( f, refGpuTime, v->GpuEnd() );
}
}

View File

@ -347,7 +347,7 @@ public:
int64_t GetZoneEnd( const ZoneEvent& ev );
int64_t GetZoneEnd( const GpuEvent& ev );
static tracy_force_inline int64_t GetZoneEndDirect( const ZoneEvent& ev ) { return ev.End() >= 0 ? ev.End() : ev.Start(); }
static tracy_force_inline int64_t GetZoneEndDirect( const GpuEvent& ev ) { return ev.gpuEnd >= 0 ? ev.gpuEnd : ev.gpuStart; }
static tracy_force_inline int64_t GetZoneEndDirect( const GpuEvent& ev ) { return ev.GpuEnd() >= 0 ? ev.GpuEnd() : ev.GpuStart(); }
const char* GetString( uint64_t ptr ) const;
const char* GetString( const StringRef& ref ) const;