From 25b610a36f3c74ab8986eeaca9f36ef7ccaca9be Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Wed, 30 Oct 2019 23:50:37 +0100 Subject: [PATCH] Pack child into GPU start/end in GpuEvent (saves 4 bytes). long 5152 MB -> 5061 MB --- server/TracyEvent.hpp | 11 +++- server/TracyView.cpp | 110 ++++++++++++++++++------------------ server/TracyWorker.cpp | 124 ++++++++++++++++++++++------------------- server/TracyWorker.hpp | 2 +- 4 files changed, 130 insertions(+), 117 deletions(-) diff --git a/server/TracyEvent.hpp b/server/TracyEvent.hpp index 06d03193..193dbd1f 100644 --- a/server/TracyEvent.hpp +++ b/server/TracyEvent.hpp @@ -222,17 +222,22 @@ struct GpuEvent tracy_force_inline void SetCpuStart( int64_t cpuStart ) { assert( cpuStart < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_cpuStart_srcloc)+2, &cpuStart, 4 ); memcpy( ((char*)&_cpuStart_srcloc)+6, ((char*)&cpuStart)+4, 2 ); } tracy_force_inline int64_t CpuEnd() const { return int64_t( _cpuEnd_thread ) >> 16; } tracy_force_inline void SetCpuEnd( int64_t cpuEnd ) { assert( cpuEnd < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_cpuEnd_thread)+2, &cpuEnd, 4 ); memcpy( ((char*)&_cpuEnd_thread)+6, ((char*)&cpuEnd)+4, 2 ); } + tracy_force_inline int64_t GpuStart() const { return int64_t( _gpuStart_child1 ) >> 16; } + tracy_force_inline void SetGpuStart( int64_t gpuStart ) { assert( gpuStart < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_gpuStart_child1)+2, &gpuStart, 4 ); memcpy( ((char*)&_gpuStart_child1)+6, ((char*)&gpuStart)+4, 2 ); } + tracy_force_inline int64_t GpuEnd() const { return int64_t( _gpuEnd_child2 ) >> 16; } + tracy_force_inline void SetGpuEnd( int64_t gpuEnd ) { assert( gpuEnd < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_gpuEnd_child2)+2, &gpuEnd, 4 ); memcpy( ((char*)&_gpuEnd_child2)+6, ((char*)&gpuEnd)+4, 2 ); } tracy_force_inline int16_t SrcLoc() const { return int16_t( _cpuStart_srcloc & 0xFFFF ); } tracy_force_inline void SetSrcLoc( int16_t srcloc ) { memcpy( &_cpuStart_srcloc, &srcloc, 2 ); } tracy_force_inline uint16_t Thread() const { return uint16_t( _cpuEnd_thread & 0xFFFF ); } tracy_force_inline void SetThread( uint16_t thread ) { memcpy( &_cpuEnd_thread, &thread, 2 ); } + tracy_force_inline int32_t Child() const { return int32_t( uint32_t( _gpuStart_child1 & 0xFFFF ) | ( uint32_t( _gpuEnd_child2 & 0xFFFF ) << 16 ) ); } + tracy_force_inline void SetChild( int32_t child ) { memcpy( &_gpuStart_child1, &child, 2 ); memcpy( &_gpuEnd_child2, ((char*)&child)+2, 2 ); } uint64_t _cpuStart_srcloc; uint64_t _cpuEnd_thread; - int64_t gpuStart; - int64_t gpuEnd; + uint64_t _gpuStart_child1; + uint64_t _gpuEnd_child2; Int24 callstack; - int32_t child; }; enum { GpuEventSize = sizeof( GpuEvent ) }; diff --git a/server/TracyView.cpp b/server/TracyView.cpp index 73b7e11a..5d39694e 100644 --- a/server/TracyView.cpp +++ b/server/TracyView.cpp @@ -2106,9 +2106,9 @@ void View::DrawZones() for( auto& td : v->threadData ) { assert( !td.second.timeline.empty() ); - if( td.second.timeline.front()->gpuStart >= 0 ) + if( td.second.timeline.front()->GpuStart() >= 0 ) { - const auto begin = td.second.timeline.front()->gpuStart; + const auto begin = td.second.timeline.front()->GpuStart(); const auto drift = GpuDrift( v ); if( !singleThread ) offset += sstep; const auto partDepth = DispatchGpuZoneLevel( td.second.timeline, hover, pxns, int64_t( nspx ), wpos, offset, 0, v->thread, yMin, yMax, begin, drift ); @@ -2176,7 +2176,7 @@ void View::DrawZones() int64_t t1 = std::numeric_limits::min(); for( auto& td : v->threadData ) { - const auto _t0 = td.second.timeline.front()->gpuStart; + const auto _t0 = td.second.timeline.front()->GpuStart(); if( _t0 >= 0 ) { // FIXME @@ -2241,7 +2241,7 @@ void View::DrawZones() int64_t t0 = std::numeric_limits::max(); for( auto& td : v->threadData ) { - const auto _t0 = td.second.timeline.front()->gpuStart; + const auto _t0 = td.second.timeline.front()->GpuStart(); if( _t0 >= 0 ) { t0 = std::min( t0, _t0 ); @@ -3318,10 +3318,10 @@ int View::DrawGpuZoneLevel( const Vector& vec, bool hover, double pxn const auto delay = m_worker.GetDelay(); const auto resolution = m_worker.GetResolution(); // cast to uint64_t, so that unended zones (end = -1) are still drawn - auto it = std::lower_bound( vec.begin(), vec.end(), std::max( 0, m_vd.zvStart - delay ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuEnd, begin, drift ) < (uint64_t)r; } ); + auto it = std::lower_bound( vec.begin(), vec.end(), std::max( 0, m_vd.zvStart - delay ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuEnd(), begin, drift ) < (uint64_t)r; } ); if( it == vec.end() ) return depth; - const auto zitend = std::lower_bound( it, vec.end(), std::max( 0, m_vd.zvEnd + resolution ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuStart, begin, drift ) < (uint64_t)r; } ); + const auto zitend = std::lower_bound( it, vec.end(), std::max( 0, m_vd.zvEnd + resolution ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuStart(), begin, drift ) < (uint64_t)r; } ); if( it == zitend ) return depth; const auto w = ImGui::GetWindowContentRegionWidth() - 1; @@ -3339,7 +3339,7 @@ int View::DrawGpuZoneLevel( const Vector& vec, bool hover, double pxn const auto color = GetZoneColor( ev ); auto end = m_worker.GetZoneEnd( ev ); if( end == std::numeric_limits::max() ) break; - const auto start = AdjustGpuTime( ev.gpuStart, begin, drift ); + const auto start = AdjustGpuTime( ev.GpuStart(), begin, drift ); end = AdjustGpuTime( end, begin, drift ); const auto zsz = std::max( ( end - start ) * pxns, pxns * 0.5 ); if( zsz < MinVisSize ) @@ -3352,7 +3352,7 @@ int View::DrawGpuZoneLevel( const Vector& vec, bool hover, double pxn for(;;) { const auto prevIt = it; - it = std::lower_bound( it, zitend, std::max( 0, nextTime ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuEnd, begin, drift ) < (uint64_t)r; } ); + it = std::lower_bound( it, zitend, std::max( 0, nextTime ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuEnd(), begin, drift ) < (uint64_t)r; } ); if( it == prevIt ) ++it; num += std::distance( prevIt, it ); if( it == zitend ) break; @@ -3409,9 +3409,9 @@ int View::DrawGpuZoneLevel( const Vector& vec, bool hover, double pxn } else { - if( ev.child >= 0 ) + if( ev.Child() >= 0 ) { - const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.child ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.Child() ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); if( d > maxdepth ) maxdepth = d; } @@ -3433,7 +3433,7 @@ int View::DrawGpuZoneLevel( const Vector& vec, bool hover, double pxn DrawTextContrast( draw, wpos + ImVec2( std::max( std::max( 0., px0 ), std::min( double( w - tsz.x ), x ) ), offset ), 0xFFFFFFFF, zoneName ); ImGui::PopClipRect(); } - else if( ev.gpuStart == ev.gpuEnd ) + else if( ev.GpuStart() == ev.GpuEnd() ) { DrawTextContrast( draw, wpos + ImVec2( px0 + ( px1 - px0 - tsz.x ) * 0.5, offset ), 0xFFFFFFFF, zoneName ); } @@ -3478,10 +3478,10 @@ int View::SkipGpuZoneLevel( const Vector& vec, bool hover, double pxn const auto delay = m_worker.GetDelay(); const auto resolution = m_worker.GetResolution(); // cast to uint64_t, so that unended zones (end = -1) are still drawn - auto it = std::lower_bound( vec.begin(), vec.end(), std::max( 0, m_vd.zvStart - delay ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuEnd, begin, drift ) < (uint64_t)r; } ); + auto it = std::lower_bound( vec.begin(), vec.end(), std::max( 0, m_vd.zvStart - delay ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuEnd(), begin, drift ) < (uint64_t)r; } ); if( it == vec.end() ) return depth; - const auto zitend = std::lower_bound( it, vec.end(), std::max( 0, m_vd.zvEnd + resolution ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuStart, begin, drift ) < (uint64_t)r; } ); + const auto zitend = std::lower_bound( it, vec.end(), std::max( 0, m_vd.zvEnd + resolution ), [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuStart(), begin, drift ) < (uint64_t)r; } ); if( it == zitend ) return depth; depth++; @@ -3492,7 +3492,7 @@ int View::SkipGpuZoneLevel( const Vector& vec, bool hover, double pxn auto& ev = **it; auto end = m_worker.GetZoneEnd( ev ); if( end == std::numeric_limits::max() ) break; - const auto start = AdjustGpuTime( ev.gpuStart, begin, drift ); + const auto start = AdjustGpuTime( ev.GpuStart(), begin, drift ); end = AdjustGpuTime( end, begin, drift ); const auto zsz = std::max( ( end - start ) * pxns, pxns * 0.5 ); if( zsz < MinVisSize ) @@ -3502,7 +3502,7 @@ int View::SkipGpuZoneLevel( const Vector& vec, bool hover, double pxn for(;;) { const auto prevIt = it; - it = std::lower_bound( it, zitend, nextTime, [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->gpuEnd, begin, drift ) < (uint64_t)r; } ); + it = std::lower_bound( it, zitend, nextTime, [begin, drift] ( const auto& l, const auto& r ) { return (uint64_t)AdjustGpuTime( l->GpuEnd(), begin, drift ) < (uint64_t)r; } ); if( it == prevIt ) ++it; if( it == zitend ) break; const auto nend = AdjustGpuTime( m_worker.GetZoneEnd( **it ), begin, drift ); @@ -3514,9 +3514,9 @@ int View::SkipGpuZoneLevel( const Vector& vec, bool hover, double pxn } else { - if( ev.child >= 0 ) + if( ev.Child() >= 0 ) { - const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.child ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.Child() ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); if( d > maxdepth ) maxdepth = d; } ++it; @@ -6489,9 +6489,9 @@ void View::DrawGpuInfoWindow() ImGui::BeginChild( "##gpuinfo" ); const auto end = m_worker.GetZoneEnd( ev ); - const auto ztime = end - ev.gpuStart; + const auto ztime = end - ev.GpuStart(); const auto selftime = GetZoneSelfTime( ev ); - TextFocused( "Time from start of program:", TimeToString( ev.gpuStart ) ); + TextFocused( "Time from start of program:", TimeToString( ev.GpuStart() ) ); TextFocused( "GPU execution time:", TimeToString( ztime ) ); TextFocused( "GPU self time:", TimeToString( selftime ) ); if( ztime != 0 ) @@ -6503,15 +6503,15 @@ void View::DrawGpuInfoWindow() auto ctx = GetZoneCtx( ev ); if( !ctx ) { - TextFocused( "Delay to execution:", TimeToString( ev.gpuStart - ev.CpuStart() ) ); + TextFocused( "Delay to execution:", TimeToString( ev.GpuStart() - ev.CpuStart() ) ); } else { const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) ); assert( td != ctx->threadData.end() ); - const auto begin = td->second.timeline.front()->gpuStart; + const auto begin = td->second.timeline.front()->GpuStart(); const auto drift = GpuDrift( ctx ); - TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.CpuStart() ) ); + TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.GpuStart(), begin, drift ) - ev.CpuStart() ) ); } ImGui::Separator(); @@ -6542,7 +6542,7 @@ void View::DrawGpuInfoWindow() { ImGui::SameLine(); } - ImGui::TextDisabled( "(%s) %s:%i", TimeToString( m_worker.GetZoneEnd( *v ) - v->gpuStart ), fileName, srcloc.line ); + ImGui::TextDisabled( "(%s) %s:%i", TimeToString( m_worker.GetZoneEnd( *v ) - v->GpuStart() ), fileName, srcloc.line ); ImGui::PopID(); if( ImGui::IsItemClicked( 1 ) ) { @@ -6570,9 +6570,9 @@ void View::DrawGpuInfoWindow() } } ); - if( ev.child >= 0 ) + if( ev.Child() >= 0 ) { - const auto& children = m_worker.GetGpuChildren( ev.child ); + const auto& children = m_worker.GetGpuChildren( ev.Child() ); bool expand = ImGui::TreeNode( "Child zones" ); ImGui::SameLine(); ImGui::TextDisabled( "(%s)", RealToString( children.size(), true ) ); @@ -6599,7 +6599,7 @@ void View::DrawGpuInfoWindow() { const auto& child = *children[i]; const auto cend = m_worker.GetZoneEnd( child ); - const auto ct = cend - child.gpuStart; + const auto ct = cend - child.GpuStart(); const auto srcloc = child.SrcLoc(); ctime += ct; @@ -6689,7 +6689,7 @@ void View::DrawGpuInfoWindow() { const auto& child = *children[cgr.v[i]]; const auto cend = m_worker.GetZoneEnd( child ); - const auto ct = cend - child.gpuStart; + const auto ct = cend - child.GpuStart(); ctt[i] = ct; cti[i] = uint32_t( i ); } @@ -6739,7 +6739,7 @@ void View::DrawGpuInfoWindow() { const auto& child = *children[i]; const auto cend = m_worker.GetZoneEnd( child ); - const auto ct = cend - child.gpuStart; + const auto ct = cend - child.GpuStart(); ctime += ct; ctt[i] = ct; cti[i] = uint32_t( i ); @@ -6902,7 +6902,7 @@ void View::DrawOptions() size_t lastidx = 0; for( size_t j=timeline.size()-1; j > 0; j-- ) { - if( timeline[j]->gpuEnd >= 0 ) + if( timeline[j]->GpuEnd() >= 0 ) { lastidx = j; break; @@ -6921,7 +6921,7 @@ void View::DrawOptions() const auto p1 = dist( gen ); if( p0 != p1 ) { - slopes[idx++] = float( 1.0 - double( timeline[p1]->gpuStart - timeline[p0]->gpuStart ) / double( timeline[p1]->CpuStart() - timeline[p0]->CpuStart() ) ); + slopes[idx++] = float( 1.0 - double( timeline[p1]->GpuStart() - timeline[p0]->GpuStart() ) / double( timeline[p1]->CpuStart() - timeline[p0]->CpuStart() ) ); } } while( idx < NumSlopes ); @@ -12903,19 +12903,19 @@ void View::ZoomToZone( const ZoneEvent& ev ) void View::ZoomToZone( const GpuEvent& ev ) { const auto end = m_worker.GetZoneEnd( ev ); - if( end - ev.gpuStart <= 0 ) return; + if( end - ev.GpuStart() <= 0 ) return; auto ctx = GetZoneCtx( ev ); if( !ctx ) { - ZoomToRange( ev.gpuStart, end ); + ZoomToRange( ev.GpuStart(), end ); } else { const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) ); assert( td != ctx->threadData.end() ); - const auto begin = td->second.timeline.front()->gpuStart; + const auto begin = td->second.timeline.front()->GpuStart(); const auto drift = GpuDrift( ctx ); - ZoomToRange( AdjustGpuTime( ev.gpuStart, begin, drift ), AdjustGpuTime( end, begin, drift ) ); + ZoomToRange( AdjustGpuTime( ev.GpuStart(), begin, drift ), AdjustGpuTime( end, begin, drift ) ); } } @@ -13096,7 +13096,7 @@ void View::ZoneTooltip( const GpuEvent& ev ) const auto tid = GetZoneThread( ev ); const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() ); const auto end = m_worker.GetZoneEnd( ev ); - const auto ztime = end - ev.gpuStart; + const auto ztime = end - ev.GpuStart(); const auto selftime = GetZoneSelfTime( ev ); ImGui::BeginTooltip(); @@ -13121,15 +13121,15 @@ void View::ZoneTooltip( const GpuEvent& ev ) auto ctx = GetZoneCtx( ev ); if( !ctx ) { - TextFocused( "Delay to execution:", TimeToString( ev.gpuStart - ev.CpuStart() ) ); + TextFocused( "Delay to execution:", TimeToString( ev.GpuStart() - ev.CpuStart() ) ); } else { const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) ); assert( td != ctx->threadData.end() ); - const auto begin = td->second.timeline.front()->gpuStart; + const auto begin = td->second.timeline.front()->GpuStart(); const auto drift = GpuDrift( ctx ); - TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.gpuStart, begin, drift ) - ev.CpuStart() ) ); + TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.GpuStart(), begin, drift ) - ev.CpuStart() ) ); } ImGui::EndTooltip(); @@ -13248,13 +13248,13 @@ const GpuEvent* View::GetZoneParent( const GpuEvent& zone ) const if( timeline->empty() ) continue; for(;;) { - auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.gpuStart, [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->gpuStart; } ); + auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->GpuStart(); } ); if( it != timeline->begin() ) --it; - if( zone.gpuEnd >= 0 && (*it)->gpuStart > zone.gpuEnd ) break; + if( zone.GpuEnd() >= 0 && (*it)->GpuStart() > zone.GpuEnd() ) break; if( *it == &zone ) return parent; - if( (*it)->child < 0 ) break; + if( (*it)->Child() < 0 ) break; parent = *it; - timeline = &m_worker.GetGpuChildren( parent->child ); + timeline = &m_worker.GetGpuChildren( parent->Child() ); } } } @@ -13297,12 +13297,12 @@ uint64_t View::GetZoneThread( const GpuEvent& zone ) const if( timeline->empty() ) continue; for(;;) { - auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.gpuStart, [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->gpuStart; } ); + auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->GpuStart(); } ); if( it != timeline->begin() ) --it; - if( zone.gpuEnd >= 0 && (*it)->gpuStart > zone.gpuEnd ) break; + if( zone.GpuEnd() >= 0 && (*it)->GpuStart() > zone.GpuEnd() ) break; if( *it == &zone ) return ctx->thread; - if( (*it)->child < 0 ) break; - timeline = &m_worker.GetGpuChildren( (*it)->child ); + if( (*it)->Child() < 0 ) break; + timeline = &m_worker.GetGpuChildren( (*it)->Child() ); } } return 0; @@ -13323,12 +13323,12 @@ const GpuCtxData* View::GetZoneCtx( const GpuEvent& zone ) const if( timeline->empty() ) continue; for(;;) { - auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.gpuStart, [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->gpuStart; } ); + auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->GpuStart(); } ); if( it != timeline->begin() ) --it; - if( zone.gpuEnd >= 0 && (*it)->gpuStart > zone.gpuEnd ) break; + if( zone.GpuEnd() >= 0 && (*it)->GpuStart() > zone.GpuEnd() ) break; if( *it == &zone ) return ctx; - if( (*it)->child < 0 ) break; - timeline = &m_worker.GetGpuChildren( (*it)->child ); + if( (*it)->Child() < 0 ) break; + timeline = &m_worker.GetGpuChildren( (*it)->Child() ); } } } @@ -13476,11 +13476,11 @@ int64_t View::GetZoneChildTime( const ZoneEvent& zone ) int64_t View::GetZoneChildTime( const GpuEvent& zone ) { int64_t time = 0; - if( zone.child >= 0 ) + if( zone.Child() >= 0 ) { - for( auto& v : m_worker.GetGpuChildren( zone.child ) ) + for( auto& v : m_worker.GetGpuChildren( zone.Child() ) ) { - const auto childSpan = std::max( int64_t( 0 ), v->gpuEnd - v->gpuStart ); + const auto childSpan = std::max( int64_t( 0 ), v->GpuEnd() - v->GpuStart() ); time += childSpan; } } @@ -13519,9 +13519,9 @@ int64_t View::GetZoneSelfTime( const GpuEvent& zone ) { if( m_cache.gpuSelfTime.first == &zone ) return m_cache.gpuSelfTime.second; if( m_cache.gpuSelfTime2.first == &zone ) return m_cache.gpuSelfTime2.second; - const auto ztime = m_worker.GetZoneEnd( zone ) - zone.gpuStart; + const auto ztime = m_worker.GetZoneEnd( zone ) - zone.GpuStart(); const auto selftime = ztime - GetZoneChildTime( zone ); - if( zone.gpuEnd >= 0 ) + if( zone.GpuEnd() >= 0 ) { m_cache.gpuSelfTime2 = m_cache.gpuSelfTime; m_cache.gpuSelfTime = std::make_pair( &zone, selftime ); diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index c1c01223..586e9212 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -2059,9 +2059,9 @@ int64_t Worker::GetZoneEnd( const GpuEvent& ev ) auto ptr = &ev; for(;;) { - if( ptr->gpuEnd >= 0 ) return ptr->gpuEnd; - if( ptr->child < 0 ) return ptr->gpuStart >= 0 ? ptr->gpuStart : m_data.lastTime; - ptr = GetGpuChildren( ptr->child ).back(); + if( ptr->GpuEnd() >= 0 ) return ptr->GpuEnd(); + if( ptr->Child() < 0 ) return ptr->GpuStart() >= 0 ? ptr->GpuStart() : m_data.lastTime; + ptr = GetGpuChildren( ptr->Child() ).back(); } } @@ -4198,11 +4198,11 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e const auto time = TscTime( cpuTime - m_data.baseTime ); zone->SetCpuStart( time ); zone->SetCpuEnd( -1 ); - zone->gpuStart = -1; - zone->gpuEnd = -1; + zone->SetGpuStart( -1 ); + zone->SetGpuEnd( -1 ); zone->SetSrcLoc( ShrinkSourceLocation( ev.srcloc ) ); zone->callstack.SetVal( 0 ); - zone->child = -1; + zone->SetChild( -1 ); uint64_t ztid; if( ctx->thread == 0 ) @@ -4231,12 +4231,12 @@ void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& e if( !stack.empty() ) { auto back = stack.back(); - if( back->child < 0 ) + if( back->Child() < 0 ) { - back->child = int32_t( m_data.gpuChildren.size() ); + back->SetChild( int32_t( m_data.gpuChildren.size() ) ); m_data.gpuChildren.push_back( Vector() ); } - timeline = &m_data.gpuChildren[back->child]; + timeline = &m_data.gpuChildren[back->Child()]; } timeline->push_back( zone ); @@ -4315,23 +4315,24 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev ) assert( zone ); ctx->query[ev.queryId] = nullptr; - if( zone->gpuStart < 0 ) + if( zone->GpuStart() < 0 ) { const auto time = ctx->timeDiff + gpuTime; - zone->gpuStart = time; + zone->SetGpuStart( time ); if( m_data.lastTime < time ) m_data.lastTime = time; ctx->count++; } else { - const auto time = ctx->timeDiff + gpuTime; - zone->gpuEnd = time; - if( m_data.lastTime < time ) m_data.lastTime = time; - - if( zone->gpuEnd < zone->gpuStart ) + auto time = ctx->timeDiff + gpuTime; + if( time < zone->GpuStart() ) { - std::swap( zone->gpuEnd, zone->gpuStart ); + auto tmp = zone->GpuStart(); + std::swap( time, tmp ); + zone->SetGpuStart( tmp ); } + zone->SetGpuEnd( time ); + if( m_data.lastTime < time ) m_data.lastTime = time; } } @@ -4908,15 +4909,15 @@ void Worker::ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_ f.Read( sz ); if( sz == 0 ) { - zone->child = -1; + zone->SetChild( -1 ); } else { - zone->child = m_data.gpuChildren.size(); + zone->SetChild( m_data.gpuChildren.size() ); m_data.gpuChildren.push_back( Vector() ); Vector tmp; ReadTimeline( f, tmp, sz, refTime, refGpuTime ); - m_data.gpuChildren[zone->child] = std::move( tmp ); + m_data.gpuChildren[zone->Child()] = std::move( tmp ); } } @@ -4926,15 +4927,15 @@ void Worker::ReadTimelinePre059( FileRead& f, GpuEvent* zone, int64_t& refTime, f.Read( sz ); if( sz == 0 ) { - zone->child = -1; + zone->SetChild( -1 ); } else { - zone->child = m_data.gpuChildren.size(); + zone->SetChild( m_data.gpuChildren.size() ); m_data.gpuChildren.push_back( Vector() ); Vector tmp; ReadTimelinePre059( f, tmp, sz, refTime, refGpuTime, fileVer ); - m_data.gpuChildren[zone->child] = std::move( tmp ); + m_data.gpuChildren[zone->Child()] = std::move( tmp ); } } @@ -5159,9 +5160,8 @@ void Worker::ReadTimeline( FileRead& f, Vector& vec, uint64_t size, i { s_loadProgress.subProgress.fetch_add( 1, std::memory_order_relaxed ); - // Use zone->gpuStart as scratch buffer for CPU zone start time offset. - // Use zone->gpuEnd as scratch buffer for GPU zone start time offset. - f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) ); + int64_t tcpu, tgpu; + f.Read2( tcpu, tgpu ); int16_t srcloc; f.Read( srcloc ); zone->SetSrcLoc( srcloc ); @@ -5169,15 +5169,18 @@ void Worker::ReadTimeline( FileRead& f, Vector& vec, uint64_t size, i uint16_t thread; f.Read( thread ); zone->SetThread( thread ); - refTime += zone->gpuStart; - refGpuTime += zone->gpuEnd; + refTime += tcpu; + refGpuTime += tgpu; zone->SetCpuStart( refTime ); - zone->gpuStart = refGpuTime; + zone->SetGpuStart( refGpuTime ); ReadTimeline( f, zone, refTime, refGpuTime ); - zone->SetCpuEnd( ReadTimeOffset( f, refTime ) ); - zone->gpuEnd = ReadTimeOffset( f, refGpuTime ); + f.Read2( tcpu, tgpu ); + refTime += tcpu; + refGpuTime += tgpu; + zone->SetCpuEnd( refTime ); + zone->SetGpuEnd( refGpuTime ); } while( ++zone != zptr ); } @@ -5202,9 +5205,12 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector& vec, uint64_t s if( cpuEnd >= 0 ) cpuEnd -= m_data.baseTime; zone->SetCpuStart( cpuStart ); zone->SetCpuEnd( cpuEnd ); - f.Read( &zone->gpuStart, sizeof( GpuEvent::gpuStart ) + sizeof( GpuEvent::gpuEnd ) ); - if( zone->gpuStart != std::numeric_limits::max() ) zone->gpuStart -= m_data.baseTime; - if( zone->gpuEnd >= 0 ) zone->gpuEnd -= m_data.baseTime; + int64_t gpuStart, gpuEnd; + f.Read2( gpuStart, gpuEnd ); + if( gpuStart != std::numeric_limits::max() ) gpuStart -= m_data.baseTime; + if( gpuEnd >= 0 ) gpuEnd -= m_data.baseTime; + zone->SetGpuStart( gpuStart ); + zone->SetGpuEnd( gpuEnd ); int16_t srcloc; f.Read( srcloc ); zone->SetSrcLoc( srcloc ); @@ -5224,18 +5230,20 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector& vec, uint64_t s } else if( fileVer <= FileVersion( 0, 4, 3 ) ) { - f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) ); + int64_t tcpu, tgpu; + f.Read2( tcpu, tgpu ); int16_t srcloc; f.Read( srcloc ); zone->SetSrcLoc( srcloc ); f.Skip( 2 ); f.Read( zone->callstack ); f.Skip( 1 ); - refTime += zone->gpuStart; - refGpuTime += zone->gpuEnd; + refTime += tcpu; + refGpuTime += tgpu; + tgpu = refGpuTime; + if( tgpu != std::numeric_limits::max() ) tgpu -= m_data.baseTime; zone->SetCpuStart( refTime - m_data.baseTime ); - zone->gpuStart = refGpuTime; - if( zone->gpuStart != std::numeric_limits::max() ) zone->gpuStart -= m_data.baseTime; + zone->SetGpuStart( tgpu ); uint64_t thread; f.Read( thread ); @@ -5250,9 +5258,8 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector& vec, uint64_t s } else if( fileVer <= FileVersion( 0, 5, 1 ) ) { - // Use zone->gpuStart as scratch buffer for CPU zone start time offset. - // Use zone->gpuEnd as scratch buffer for GPU zone start time offset. - f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) ); + int64_t tcpu, tgpu; + f.Read2( tcpu, tgpu ); int16_t srcloc; f.Read( srcloc ); zone->SetSrcLoc( srcloc ); @@ -5262,17 +5269,17 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector& vec, uint64_t s uint16_t thread; f.Read( thread ); zone->SetThread( thread ); - refTime += zone->gpuStart; - refGpuTime += zone->gpuEnd; + refTime += tcpu; + refGpuTime += tgpu; + tgpu = refGpuTime; + if( tgpu != std::numeric_limits::max() ) tgpu -= m_data.baseTime; zone->SetCpuStart( refTime - m_data.baseTime ); - zone->gpuStart = refGpuTime; - if( zone->gpuStart != std::numeric_limits::max() ) zone->gpuStart -= m_data.baseTime; + zone->SetGpuStart( tgpu ); } else { - // Use zone->gpuStart as scratch buffer for CPU zone start time offset. - // Use zone->gpuEnd as scratch buffer for GPU zone start time offset. - f.Read( &zone->gpuStart, sizeof( zone->gpuStart ) + sizeof( zone->gpuEnd ) ); + int64_t tcpu, tgpu; + f.Read2( tcpu, tgpu ); int16_t srcloc; f.Read( srcloc ); zone->SetSrcLoc( srcloc ); @@ -5281,10 +5288,10 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector& vec, uint64_t s uint16_t thread; f.Read( thread ); zone->SetThread( thread ); - refTime += zone->gpuStart; - refGpuTime += zone->gpuEnd; + refTime += tcpu; + refGpuTime += tgpu; zone->SetCpuStart( refTime ); - zone->gpuStart = refGpuTime; + zone->SetGpuStart( refGpuTime ); } ReadTimelinePre059( f, zone, refTime, refGpuTime, fileVer ); if( fileVer > FileVersion( 0, 4, 1 ) ) @@ -5292,8 +5299,9 @@ void Worker::ReadTimelinePre059( FileRead& f, Vector& vec, uint64_t s int64_t cpuEnd = ReadTimeOffset( f, refTime ); if( cpuEnd > 0 ) cpuEnd -= m_data.baseTime; zone->SetCpuEnd( cpuEnd ); - zone->gpuEnd = ReadTimeOffset( f, refGpuTime ); - if( zone->gpuEnd > 0 ) zone->gpuEnd -= m_data.baseTime; + int64_t gpuEnd = ReadTimeOffset( f, refGpuTime ); + if( gpuEnd > 0 ) gpuEnd -= m_data.baseTime; + zone->SetGpuEnd( gpuEnd ); } } } @@ -5721,25 +5729,25 @@ void Worker::WriteTimeline( FileWrite& f, const Vector& vec, int64_t& for( auto& v : vec ) { WriteTimeOffset( f, refTime, v->CpuStart() ); - WriteTimeOffset( f, refGpuTime, v->gpuStart ); + WriteTimeOffset( f, refGpuTime, v->GpuStart() ); const int16_t srcloc = v->SrcLoc(); f.Write( &srcloc, sizeof( srcloc ) ); f.Write( &v->callstack, sizeof( v->callstack ) ); const uint16_t thread = v->Thread(); f.Write( &thread, sizeof( thread ) ); - if( v->child < 0 ) + if( v->Child() < 0 ) { sz = 0; f.Write( &sz, sizeof( sz ) ); } else { - WriteTimeline( f, GetGpuChildren( v->child ), refTime, refGpuTime ); + WriteTimeline( f, GetGpuChildren( v->Child() ), refTime, refGpuTime ); } WriteTimeOffset( f, refTime, v->CpuEnd() ); - WriteTimeOffset( f, refGpuTime, v->gpuEnd ); + WriteTimeOffset( f, refGpuTime, v->GpuEnd() ); } } diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index 3d775c37..a6cfa3b3 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -347,7 +347,7 @@ public: int64_t GetZoneEnd( const ZoneEvent& ev ); int64_t GetZoneEnd( const GpuEvent& ev ); static tracy_force_inline int64_t GetZoneEndDirect( const ZoneEvent& ev ) { return ev.End() >= 0 ? ev.End() : ev.Start(); } - static tracy_force_inline int64_t GetZoneEndDirect( const GpuEvent& ev ) { return ev.gpuEnd >= 0 ? ev.gpuEnd : ev.gpuStart; } + static tracy_force_inline int64_t GetZoneEndDirect( const GpuEvent& ev ) { return ev.GpuEnd() >= 0 ? ev.GpuEnd() : ev.GpuStart(); } const char* GetString( uint64_t ptr ) const; const char* GetString( const StringRef& ref ) const;