From 5741bcfd324dc8f999d02367f0bede2a49ae8a25 Mon Sep 17 00:00:00 2001
From: Bartosz Taudul <wolf@nereid.pl>
Date: Tue, 21 Dec 2021 15:48:40 +0100
Subject: [PATCH] Make ring buffer size adjustable.

If call stack capture is enabled for context switch data, the 64KB
buffer is too small to work without overruns. However, if the default
buffer size is increased, then the maximum locked memory limit is hit.

This change keeps the small buffer size for all the buffers that may be
used without escalated privileges. The context switch buffer is bigger,
but it does not need to obey the limits, as the application is running
as root, if it is to be used.
---
 client/TracyRingBuffer.hpp | 25 ++++++++++++++-----------
 client/TracySysTrace.cpp   | 29 ++++++++++++++---------------
 2 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/client/TracyRingBuffer.hpp b/client/TracyRingBuffer.hpp
index 4aac63df..af9802b7 100644
--- a/client/TracyRingBuffer.hpp
+++ b/client/TracyRingBuffer.hpp
@@ -3,19 +3,19 @@
 namespace tracy
 {
 
-template<size_t Size>
 class RingBuffer
 {
 public:
-    RingBuffer( int fd, int id, int cpu = -1 )
-        : m_id( id )
+    RingBuffer( int size, int fd, int id, int cpu = -1 )
+        : m_size( size )
+        , m_id( id )
         , m_cpu( cpu )
         , m_fd( fd )
     {
         const auto pageSize = uint32_t( getpagesize() );
-        assert( Size >= pageSize );
-        assert( __builtin_popcount( Size ) == 1 );
-        m_mapSize = Size + pageSize;
+        assert( size >= pageSize );
+        assert( __builtin_popcount( size ) == 1 );
+        m_mapSize = size + pageSize;
         auto mapAddr = mmap( nullptr, m_mapSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 );
         if( mapAddr == MAP_FAILED )
         {
@@ -66,16 +66,18 @@ public:
 
     void Read( void* dst, uint64_t offset, uint64_t cnt )
     {
-        auto src = ( m_tail + offset ) % Size;
-        if( src + cnt <= Size )
+        const auto size = m_size;
+        auto src = ( m_tail + offset ) % size;
+        if( src + cnt <= size )
         {
             memcpy( dst, m_buffer + src, cnt );
         }
         else
         {
-            const auto s0 = Size - src;
-            memcpy( dst, m_buffer + src, s0 );
-            memcpy( (char*)dst + s0, m_buffer, cnt - s0 );
+            const auto s0 = size - src;
+            const auto buf = m_buffer;
+            memcpy( dst, buf + src, s0 );
+            memcpy( (char*)dst + s0, buf, cnt - s0 );
         }
     }
 
@@ -115,6 +117,7 @@ private:
         std::atomic_store_explicit( (volatile std::atomic<uint64_t>*)&m_metadata->data_tail, m_tail, std::memory_order_release );
     }
 
+    int m_size;
     uint64_t m_tail;
     char* m_buffer;
     int m_id;
diff --git a/client/TracySysTrace.cpp b/client/TracySysTrace.cpp
index 109897ed..975bc9f7 100644
--- a/client/TracySysTrace.cpp
+++ b/client/TracySysTrace.cpp
@@ -649,8 +649,7 @@ static int s_numCpus = 0;
 static int s_numBuffers = 0;
 static int s_ctxBufferIdx = 0;
 
-static constexpr size_t RingBufSize = 256*1024;
-static RingBuffer<RingBufSize>* s_ring = nullptr;
+static RingBuffer* s_ring = nullptr;
 
 static const int ThreadHashSize = 4 * 1024;
 static uint32_t s_threadHash[ThreadHashSize] = {};
@@ -890,7 +889,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
         2       // context switches + wakeups
     );
     s_numCpus = (int)std::thread::hardware_concurrency();
-    s_ring = (RingBuffer<RingBufSize>*)tracy_malloc( sizeof( RingBuffer<RingBufSize> ) * maxNumBuffers );
+    s_ring = (RingBuffer*)tracy_malloc( sizeof( RingBuffer ) * maxNumBuffers );
     s_numBuffers = 0;
 
     // software sampling
@@ -923,13 +922,13 @@ bool SysTraceStart( int64_t& samplingPeriod )
                 fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
                 if( fd == -1 )
                 {
-                    for( int j=0; j<s_numBuffers; j++ ) s_ring[j].~RingBuffer<RingBufSize>();
+                    for( int j=0; j<s_numBuffers; j++ ) s_ring[j].~RingBuffer();
                     tracy_free( s_ring );
                     return false;
                 }
             }
-            new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCallstack );
+            new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCallstack );
             s_numBuffers++;
             TracyDebug( "  Core %i ok\n", i );
         }
 
@@ -960,7 +959,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
             const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
             if( fd != -1 )
             {
-                new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCpuCycles );
+                new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCpuCycles );
                 s_numBuffers++;
                 TracyDebug( "  Core %i ok\n", i );
             }
@@ -972,7 +971,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
             const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
             if( fd != -1 )
             {
-                new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventInstructionsRetired );
+                new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventInstructionsRetired );
                 s_numBuffers++;
                 TracyDebug( "  Core %i ok\n", i );
             }
@@ -994,7 +993,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
             const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
             if( fd != -1 )
             {
-                new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCacheReference );
+                new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCacheReference );
                 s_numBuffers++;
                 TracyDebug( "  Core %i ok\n", i );
             }
@@ -1006,7 +1005,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
             const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
             if( fd != -1 )
             {
-                new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventCacheMiss );
+                new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventCacheMiss );
                 s_numBuffers++;
                 TracyDebug( "  Core %i ok\n", i );
             }
@@ -1023,7 +1022,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
             const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
             if( fd != -1 )
             {
-                new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventBranchRetired );
+                new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventBranchRetired );
                 s_numBuffers++;
                 TracyDebug( "  Core %i ok\n", i );
             }
@@ -1035,7 +1034,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
             const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );
             if( fd != -1 )
            {
-                new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventBranchMiss );
+                new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventBranchMiss );
                 s_numBuffers++;
                 TracyDebug( "  Core %i ok\n", i );
             }
@@ -1069,7 +1068,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
             const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
             if( fd != -1 )
             {
-                new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventContextSwitch, i );
+                new( s_ring+s_numBuffers ) RingBuffer( 256*1024, fd, EventContextSwitch, i );
                 s_numBuffers++;
                 TracyDebug( "  Core %i ok\n", i );
             }
@@ -1086,7 +1085,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
             const int fd = perf_event_open( &pe, -1, i, -1, PERF_FLAG_FD_CLOEXEC );
             if( fd != -1 )
             {
-                new( s_ring+s_numBuffers ) RingBuffer<RingBufSize>( fd, EventWakeup, i );
+                new( s_ring+s_numBuffers ) RingBuffer( 64*1024, fd, EventWakeup, i );
                 s_numBuffers++;
                 TracyDebug( "  Core %i ok\n", i );
             }
@@ -1105,7 +1104,7 @@ void SysTraceStop()
     traceActive.store( false, std::memory_order_relaxed );
 }
 
-static uint64_t* GetCallstackBlock( uint64_t cnt, RingBuffer<RingBufSize>& ring, uint64_t offset )
+static uint64_t* GetCallstackBlock( uint64_t cnt, RingBuffer& ring, uint64_t offset )
 {
     auto trace = (uint64_t*)tracy_malloc_fast( ( 1 + cnt ) * sizeof( uint64_t ) );
     ring.Read( trace+1, offset, sizeof( uint64_t ) * cnt );
@@ -1459,7 +1458,7 @@ void SysTraceWorker( void* ptr )
         }
     }
 
-    for( int i=0; i<s_numBuffers; i++ ) s_ring[i].~RingBuffer<RingBufSize>();
+    for( int i=0; i<s_numBuffers; i++ ) s_ring[i].~RingBuffer();