1
0
mirror of https://github.com/wolfpld/tracy.git synced 2025-03-20 07:40:02 +08:00

Set precise_ip to 0 for cache on Intel.

Fuck knows how this is supposed to work. perf_event_open() opens the
descriptor successfully, but it produces no samples, if precise_ip is not 0.
There are no such problems on ARM (where precise_ip is 3, but maybe it is not
supported at all on that architecture, again, fuck knows if), and on AMD
perf_event_open() does not succeed when precise_ip > 0.
This commit is contained in:
Bartosz Taudul 2021-05-23 19:45:13 +02:00
parent b2d5fe8e1f
commit 233a0bb6d6
No known key found for this signature in database
GPG Key ID: B7FE2008B7575DF3

View File

@ -717,6 +717,21 @@ static void ProbePreciseIp( perf_event_attr& pe, unsigned long long config0, uns
TracyDebug( " Probed precise_ip: %i\n", pe.precise_ip );
}
static bool IsGenuineIntel()
{
#if defined __i386 || defined __x86_64__
uint32_t regs[4];
__get_cpuid( 0, regs, regs+1, regs+2, regs+3 );
char manufacturer[12];
memcpy( manufacturer, regs+1, 4 );
memcpy( manufacturer+4, regs+3, 4 );
memcpy( manufacturer+8, regs+2, 4 );
return memcmp( manufacturer, "GenuineIntel", 12 ) == 0;
#else
return false;
#endif
}
static void SetupSampling( int64_t& samplingPeriod )
{
#ifndef CLOCK_MONOTONIC_RAW
@ -828,6 +843,11 @@ static void SetupSampling( int64_t& samplingPeriod )
{
TracyDebug( "Setup sampling CPU cache references + misses\n" );
ProbePreciseIp( pe, PERF_COUNT_HW_CACHE_REFERENCES, PERF_COUNT_HW_CACHE_MISSES, currentPid );
if( IsGenuineIntel() )
{
pe.precise_ip = 0;
TracyDebug( " CPU is GenuineIntel, forcing precise_ip down to 0\n" );
}
for( int i=0; i<s_numCpus; i++ )
{
const int fd = perf_event_open( &pe, currentPid, i, -1, PERF_FLAG_FD_CLOEXEC );