#include "tracy_robin_hood.h" #include "TracyCharUtil.hpp" #include "TracySourceTokenizer.hpp" namespace tracy { namespace { static unordered_flat_set GetKeywords() { unordered_flat_set ret; for( auto& v : { "alignas", "alignof", "and", "and_eq", "asm", "atomic_cancel", "atomic_commit", "atomic_noexcept", "bitand", "bitor", "break", "case", "catch", "class", "compl", "concept", "const", "consteval", "constexpr", "constinit", "const_cast", "continue", "co_await", "co_return", "co_yield", "decltype", "default", "delete", "do", "dynamic_cast", "else", "enum", "explicit", "export", "extern", "for", "friend", "if", "inline", "mutable", "namespace", "new", "noexcept", "not", "not_eq", "operator", "or", "or_eq", "private", "protected", "public", "reflexpr", "register", "reinterpret_cast", "return", "requires", "sizeof", "static", "static_assert", "static_cast", "struct", "switch", "synchronized", "template", "thread_local", "throw", "try", "typedef", "typeid", "typename", "union", "using", "virtual", "volatile", "while", "xor", "xor_eq", "override", "final", "import", "module", "transaction_safe", "transaction_safe_dynamic" } ) { ret.insert( v ); } return ret; } static unordered_flat_set GetTypes() { unordered_flat_set ret; for( auto& v : { "bool", "char", "char8_t", "char16_t", "char32_t", "double", "float", "int", "long", "short", "signed", "unsigned", "void", "wchar_t", "size_t", "int8_t", "int16_t", "int32_t", "int64_t", "int_fast8_t", "int_fast16_t", "int_fast32_t", "int_fast64_t", "int_least8_t", "int_least16_t", "int_least32_t", "int_least64_t", "intmax_t", "intptr_t", "uint8_t", "uint16_t", "uint32_t", "uint64_t", "uint_fast8_t", "uint_fast16_t", "uint_fast32_t", "uint_fast64_t", "uint_least8_t", "uint_least16_t", "uint_least32_t", "uint_least64_t", "uintmax_t", "uintptr_t", "type_info", "bad_typeid", "bad_cast", "type_index", "clock_t", "time_t", "tm", "timespec", "ptrdiff_t", "nullptr_t", "max_align_t", "auto", "__m64", "__m128", "__m128i", "__m128d", "__m256", "__m256i", "__m256d", "__m512", "__m512i", "__m512d", "__mmask8", "__mmask16", "__mmask32", "__mmask64", "int8x8_t", "int16x4_t", "int32x2_t", "int64x1_t", "uint8x8_t", "uint16x4_t", "uint32x2_t", "uint64x1_t", "float32x2_t", "poly8x8_t", "poly16x4_t", "int8x16_t", "int16x8_t", "int32x4_t", "int64x2_t", "uint8x16_t", "uint16x8_t", "uint32x4_t", "uint64x2_t", "float32x4_t", "poly8x16_t", "poly16x8_t", "int8x8x2_t", "int16x4x2_t", "int32x2x2_t", "int64x1x2_t", "uint8x8x2_t", "uint16x4x2_t", "uint32x2x2_t", "uint64x1x2_t", "float32x2x2_t", "poly8x8x2_t", "poly16x4x2_t", "int8x16x2_t", "int16x8x2_t", "int32x4x2_t", "int64x2x2_t", "uint8x16x2_t", "uint16x8x2_t", "uint32x4x2_t", "uint64x2x2_t", "float32x4x2_t", "poly8x16x2_t", "poly16x8x2_t", "int8x8x3_t", "int16x4x3_t", "int32x2x3_t", "int64x1x3_t", "uint8x8x3_t", "uint16x4x3_t", "uint32x2x3_t", "uint64x1x3_t", "float32x2x3_t", "poly8x8x3_t", "poly16x4x3_t", "int8x16x3_t", "int16x8x3_t", "int32x4x3_t", "int64x2x3_t", "uint8x16x3_t", "uint16x8x3_t", "uint32x4x3_t", "uint64x2x3_t", "float32x4x3_t", "poly8x16x3_t", "poly16x8x3_t", "int8x8x4_t", "int16x4x4_t", "int32x2x4_t", "int64x1x4_t", "uint8x8x4_t", "uint16x4x4_t", "uint32x2x4_t", "uint64x1x4_t", "float32x2x4_t", "poly8x8x4_t", "poly16x4x4_t", "int8x16x4_t", "int16x8x4_t", "int32x4x4_t", "int64x2x4_t", "uint8x16x4_t", "uint16x8x4_t", "uint32x4x4_t", "uint64x2x4_t", "float32x4x4_t", "poly8x16x4_t", "poly16x8x4_t" } ) { ret.insert( v ); } return ret; } static unordered_flat_set GetSpecial() { unordered_flat_set ret; for( auto& v : { "this", "nullptr", "true", "false", "goto", "NULL" } ) { ret.insert( v ); } return ret; } } static unordered_flat_set GetAsmRegs() { unordered_flat_set ret; for( auto& v : { // X86 "invalid", "rflags", "rip", "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "rbp", "rsp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp", "ax", "bx", "cx", "dx", "si", "di", "bp", "sp", "ah", "bh", "ch", "dh", "SIL", "DIL", "BPL", "SPL", "al", "bl", "cl", "dl", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23", "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31", "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7", // ARM "apsr", "apsr_nzcv", "cpsr", "fpexc", "fpinst", "fpscr", "fpscr_nzcv", "fpsid", "itstate", "lr", "pc", "sp", "spsr", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "fpinst2", "mvfr0", "mvfr1", "mvfr2", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" }) { ret.insert( v ); } return ret; } static unordered_flat_set GetAsmSizeDirectives() { unordered_flat_set ret; for( auto& v : { "byte", "word", "dword", "qword", "xmmword", "ymmword" }) { ret.insert( v ); } return ret; } Tokenizer::Tokenizer() : m_isInComment( false ) , m_isInPreprocessor( false ) { } std::vector Tokenizer::Tokenize( const char* begin, const char* end ) { std::vector ret; if( m_isInPreprocessor ) { if( begin == end ) { m_isInPreprocessor = false; return ret; } if( *(end-1) != '\\' ) m_isInPreprocessor = false; ret.emplace_back( Token { begin, end, TokenColor::Preprocessor } ); return ret; } const bool first = !m_isInComment; while( begin != end ) { if( m_isInComment ) { const auto pos = begin; for(;;) { while( begin != end && *begin != '*' ) begin++; begin++; if( begin < end ) { if( *begin == '/' ) { begin++; ret.emplace_back( Token { pos, begin, TokenColor::Comment } ); m_isInComment = false; break; } } else { ret.emplace_back( Token { pos, end, TokenColor::Comment } ); return ret; } } } else { while( begin != end && isspace( (uint8_t)*begin ) ) begin++; if( first && begin < end && *begin == '#' ) { if( *(end-1) == '\\' ) m_isInPreprocessor = true; ret.emplace_back( Token { begin, end, TokenColor::Preprocessor } ); return ret; } const auto pos = begin; const auto col = IdentifyToken( begin, end ); ret.emplace_back( Token { pos, begin, col } ); } } return ret; } static bool TokenizeNumber( const char*& begin, const char* end ) { const bool startNum = *begin >= '0' && *begin <= '9'; if( *begin != '+' && *begin != '-' && !startNum ) return false; begin++; bool hasNum = startNum; while( begin < end && ( ( *begin >= '0' && *begin <= '9' ) || *begin == '\'' ) ) { hasNum = true; begin++; } if( !hasNum ) return false; bool isFloat = false, isBinary = false; if( begin < end ) { if( *begin == '.' ) { isFloat = true; begin++; while( begin < end && ( ( *begin >= '0' && *begin <= '9' ) || *begin == '\'' ) ) begin++; } else if( *begin == 'x' || *begin == 'X' ) { // hexadecimal begin++; while( begin < end && ( ( *begin >= '0' && *begin <= '9' ) || ( *begin >= 'a' && *begin <= 'f' ) || ( *begin >= 'A' && *begin <= 'F' ) || *begin == '\'' ) ) begin++; } else if( *begin == 'b' || *begin == 'B' ) { isBinary = true; begin++; while( begin < end && ( ( *begin == '0' || *begin == '1' ) || *begin == '\'' ) ) begin++; } } if( !isBinary ) { if( begin < end && ( *begin == 'e' || *begin == 'E' || *begin == 'p' || *begin == 'P' ) ) { isFloat = true; begin++; if( begin < end && ( *begin == '+' || *begin == '-' ) ) begin++; bool hasDigits = false; while( begin < end && ( ( *begin >= '0' && *begin <= '9' ) || ( *begin >= 'a' && *begin <= 'f' ) || ( *begin >= 'A' && *begin <= 'F' ) || *begin == '\'' ) ) { hasDigits = true; begin++; } if( !hasDigits ) return false; } if( begin < end && ( *begin == 'f' || *begin == 'F' || *begin == 'l' || *begin == 'L' ) ) begin++; } if( !isFloat ) { while( begin < end && ( *begin == 'u' || *begin == 'U' || *begin == 'l' || *begin == 'L' ) ) begin++; } return true; } Tokenizer::TokenColor Tokenizer::IdentifyToken( const char*& begin, const char* end ) { static const auto s_keywords = GetKeywords(); static const auto s_types = GetTypes(); static const auto s_special = GetSpecial(); if( *begin == '"' ) { begin++; while( begin < end ) { if( *begin == '"' ) { begin++; break; } begin += 1 + ( *begin == '\\' && end - begin > 1 && *(begin+1) == '"' ); } return TokenColor::String; } if( *begin == '\'' ) { begin++; if( begin < end && *begin == '\\' ) begin++; if( begin < end ) begin++; if( begin < end && *begin == '\'' ) begin++; return TokenColor::CharacterLiteral; } if( ( *begin >= 'a' && *begin <= 'z' ) || ( *begin >= 'A' && *begin <= 'Z' ) || *begin == '_' ) { const char* tmp = begin; begin++; while( begin < end && ( ( *begin >= 'a' && *begin <= 'z' ) || ( *begin >= 'A' && *begin <= 'Z' ) || ( *begin >= '0' && *begin <= '9' ) || *begin == '_' ) ) begin++; if( begin - tmp <= 24 ) { char buf[25]; memcpy( buf, tmp, begin-tmp ); buf[begin-tmp] = '\0'; if( s_keywords.find( buf ) != s_keywords.end() ) return TokenColor::Keyword; if( s_types.find( buf ) != s_types.end() ) return TokenColor::Type; if( s_special.find( buf ) != s_special.end() ) return TokenColor::Special; } return TokenColor::Default; } const char* tmp = begin; if( TokenizeNumber( begin, end ) ) return TokenColor::Number; begin = tmp; if( *begin == '/' && end - begin > 1 ) { if( *(begin+1) == '/' ) { begin = end; return TokenColor::Comment; } if( *(begin+1) == '*' ) { begin += 2; for(;;) { while( begin < end && *begin != '*' ) begin++; if( begin == end ) { m_isInComment = true; return TokenColor::Comment; } begin++; if( begin < end && *begin == '/' ) { begin++; return TokenColor::Comment; } } } } while( begin < end ) { switch( *begin ) { case '[': case ']': case '{': case '}': case '!': case '%': case '^': case '&': case '*': case '(': case ')': case '-': case '+': case '=': case '~': case '|': case '<': case '>': case '?': case ':': case '/': case ';': case ',': case '.': begin++; break; default: goto out; } } out: if( begin != tmp ) return TokenColor::Punctuation; begin = end; return TokenColor::Default; } // Assembly Tokenizer::AsmOperand Tokenizer::TokenizeAsmOperand( const char assemblyText[160] ) { static const auto s_regs = GetAsmRegs(); static const auto s_sizes = GetAsmSizeDirectives(); Tokenizer::AsmOperand operandString; operandString.string = assemblyText; uint8_t idx = 0; char buf[160]; auto charAt = [&](int idx) -> char { return idx < operandString.string.size() ? operandString.string.at( idx ) : 0; }; auto putIntoBuf = [&](uint8_t beginIdx, uint8_t endIdx) -> void { memcpy( buf, operandString.string.data() + beginIdx, endIdx - beginIdx ); buf[endIdx - beginIdx] = '\0'; }; while ( charAt( idx ) != 0 ) { const auto beginIdx = idx; const auto curChar = charAt(idx); idx++; // Lexeme, could be a register, directive or a `nop` operand if( std::isalpha( curChar ) ) { while( std::isalpha( charAt( idx ) ) || std::isdigit( charAt( idx ) ) ) { idx++; } putIntoBuf( beginIdx, idx ); if( s_regs.find( buf ) != s_regs.end() ) { operandString.tokens.emplace_back( AsmToken{ beginIdx, idx, AsmTokenColor::Register } ); } else if ( s_sizes.find( buf ) != s_sizes.end() ) { if (operandString.string.substr( idx, 4 ) == " ptr") { idx += 4; } operandString.tokens.emplace_back( AsmToken{ beginIdx, idx, AsmTokenColor::SizeDirective } ); } else { // specialized NOP's can reach here. operandString.tokens.emplace_back( AsmToken{ beginIdx, idx, AsmTokenColor::Default } ); } } else if( std::isdigit( curChar ) ){ while( std::isalpha( charAt( idx ) ) || std::isdigit( charAt( idx ) ) ) { idx++; } operandString.tokens.emplace_back( AsmToken{ beginIdx, idx, AsmTokenColor::Literal } ); } else { // is space, or isn't a digit or alpha numerical num while ( charAt( idx ) != 0 && (std::isspace( charAt( idx ) ) || !(std::isdigit( charAt( idx ) ) || std::isalpha( charAt( idx) ) ))) { idx++; } operandString.tokens.emplace_back( AsmToken{ beginIdx, idx, AsmTokenColor::Default } ); } } return operandString; } }