From 8179aef32315781ce96e5b7bef588ed32282665b Mon Sep 17 00:00:00 2001
From: simplyWiri
Date: Mon, 29 Aug 2022 19:54:51 +1000
Subject: [PATCH] Add basic assembly tokenizer, provides simple syntax
 colouring for assembly in the source view

---
Review notes (placed after the "---" marker, so they are not part of the
commit message or of any hunk):

 * TokenizeAsmOperand: charAt() now returns unsigned char, so the <cctype>
   classifiers never receive a negative value, and its parameter no longer
   shadows the outer `idx`.
 * GetAsmRegs: "SIL", "DIL", "BPL", "SPL" were the only upper-case entries
   in the table; they are now lower-case like every other name.
   NOTE(review): assumes the register lookup is case-sensitive and that the
   disassembler prints these names in lower case - confirm.
 * RenderAsmLine: jumpLabel is uint32_t so that it matches the PRIu32
   conversion it is printed with.
 * Only existing "+" lines were modified; hunk sizes and the diffstat are
   unchanged. The commit id and "index" blob ids are those of the original.
 * NOTE(review): include targets and template argument lists look stripped
   in this copy (bare "#include", "std::vector tokens;"); left as found.

 server/TracyImGui.hpp           |  18 ++++++
 server/TracySourceTokenizer.cpp | 104 ++++++++++++++++++++++++++++++++
 server/TracySourceTokenizer.hpp |  26 ++++++++
 server/TracySourceView.cpp      |  81 ++++++++++---------------
 server/TracySourceView.hpp      |   2 +-
 5 files changed, 182 insertions(+), 49 deletions(-)

diff --git a/server/TracyImGui.hpp b/server/TracyImGui.hpp
index 61430c9b..bd36c735 100644
--- a/server/TracyImGui.hpp
+++ b/server/TracyImGui.hpp
@@ -53,6 +53,24 @@ static constexpr const ImVec4 SyntaxColorsDimmed[] = {
     { 0.21f, 0.69f, 0.89f, 0.6f }, // special
 };
 
+static constexpr const ImVec4 AsmSyntaxColors[] = {
+    { 0.25f, 0.52f, 0.96f, 1 }, // mnemonic
+    { 0.64f, 0.82f, 1, 1 }, // label
+    { 0.7f, 0.7f, 0.7f, 1 }, // default ('[', '+', '*', ',')
+    { 0.25f, 0.74f, 0.38f, 1 }, // dword/xmmword 'ptr'
+    { 0.78f, 0.46f, 0.75f, 1 }, // register
+    { 0.81f, 0.6f, 0.91f, 1 }, // literal
+};
+
+static constexpr const ImVec4 AsmSyntaxColorsDimmed[] = {
+    { 0.25f, 0.52f, 0.96f, 0.6f }, // mnemonic
+    { 0.64f, 0.82f, 1, 0.6f }, // label
+    { 0.7f, 0.7f, 0.7f, 0.6f }, // default ('[', '+', '*', ',')
+    { 0.25f, 0.74f, 0.38f, 0.6f }, // dword/xmmword 'ptr'
+    { 0.78f, 0.46f, 0.75f, 0.6f }, // register
+    { 0.81f, 0.6f, 0.91f, 0.6f }, // literal
+};
+
 
 [[maybe_unused]] static inline float GetScale()
 {
diff --git a/server/TracySourceTokenizer.cpp b/server/TracySourceTokenizer.cpp
index 2f07634d..4284a0c9 100644
--- a/server/TracySourceTokenizer.cpp
+++ b/server/TracySourceTokenizer.cpp
@@ -75,6 +75,50 @@ static unordered_flat_set G
     }
 }
 
+static unordered_flat_set GetAsmRegs()
+{
+    unordered_flat_set ret;
+
+    for( auto& v : {
+        // X86
+        "invalid", "rflags", "rip",
+        "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "rbp", "rsp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+        "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp",
+        "ax", "bx", "cx", "dx", "si", "di", "bp", "sp",
+        "ah", "bh", "ch", "dh", "sil", "dil", "bpl", "spl",
+        "al", "bl", "cl", "dl",
+        "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
+        "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9",
+        "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm16", "xmm17", "xmm18", "xmm19",
+        "xmm20", "xmm21", "xmm22", "xmm23", "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29",
+        "xmm30", "xmm31", "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7",
+
+        // ARM
+        "apsr", "apsr_nzcv", "cpsr", "fpexc", "fpinst", "fpscr", "fpscr_nzcv", "fpsid", "itstate",
+        "lr", "pc", "sp", "spsr", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10",
+        "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
+        "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "fpinst2", "mvfr0", "mvfr1", "mvfr2",
+        "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
+        "q15", "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "s0",
+        "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
+        "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29",
+        "s30", "s31" })
+    {
+        ret.insert( v );
+    }
+    return ret;
+}
+
+static unordered_flat_set GetAsmSizeDirectives()
+{
+    unordered_flat_set ret;
+    for( auto& v : { "byte", "word", "dword", "qword", "xmmword", "ymmword" })
+    {
+        ret.insert( v );
+    }
+    return ret;
+}
+
 Tokenizer::Tokenizer()
     : m_isInComment( false )
     , m_isInPreprocessor( false )
@@ -310,5 +354,65 @@ out:
     begin = end;
     return TokenColor::Default;
 }
+// Assembly: split one operand string into coloured tokens (register, size directive, literal, default)
+Tokenizer::AsmOperand Tokenizer::TokenizeAsmOperand( const char assemblyText[160] ) {
+    static const auto s_regs = GetAsmRegs();
+    static const auto s_sizes = GetAsmSizeDirectives();
+
+    Tokenizer::AsmOperand operandString;
+    operandString.string = assemblyText;
+
+    uint8_t idx = 0;
+    char buf[160];
+
+    auto charAt = [&](size_t pos) -> unsigned char {
+        return pos < operandString.string.size() ? (unsigned char)operandString.string.at( pos ) : 0;
+    };
+    auto putIntoBuf = [&](uint8_t beginIdx, uint8_t endIdx) -> void {
+        memcpy( buf, operandString.string.data() + beginIdx, endIdx - beginIdx );
+        buf[endIdx - beginIdx] = '\0';
+    };
+
+    while ( charAt( idx ) != 0 ) {
+        const auto beginIdx = idx;
+        const auto curChar = charAt(idx);
+        idx++;
+        // Word: may be a register, a size directive, or anything else (e.g. a `nop` operand)
+        if( std::isalpha( curChar ) ) {
+            while( std::isalpha( charAt( idx ) ) || std::isdigit( charAt( idx ) ) ) {
+                idx++;
+            }
+
+            putIntoBuf( beginIdx, idx );
+
+            if( s_regs.find( buf ) != s_regs.end() ) {
+                operandString.tokens.emplace_back( AsmToken{ beginIdx, idx, AsmTokenColor::Register } );
+            } else if ( s_sizes.find( buf ) != s_sizes.end() ) {
+                if (operandString.string.substr( idx, 4 ) == " ptr") {
+                    idx += 4;
+                }
+
+                operandString.tokens.emplace_back( AsmToken{ beginIdx, idx, AsmTokenColor::SizeDirective } );
+            } else {
+                // Neither a register nor a size directive; specialized NOPs can reach here.
+                operandString.tokens.emplace_back( AsmToken{ beginIdx, idx, AsmTokenColor::Default } );
+            }
+        } else if( std::isdigit( curChar ) ){
+            while( std::isalpha( charAt( idx ) ) || std::isdigit( charAt( idx ) ) ) {
+                idx++;
+            }
+
+            operandString.tokens.emplace_back( AsmToken{ beginIdx, idx, AsmTokenColor::Literal } );
+        } else {
+            // Whitespace and punctuation: consume everything up to the next letter or digit
+            while ( charAt( idx ) != 0 && (std::isspace( charAt( idx ) ) || !(std::isdigit( charAt( idx ) ) || std::isalpha( charAt( idx) ) ))) {
+                idx++;
+            }
+            operandString.tokens.emplace_back( AsmToken{ beginIdx, idx, AsmTokenColor::Default } );
+        }
+    }
+
+    return operandString;
+}
 
 }
diff --git a/server/TracySourceTokenizer.hpp b/server/TracySourceTokenizer.hpp
index b02e0aef..e1899856 100644
--- a/server/TracySourceTokenizer.hpp
+++ b/server/TracySourceTokenizer.hpp
@@ -2,7 +2,9 @@
 #define __TRACYSOURCETOKENIZER_HPP__
 
 #include
+#include
 #include
+#include
 
 namespace tracy
 {
@@ -24,6 +26,16 @@ public:
         Special
     };
 
+    enum class AsmTokenColor : uint8_t
+    {
+        Mnemonic, // no-op, padding
+        Label, // no-op, padding
+        Default, // '+', '[', '*', etc
+        SizeDirective, // byte, word, dword, etc
+        Register, // rax, rip, etc
+        Literal, // 0x04, etc
+    };
+
     struct Token
     {
         const char* begin;
@@ -31,6 +43,13 @@ public:
         TokenColor color;
     };
 
+    struct AsmToken
+    {
+        uint8_t beginIdx;
+        uint8_t endIdx;
+        AsmTokenColor color;
+    };
+
     struct Line
     {
         const char* begin;
@@ -38,9 +57,16 @@ public:
         std::vector tokens;
     };
 
+    struct AsmOperand
+    {
+        std::string string;
+        std::vector tokens;
+    };
+
     Tokenizer();
 
     std::vector Tokenize( const char* begin, const char* end );
+    AsmOperand TokenizeAsmOperand( const char assemblyText[160] );
 
 private:
     TokenColor IdentifyToken( const char*& begin, const char* end );
diff --git a/server/TracySourceView.cpp b/server/TracySourceView.cpp
index 844c60d4..b00a58ea 100644
--- a/server/TracySourceView.cpp
+++ b/server/TracySourceView.cpp
@@ -4,6 +4,7 @@
 
 #include
 
+#include "TracySourceTokenizer.hpp"
 #include "imgui.h"
 #include "TracyCharUtil.hpp"
 #include "TracyColor.hpp"
@@ -875,7 +876,7 @@ bool SourceView::Disassemble( uint64_t symAddr, const Worker& worker )
                     }
                 }
             }
-            m_asm.emplace_back( AsmLine { op.address, jumpAddr, op.mnemonic, op.op_str, (uint8_t)op.size, leaData, jumpConditional, std::move( params ) } );
+            m_asm.emplace_back( AsmLine { op.address, jumpAddr, op.mnemonic, m_tokenizer.TokenizeAsmOperand( op.op_str ), (uint8_t)op.size, leaData, jumpConditional, std::move( params ) } );
 
 #if CS_API_MAJOR >= 4
             auto& entry = m_asm.back();
@@ -906,7 +907,6 @@ bool SourceView::Disassemble( uint64_t symAddr, const Worker& worker )
                 break;
             }
 #endif
-
             const auto mLen = (int)strlen( op.mnemonic );
             if( mLen > mLenMax ) mLenMax = mLen;
             if( op.size > bytesMax ) bytesMax = op.size;
@@ -3843,68 +3843,53 @@ void SourceView::RenderAsmLine( AsmLine& line, const AddrStat& ipcnt, const Addr
     memcpy( buf, line.mnemonic.c_str(), msz );
     memset( buf+msz, ' ', m_maxMnemonicLen-msz );
     bool hasJump = false;
+    uint32_t jumpLabel = 0;
     if( line.jumpAddr != 0 )
     {
         auto lit = m_locMap.find( line.jumpAddr );
         if( lit != m_locMap.end() )
         {
+            jumpLabel = lit->second;
             char tmp[64];
-            sprintf( tmp, ".L%" PRIu32, lit->second );
+            sprintf( tmp, ".L%" PRIu32, jumpLabel );
             strcpy( buf+m_maxMnemonicLen, tmp );
             hasJump = true;
         }
     }
     if( !hasJump )
     {
-        memcpy( buf+m_maxMnemonicLen, line.operands.c_str(), line.operands.size() + 1 );
+        memcpy( buf+m_maxMnemonicLen, line.operands.string.c_str(), line.operands.string.size() + 1 );
     }
 
     const bool isInContext = IsInContext( worker, line.addr );
-    if( asmIdx == m_asmSelected )
-    {
-        TextColoredUnformatted( ImVec4( 1, 0.25f, 0.25f, isInContext ? 1.f : 0.5f ), buf );
+    const auto selected = m_asmSelected == asmIdx;
+    auto transitiveDependency = false;
+    for(int i = 0; line.regData[i] != 0; i++) {
+        transitiveDependency |= line.regData[i] & ( WriteBit | ReadBit );
     }
-    else if( line.regData[0] != 0 )
-    {
-        bool hasDepencency = false;
-        int idx = 0;
-        for(;;)
-        {
-            if( line.regData[idx] == 0 ) break;
-            if( line.regData[idx] & ( WriteBit | ReadBit ) )
-            {
-                hasDepencency = true;
-                break;
-            }
-            idx++;
-        }
-        if( hasDepencency )
-        {
-            TextColoredUnformatted( ImVec4( 1, 0.5f, 1, isInContext ? 1.f : 0.5f ), buf );
-        }
-        else
-        {
-            if( isInContext )
-            {
-                ImGui::TextUnformatted( buf );
-            }
-            else
-            {
-                TextDisabledUnformatted( buf );
-            }
-        }
-    }
-    else
-    {
-        if( isInContext )
-        {
-            ImGui::TextUnformatted( buf );
-        }
-        else
-        {
-            TextDisabledUnformatted( buf );
+
+    const auto shouldFocus = (isInContext && m_asmSelected == -1) || selected || transitiveDependency;
+    const auto& colorPalette = shouldFocus ? AsmSyntaxColors : AsmSyntaxColorsDimmed;
+
+    ImGui::BeginGroup();
+    TextColoredUnformatted( colorPalette[(int)Tokenizer::AsmTokenColor::Mnemonic], buf, buf + m_maxMnemonicLen );
+
+    if ( hasJump ) {
+        char tmp[64];
+        sprintf( tmp, ".L%" PRIu32, jumpLabel );
+
+        ImGui::SameLine();
+        TextColoredUnformatted( colorPalette[(int)Tokenizer::AsmTokenColor::Label], tmp );
+    } else {
+        for (const auto& token : line.operands.tokens) {
+            auto* begin = line.operands.string.c_str() + token.beginIdx;
+            auto* end = line.operands.string.c_str() + token.endIdx;
+            ImGui::SameLine();
+            TextColoredUnformatted( colorPalette[(int)token.color], begin, end );
         }
     }
+    ImGui::EndGroup();
+
 
     uint32_t jumpOffset;
     uint64_t jumpBase;
@@ -5286,13 +5271,13 @@ void SourceView::Save( const Worker& worker, size_t start, size_t stop )
             }
             if( !hasJump )
             {
-                if( v.operands.empty() )
+                if( v.operands.string.empty() )
                 {
                     fprintf( f, "\t%s\n", v.mnemonic.c_str() );
                 }
                 else
                 {
-                    fprintf( f, "\t%-*s%s\n", m_maxMnemonicLen, v.mnemonic.c_str(), v.operands.c_str() );
+                    fprintf( f, "\t%-*s%s\n", m_maxMnemonicLen, v.mnemonic.c_str(), v.operands.string.c_str() );
                 }
             }
         }
diff --git a/server/TracySourceView.hpp b/server/TracySourceView.hpp
index 162305d6..d5e529c9 100644
--- a/server/TracySourceView.hpp
+++ b/server/TracySourceView.hpp
@@ -82,7 +82,7 @@ private:
         uint64_t addr;
         uint64_t jumpAddr;
         std::string mnemonic;
-        std::string operands;
+        Tokenizer::AsmOperand operands;
         uint8_t len;
         LeaData leaData;
         bool jumpConditional;