1
0
mirror of https://github.com/wolfpld/tracy.git synced 2025-03-20 07:40:02 +08:00

Add basic assembly tokenizer, provides simple syntax colouring for assembly in the source view

This commit is contained in:
simplyWiri 2022-08-29 19:54:51 +10:00
parent 107975c8de
commit 8179aef323
5 changed files with 182 additions and 49 deletions

View File

@ -53,6 +53,24 @@ static constexpr const ImVec4 SyntaxColorsDimmed[] = {
{ 0.21f, 0.69f, 0.89f, 0.6f }, // special
};
// Full-intensity palette for disassembly text. Entries are looked up with
// (int)Tokenizer::AsmTokenColor, so the order here must follow that enum.
static constexpr const ImVec4 AsmSyntaxColors[] = {
    { 0.25f, 0.52f, 0.96f, 1.f },   // Mnemonic
    { 0.64f, 0.82f, 1.f,   1.f },   // Label
    { 0.7f,  0.7f,  0.7f,  1.f },   // Default: punctuation such as '[', '+', '*', ','
    { 0.25f, 0.74f, 0.38f, 1.f },   // SizeDirective: e.g. "dword ptr", "xmmword ptr"
    { 0.78f, 0.46f, 0.75f, 1.f },   // Register
    { 0.81f, 0.6f,  0.91f, 1.f },   // Literal
};
// Dimmed variant of the disassembly palette: same RGB values with alpha 0.6.
// Entries are looked up with (int)Tokenizer::AsmTokenColor, so the order here
// must follow that enum.
static constexpr const ImVec4 AsmSyntaxColorsDimmed[] = {
    { 0.25f, 0.52f, 0.96f, 0.6f },  // Mnemonic
    { 0.64f, 0.82f, 1.f,   0.6f },  // Label
    { 0.7f,  0.7f,  0.7f,  0.6f },  // Default: punctuation such as '[', '+', '*', ','
    { 0.25f, 0.74f, 0.38f, 0.6f },  // SizeDirective: e.g. "dword ptr", "xmmword ptr"
    { 0.78f, 0.46f, 0.75f, 0.6f },  // Register
    { 0.81f, 0.6f,  0.91f, 0.6f },  // Literal
};
[[maybe_unused]] static inline float GetScale()
{

View File

@ -75,6 +75,50 @@ static unordered_flat_set<const char*, charutil::Hasher, charutil::Comparator> G
}
}
// Builds the set of register names recognised by the assembly operand tokenizer.
//
// The set stores pointers to string literals; lookups go through charutil::Hasher
// and charutil::Comparator. Every name is spelled in lower case, which is how the
// disassembler prints registers (NOTE(review): this assumes charutil::Comparator
// is case-sensitive -- confirm). The x86 low-byte registers sil/dil/bpl/spl were
// previously listed in upper case and therefore could never match.
//
// Some names ("sp", "r8".."r12") appear in both the x86 and the ARM list; the
// duplicates are harmless because the container is a set.
static unordered_flat_set<const char*, charutil::Hasher, charutil::Comparator> GetAsmRegs()
{
    unordered_flat_set<const char*, charutil::Hasher, charutil::Comparator> ret;
    for( auto& v : {
        // X86
        "invalid", "rflags", "rip",
        "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "rbp", "rsp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
        "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp",
        "ax", "bx", "cx", "dx", "si", "di", "bp", "sp",
        "ah", "bh", "ch", "dh", "sil", "dil", "bpl", "spl",
        "al", "bl", "cl", "dl",
        "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
        "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9",
        "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm16", "xmm17", "xmm18", "xmm19",
        "xmm20", "xmm21", "xmm22", "xmm23", "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29",
        "xmm30", "xmm31", "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7",
        // ARM
        "apsr", "apsr_nzcv", "cpsr", "fpexc", "fpinst", "fpscr", "fpscr_nzcv", "fpsid", "itstate",
        "lr", "pc", "sp", "spsr", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10",
        "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
        "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "fpinst2", "mvfr0", "mvfr1", "mvfr2",
        "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14",
        "q15", "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "s0",
        "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
        "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29",
        "s30", "s31" })
    {
        ret.insert( v );
    }
    return ret;
}
// Builds the set of operand size keywords ("byte", "dword", ...) that the
// assembly operand tokenizer colours as size directives.
static unordered_flat_set<const char*, charutil::Hasher, charutil::Comparator> GetAsmSizeDirectives()
{
    static constexpr const char* Directives[] = {
        "byte", "word", "dword", "qword", "xmmword", "ymmword"
    };

    unordered_flat_set<const char*, charutil::Hasher, charutil::Comparator> directives;
    for( const char* const& name : Directives ) directives.insert( name );
    return directives;
}
Tokenizer::Tokenizer()
: m_isInComment( false )
, m_isInPreprocessor( false )
@ -310,5 +354,65 @@ out:
begin = end;
return TokenColor::Default;
}
// Assembly
// Splits the operand part of a disassembled instruction into coloured tokens.
//
// assemblyText is a NUL-terminated operand string. The declared bound of 160
// decays to a plain pointer, so at most 159 characters are read; anything past
// that is dropped. This keeps the scratch buffer and the uint8_t token indices
// in range even when a longer string is passed.
//
// Returns an AsmOperand holding a copy of the text and a list of tokens that
// covers it from start to end without gaps. Each token is a half-open
// [beginIdx, endIdx) range into AsmOperand::string:
//   - a letter followed by letters/digits is a Register if it is in the register
//     set, a SizeDirective if it is a size keyword (an immediately following
//     " ptr" is folded into the same token), and Default otherwise;
//   - a digit followed by letters/digits is a Literal (this covers "0x1f");
//   - any run of other characters (whitespace, brackets, operators) is Default.
Tokenizer::AsmOperand Tokenizer::TokenizeAsmOperand( const char assemblyText[160] )
{
    static const auto s_regs = GetAsmRegs();
    static const auto s_sizes = GetAsmSizeDirectives();

    constexpr size_t MaxOperandLen = 159;

    // The <cctype> classifiers have undefined behaviour for negative arguments,
    // so bytes >= 0x80 must be passed as unsigned char.
    const auto isAlpha = []( char c ) { return std::isalpha( static_cast<unsigned char>( c ) ) != 0; };
    const auto isDigit = []( char c ) { return std::isdigit( static_cast<unsigned char>( c ) ) != 0; };
    const auto isAlnum = []( char c ) { return std::isalnum( static_cast<unsigned char>( c ) ) != 0; };

    AsmOperand operandString;
    size_t len = 0;
    while( len < MaxOperandLen && assemblyText[len] != '\0' ) len++;
    operandString.string.assign( assemblyText, len );
    const auto& text = operandString.string;

    // Out-of-range reads yield NUL, which is neither alphanumeric nor part of the text.
    const auto charAt = [&text]( size_t pos ) -> char {
        return pos < text.size() ? text[pos] : '\0';
    };

    // The lookup sets are keyed by NUL-terminated C strings, so each lexeme is
    // copied out of the operand text before it is looked up.
    char buf[MaxOperandLen + 1];

    uint8_t idx = 0;    // len <= 159, so this cannot wrap
    while( idx < len )
    {
        const uint8_t beginIdx = idx;
        const char curChar = charAt( idx );
        idx++;

        if( isAlpha( curChar ) )
        {
            // Lexeme: could be a register, a size directive or a `nop` operand.
            while( isAlnum( charAt( idx ) ) ) idx++;

            const size_t lexemeLen = size_t( idx - beginIdx );
            memcpy( buf, text.data() + beginIdx, lexemeLen );
            buf[lexemeLen] = '\0';

            if( s_regs.find( buf ) != s_regs.end() )
            {
                operandString.tokens.emplace_back( AsmToken{ beginIdx, idx, AsmTokenColor::Register } );
            }
            else if( s_sizes.find( buf ) != s_sizes.end() )
            {
                // Colour "dword ptr" and friends as a single token.
                if( text.compare( idx, 4, " ptr" ) == 0 ) idx += 4;
                operandString.tokens.emplace_back( AsmToken{ beginIdx, idx, AsmTokenColor::SizeDirective } );
            }
            else
            {
                // Specialized NOPs can reach here.
                operandString.tokens.emplace_back( AsmToken{ beginIdx, idx, AsmTokenColor::Default } );
            }
        }
        else if( isDigit( curChar ) )
        {
            // Letters are accepted so that the 'x' and hex digits of "0x1f" stay in one token.
            while( isAlnum( charAt( idx ) ) ) idx++;
            operandString.tokens.emplace_back( AsmToken{ beginIdx, idx, AsmTokenColor::Literal } );
        }
        else
        {
            // Whitespace and punctuation: swallow everything up to the next letter or digit.
            while( idx < len && !isAlnum( charAt( idx ) ) ) idx++;
            operandString.tokens.emplace_back( AsmToken{ beginIdx, idx, AsmTokenColor::Default } );
        }
    }

    return operandString;
}
}

View File

@ -2,7 +2,9 @@
#define __TRACYSOURCETOKENIZER_HPP__
#include <stdint.h>
#include <cstdint>
#include <vector>
#include <string>
namespace tracy
{
@ -24,6 +26,16 @@ public:
Special
};
// Colour classes for disassembly text. The numeric value of each enumerator is
// used as an index into the assembly colour palettes, so the values are spelled
// out and must not be reordered.
enum class AsmTokenColor : uint8_t
{
    Mnemonic      = 0,  // no-op, padding
    Label         = 1,  // no-op, padding
    Default       = 2,  // '+', '[', '*', etc
    SizeDirective = 3,  // byte, word, dword, etc
    Register      = 4,  // rax, rip, etc
    Literal       = 5,  // 0x04, etc
};
struct Token
{
const char* begin;
@ -31,6 +43,13 @@ public:
TokenColor color;
};
// One coloured span of an assembly operand string. The indices are byte offsets
// into the owning AsmOperand::string and form a half-open range [beginIdx, endIdx).
struct AsmToken
{
// Offset of the first character of the token.
uint8_t beginIdx;
// Offset one past the last character of the token.
uint8_t endIdx;
// Colour class used to pick the palette entry when rendering.
AsmTokenColor color;
};
struct Line
{
const char* begin;
@ -38,9 +57,16 @@ public:
std::vector<Token> tokens;
};
// Result of TokenizeAsmOperand(): the operand text together with its colouring.
struct AsmOperand
{
// Copy of the operand text that was tokenized.
std::string string;
// Tokens in left-to-right order; their indices refer to `string`.
std::vector<AsmToken> tokens;
};
Tokenizer();
std::vector<Token> Tokenize( const char* begin, const char* end );
AsmOperand TokenizeAsmOperand( const char assemblyText[160] );
private:
TokenColor IdentifyToken( const char*& begin, const char* end );

View File

@ -4,6 +4,7 @@
#include <capstone.h>
#include "TracySourceTokenizer.hpp"
#include "imgui.h"
#include "TracyCharUtil.hpp"
#include "TracyColor.hpp"
@ -875,7 +876,7 @@ bool SourceView::Disassemble( uint64_t symAddr, const Worker& worker )
}
}
}
m_asm.emplace_back( AsmLine { op.address, jumpAddr, op.mnemonic, op.op_str, (uint8_t)op.size, leaData, jumpConditional, std::move( params ) } );
m_asm.emplace_back( AsmLine { op.address, jumpAddr, op.mnemonic, m_tokenizer.TokenizeAsmOperand( op.op_str ), (uint8_t)op.size, leaData, jumpConditional, std::move( params ) } );
#if CS_API_MAJOR >= 4
auto& entry = m_asm.back();
@ -906,7 +907,6 @@ bool SourceView::Disassemble( uint64_t symAddr, const Worker& worker )
break;
}
#endif
const auto mLen = (int)strlen( op.mnemonic );
if( mLen > mLenMax ) mLenMax = mLen;
if( op.size > bytesMax ) bytesMax = op.size;
@ -3843,68 +3843,53 @@ void SourceView::RenderAsmLine( AsmLine& line, const AddrStat& ipcnt, const Addr
memcpy( buf, line.mnemonic.c_str(), msz );
memset( buf+msz, ' ', m_maxMnemonicLen-msz );
bool hasJump = false;
int jumpLabel = 0;
if( line.jumpAddr != 0 )
{
auto lit = m_locMap.find( line.jumpAddr );
if( lit != m_locMap.end() )
{
jumpLabel = lit->second;
char tmp[64];
sprintf( tmp, ".L%" PRIu32, lit->second );
sprintf( tmp, ".L%" PRIu32, jumpLabel );
strcpy( buf+m_maxMnemonicLen, tmp );
hasJump = true;
}
}
if( !hasJump )
{
memcpy( buf+m_maxMnemonicLen, line.operands.c_str(), line.operands.size() + 1 );
memcpy( buf+m_maxMnemonicLen, line.operands.string.c_str(), line.operands.string.size() + 1 );
}
const bool isInContext = IsInContext( worker, line.addr );
if( asmIdx == m_asmSelected )
{
TextColoredUnformatted( ImVec4( 1, 0.25f, 0.25f, isInContext ? 1.f : 0.5f ), buf );
const auto selected = m_asmSelected == asmIdx;
auto transitiveDependency = false;
for(int i = 0; line.regData[i] != 0; i++) {
transitiveDependency |= line.regData[i] & ( WriteBit | ReadBit );
}
else if( line.regData[0] != 0 )
{
bool hasDepencency = false;
int idx = 0;
for(;;)
{
if( line.regData[idx] == 0 ) break;
if( line.regData[idx] & ( WriteBit | ReadBit ) )
{
hasDepencency = true;
break;
}
idx++;
}
if( hasDepencency )
{
TextColoredUnformatted( ImVec4( 1, 0.5f, 1, isInContext ? 1.f : 0.5f ), buf );
}
else
{
if( isInContext )
{
ImGui::TextUnformatted( buf );
}
else
{
TextDisabledUnformatted( buf );
}
}
}
else
{
if( isInContext )
{
ImGui::TextUnformatted( buf );
}
else
{
TextDisabledUnformatted( buf );
const auto shouldFocus = (isInContext && m_asmSelected == -1) || selected || transitiveDependency;
const auto& colorPalette = shouldFocus ? AsmSyntaxColors : AsmSyntaxColorsDimmed;
ImGui::BeginGroup();
TextColoredUnformatted( colorPalette[(int)Tokenizer::AsmTokenColor::Mnemonic], buf, buf + m_maxMnemonicLen );
if ( hasJump ) {
char tmp[64];
sprintf( tmp, ".L%" PRIu32, jumpLabel );
ImGui::SameLine();
TextColoredUnformatted( colorPalette[(int)Tokenizer::AsmTokenColor::Label], tmp );
} else {
for (const auto& token : line.operands.tokens) {
auto* begin = line.operands.string.c_str() + token.beginIdx;
auto* end = line.operands.string.c_str() + token.endIdx;
ImGui::SameLine();
TextColoredUnformatted( colorPalette[(int)token.color], begin, end );
}
}
ImGui::EndGroup();
uint32_t jumpOffset;
uint64_t jumpBase;
@ -5286,13 +5271,13 @@ void SourceView::Save( const Worker& worker, size_t start, size_t stop )
}
if( !hasJump )
{
if( v.operands.empty() )
if( v.operands.string.empty() )
{
fprintf( f, "\t%s\n", v.mnemonic.c_str() );
}
else
{
fprintf( f, "\t%-*s%s\n", m_maxMnemonicLen, v.mnemonic.c_str(), v.operands.c_str() );
fprintf( f, "\t%-*s%s\n", m_maxMnemonicLen, v.mnemonic.c_str(), v.operands.string.c_str() );
}
}
}

View File

@ -82,7 +82,7 @@ private:
uint64_t addr;
uint64_t jumpAddr;
std::string mnemonic;
std::string operands;
Tokenizer::AsmOperand operands;
uint8_t len;
LeaData leaData;
bool jumpConditional;