| //===-- DisassemblerLLVM.cpp ------------------------------------*- C++ -*-===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "DisassemblerLLVM.h" |
| |
| #include "llvm-c/EnhancedDisassembly.h" |
| #include "llvm/Support/TargetSelect.h" |
| |
| #include "lldb/Core/Address.h" |
| #include "lldb/Core/DataExtractor.h" |
| #include "lldb/Core/Disassembler.h" |
| #include "lldb/Core/Module.h" |
| #include "lldb/Core/PluginManager.h" |
| #include "lldb/Core/Stream.h" |
| #include "lldb/Core/StreamString.h" |
| #include "lldb/Symbol/SymbolContext.h" |
| |
| #include "lldb/Target/ExecutionContext.h" |
| #include "lldb/Target/Process.h" |
| #include "lldb/Target/RegisterContext.h" |
| #include "lldb/Target/Target.h" |
| |
| #include <assert.h> |
| |
| using namespace lldb; |
| using namespace lldb_private; |
| |
| |
| static int |
| DataExtractorByteReader (uint8_t *byte, uint64_t address, void *arg) |
| { |
| DataExtractor &extractor = *((DataExtractor *)arg); |
| |
| if (extractor.ValidOffset(address)) |
| { |
| *byte = *(extractor.GetDataStart() + address); |
| return 0; |
| } |
| else |
| { |
| return -1; |
| } |
| } |
| |
| namespace { |
| struct RegisterReaderArg { |
| const lldb::addr_t instructionPointer; |
| const EDDisassemblerRef disassembler; |
| |
| RegisterReaderArg(lldb::addr_t ip, |
| EDDisassemblerRef dis) : |
| instructionPointer(ip), |
| disassembler(dis) |
| { |
| } |
| }; |
| } |
| |
| static int IPRegisterReader(uint64_t *value, unsigned regID, void* arg) |
| { |
| uint64_t instructionPointer = ((RegisterReaderArg*)arg)->instructionPointer; |
| EDDisassemblerRef disassembler = ((RegisterReaderArg*)arg)->disassembler; |
| |
| if (EDRegisterIsProgramCounter(disassembler, regID)) { |
| *value = instructionPointer; |
| return 0; |
| } |
| |
| return -1; |
| } |
| |
| InstructionLLVM::InstructionLLVM (const Address &addr, |
| AddressClass addr_class, |
| EDDisassemblerRef disassembler, |
| llvm::Triple::ArchType arch_type) : |
| Instruction (addr, addr_class), |
| m_disassembler (disassembler), |
| m_inst (NULL), |
| m_arch_type (arch_type) |
| { |
| } |
| |
| InstructionLLVM::~InstructionLLVM() |
| { |
| if (m_inst) |
| { |
| EDReleaseInst(m_inst); |
| m_inst = NULL; |
| } |
| } |
| |
| static void |
| PadString(Stream *s, const std::string &str, size_t width) |
| { |
| int diff = width - str.length(); |
| |
| if (diff > 0) |
| s->Printf("%s%*.*s", str.c_str(), diff, diff, ""); |
| else |
| s->Printf("%s ", str.c_str()); |
| } |
| static void |
| AddSymbolicInfo (const ExecutionContext *exe_ctx, |
| StreamString &comment, |
| uint64_t operand_value, |
| const Address &inst_addr) |
| { |
| Address so_addr; |
| Target *target = NULL; |
| if (exe_ctx) |
| target = exe_ctx->GetTargetPtr(); |
| if (target && !target->GetSectionLoadList().IsEmpty()) |
| { |
| if (target->GetSectionLoadList().ResolveLoadAddress(operand_value, so_addr)) |
| so_addr.Dump (&comment, |
| exe_ctx ? exe_ctx->GetBestExecutionContextScope() : NULL, |
| Address::DumpStyleResolvedDescriptionNoModule, |
| Address::DumpStyleSectionNameOffset); |
| } |
| else |
| { |
| ModuleSP module_sp (inst_addr.GetModule()); |
| if (module_sp) |
| { |
| if (module_sp->ResolveFileAddress(operand_value, so_addr)) |
| so_addr.Dump (&comment, |
| exe_ctx ? exe_ctx->GetBestExecutionContextScope() : NULL, |
| Address::DumpStyleResolvedDescriptionNoModule, |
| Address::DumpStyleSectionNameOffset); |
| } |
| } |
| } |
| |
| #include "llvm/ADT/StringRef.h" |
| static inline void StripSpaces(llvm::StringRef &Str) |
| { |
| while (!Str.empty() && isspace(Str[0])) |
| Str = Str.substr(1); |
| while (!Str.empty() && isspace(Str.back())) |
| Str = Str.substr(0, Str.size()-1); |
| } |
| static inline void RStrip(llvm::StringRef &Str, char c) |
| { |
| if (!Str.empty() && Str.back() == c) |
| Str = Str.substr(0, Str.size()-1); |
| } |
| // Aligns the raw disassembly (passed as 'str') with the rest of edis'ed disassembly output. |
| // This is called from non-raw mode when edis of the current m_inst fails for some reason. |
| static void |
| Align(Stream *s, const char *str, size_t opcodeColWidth, size_t operandColWidth) |
| { |
| llvm::StringRef raw_disasm(str); |
| StripSpaces(raw_disasm); |
| // Split the raw disassembly into opcode and operands. |
| std::pair<llvm::StringRef, llvm::StringRef> p = raw_disasm.split('\t'); |
| PadString(s, p.first, opcodeColWidth); |
| if (!p.second.empty()) |
| PadString(s, p.second, operandColWidth); |
| } |
| |
| #define AlignPC(pc_val) (pc_val & 0xFFFFFFFC) |
| |
| void |
| InstructionLLVM::CalculateMnemonicOperandsAndComment (const ExecutionContext* exe_ctx) |
| { |
| const int num_tokens = EDNumTokens(m_inst); |
| if (num_tokens > 0) |
| { |
| const char *token_cstr = NULL; |
| int currentOpIndex = -1; |
| StreamString comment; |
| uint32_t addr_nibble_size = 8; |
| addr_t base_addr = LLDB_INVALID_ADDRESS; |
| Target *target = exe_ctx ? exe_ctx->GetTargetPtr() : NULL; |
| if (target && !target->GetSectionLoadList().IsEmpty()) |
| base_addr = GetAddress().GetLoadAddress (target); |
| if (base_addr == LLDB_INVALID_ADDRESS) |
| base_addr = GetAddress().GetFileAddress (); |
| addr_nibble_size = target->GetArchitecture().GetAddressByteSize() * 2; |
| |
| lldb::addr_t PC = base_addr + EDInstByteSize(m_inst); |
| |
| // When executing an ARM instruction, PC reads as the address of the |
| // current instruction plus 8. And for Thumb, it is plus 4. |
| if (m_arch_type == llvm::Triple::arm) |
| PC = base_addr + 8; |
| else if (m_arch_type == llvm::Triple::thumb) |
| PC = base_addr + 4; |
| |
| RegisterReaderArg rra(PC, m_disassembler); |
| |
| for (int token_idx = 0; token_idx < num_tokens; ++token_idx) |
| { |
| EDTokenRef token; |
| if (EDGetToken(&token, m_inst, token_idx)) |
| break; |
| |
| if (EDTokenIsOpcode(token) == 1) |
| { |
| if (EDGetTokenString(&token_cstr, token) == 0) // 0 on success |
| { |
| if (token_cstr) |
| m_opcode_name.assign(token_cstr); |
| } |
| } |
| else |
| { |
| int operandIndex = EDOperandIndexForToken(token); |
| |
| if (operandIndex >= 0) |
| { |
| if (operandIndex != currentOpIndex) |
| { |
| currentOpIndex = operandIndex; |
| EDOperandRef operand; |
| |
| if (!EDGetOperand(&operand, m_inst, currentOpIndex)) |
| { |
| if (EDOperandIsMemory(operand)) |
| { |
| uint64_t operand_value; |
| |
| if (!EDEvaluateOperand(&operand_value, operand, IPRegisterReader, &rra)) |
| { |
| comment.Printf("0x%*.*llx ", addr_nibble_size, addr_nibble_size, operand_value); |
| AddSymbolicInfo (exe_ctx, comment, operand_value, GetAddress()); |
| } |
| } |
| } |
| } |
| } |
| if (m_mnemocics.empty() && EDTokenIsWhitespace (token) == 1) |
| continue; |
| if (EDGetTokenString (&token_cstr, token)) |
| break; |
| m_mnemocics.append (token_cstr); |
| } |
| } |
| // FIXME!!! |
| // Workaround for llvm::tB's operands not properly parsed by ARMAsmParser. |
| if (m_arch_type == llvm::Triple::thumb && m_opcode_name.compare("b") == 0) |
| { |
| const char *inst_str; |
| const char *pos = NULL; |
| comment.Clear(); |
| if (EDGetInstString(&inst_str, m_inst) == 0 && (pos = strstr(inst_str, "#")) != NULL) |
| { |
| uint64_t operand_value = PC + atoi(++pos); |
| // Put the address value into the operands. |
| comment.Printf("0x%*.*llx ", addr_nibble_size, addr_nibble_size, operand_value); |
| AddSymbolicInfo (exe_ctx, comment, operand_value, GetAddress()); |
| } |
| } |
| // Yet more workaround for "bl #..." and "blx #...". |
| if ((m_arch_type == llvm::Triple::arm || m_arch_type == llvm::Triple::thumb) && |
| (m_opcode_name.compare("bl") == 0 || m_opcode_name.compare("blx") == 0)) |
| { |
| const char *inst_str; |
| const char *pos = NULL; |
| comment.Clear(); |
| if (EDGetInstString(&inst_str, m_inst) == 0 && (pos = strstr(inst_str, "#")) != NULL) |
| { |
| if (m_arch_type == llvm::Triple::thumb && m_opcode_name.compare("blx") == 0) |
| { |
| // A8.6.23 BLX (immediate) |
| // Target Address = Align(PC,4) + offset value |
| PC = AlignPC(PC); |
| } |
| uint64_t operand_value = PC + atoi(++pos); |
| // Put the address value into the comment. |
| comment.Printf("0x%*.*llx ", addr_nibble_size, addr_nibble_size, operand_value); |
| // And the original token string into the operands. |
| // llvm::StringRef Str(pos - 1); |
| // RStrip(Str, '\n'); |
| // operands.PutCString(Str.str().c_str()); |
| AddSymbolicInfo (exe_ctx, comment, operand_value, GetAddress()); |
| } |
| } |
| // END of workaround. |
| |
| m_comment.swap (comment.GetString()); |
| } |
| } |
| |
| bool |
| InstructionLLVM::DoesBranch() const |
| { |
| return EDInstIsBranch(m_inst); |
| } |
| |
| size_t |
| InstructionLLVM::Decode (const Disassembler &disassembler, |
| const lldb_private::DataExtractor &data, |
| uint32_t data_offset) |
| { |
| if (EDCreateInsts(&m_inst, 1, m_disassembler, DataExtractorByteReader, data_offset, (void*)(&data))) |
| { |
| const int byte_size = EDInstByteSize(m_inst); |
| uint32_t offset = data_offset; |
| // Make a copy of the opcode in m_opcode |
| switch (disassembler.GetArchitecture().GetMachine()) |
| { |
| case llvm::Triple::x86: |
| case llvm::Triple::x86_64: |
| m_opcode.SetOpcodeBytes (data.PeekData (data_offset, byte_size), byte_size); |
| break; |
| |
| case llvm::Triple::arm: |
| case llvm::Triple::thumb: |
| switch (byte_size) |
| { |
| case 2: |
| m_opcode.SetOpcode16 (data.GetU16 (&offset)); |
| break; |
| |
| case 4: |
| { |
| if (GetAddressClass() == eAddressClassCodeAlternateISA) |
| { |
| // If it is a 32-bit THUMB instruction, we need to swap the upper & lower halves. |
| uint32_t orig_bytes = data.GetU32 (&offset); |
| uint16_t upper_bits = (orig_bytes >> 16) & ((1u << 16) - 1); |
| uint16_t lower_bits = orig_bytes & ((1u << 16) - 1); |
| uint32_t swapped = (lower_bits << 16) | upper_bits; |
| m_opcode.SetOpcode32 (swapped); |
| } |
| else |
| m_opcode.SetOpcode32 (data.GetU32 (&offset)); |
| } |
| break; |
| |
| default: |
| assert (!"Invalid ARM opcode size"); |
| break; |
| } |
| break; |
| |
| default: |
| assert (!"This shouldn't happen since we control the architecture we allow DisassemblerLLVM to be created for"); |
| break; |
| } |
| return byte_size; |
| } |
| else |
| return 0; |
| } |
| |
| static inline EDAssemblySyntax_t |
| SyntaxForArchSpec (const ArchSpec &arch) |
| { |
| switch (arch.GetMachine ()) |
| { |
| case llvm::Triple::x86: |
| case llvm::Triple::x86_64: |
| return kEDAssemblySyntaxX86ATT; |
| case llvm::Triple::arm: |
| case llvm::Triple::thumb: |
| return kEDAssemblySyntaxARMUAL; |
| default: |
| break; |
| } |
| return (EDAssemblySyntax_t)0; // default |
| } |
| |
| Disassembler * |
| DisassemblerLLVM::CreateInstance(const ArchSpec &arch) |
| { |
| std::auto_ptr<DisassemblerLLVM> disasm_ap (new DisassemblerLLVM(arch)); |
| |
| if (disasm_ap.get() && disasm_ap->IsValid()) |
| return disasm_ap.release(); |
| |
| return NULL; |
| } |
| |
| DisassemblerLLVM::DisassemblerLLVM(const ArchSpec &arch) : |
| Disassembler (arch), |
| m_disassembler (NULL), |
| m_disassembler_thumb (NULL) // For ARM only |
| { |
| // Initialize the LLVM objects needed to use the disassembler. |
| static struct InitializeLLVM { |
| InitializeLLVM() { |
| llvm::InitializeAllTargetInfos(); |
| llvm::InitializeAllTargetMCs(); |
| llvm::InitializeAllAsmParsers(); |
| llvm::InitializeAllDisassemblers(); |
| } |
| } InitializeLLVM; |
| |
| const std::string &arch_triple = arch.GetTriple().str(); |
| if (!arch_triple.empty()) |
| { |
| if (EDGetDisassembler(&m_disassembler, arch_triple.c_str(), SyntaxForArchSpec (arch))) |
| m_disassembler = NULL; |
| llvm::Triple::ArchType llvm_arch = arch.GetTriple().getArch(); |
| // Don't have the lldb::Triple::thumb architecture here. If someone specifies |
| // "thumb" as the architecture, we want a thumb only disassembler. But if any |
| // architecture starting with "arm" if specified, we want to auto detect the |
| // arm/thumb code automatically using the AddressClass from section offset |
| // addresses. |
| if (llvm_arch == llvm::Triple::arm) |
| { |
| ArchSpec thumb_arch(arch); |
| thumb_arch.GetTriple().setArchName(llvm::StringRef("thumbv7")); |
| std::string thumb_triple(thumb_arch.GetTriple().getTriple()); |
| if (EDGetDisassembler(&m_disassembler_thumb, thumb_triple.c_str(), kEDAssemblySyntaxARMUAL)) |
| m_disassembler_thumb = NULL; |
| } |
| } |
| } |
| |
| DisassemblerLLVM::~DisassemblerLLVM() |
| { |
| } |
| |
| size_t |
| DisassemblerLLVM::DecodeInstructions |
| ( |
| const Address &base_addr, |
| const DataExtractor& data, |
| uint32_t data_offset, |
| uint32_t num_instructions, |
| bool append |
| ) |
| { |
| if (m_disassembler == NULL) |
| return 0; |
| |
| size_t total_inst_byte_size = 0; |
| |
| if (!append) |
| m_instruction_list.Clear(); |
| |
| while (data.ValidOffset(data_offset) && num_instructions) |
| { |
| Address inst_addr (base_addr); |
| inst_addr.Slide(data_offset); |
| |
| bool use_thumb = false; |
| // If we have a thumb disassembler, then we have an ARM architecture |
| // so we need to check what the instruction address class is to make |
| // sure we shouldn't be disassembling as thumb... |
| AddressClass inst_address_class = eAddressClassInvalid; |
| if (m_disassembler_thumb) |
| { |
| inst_address_class = inst_addr.GetAddressClass (); |
| if (inst_address_class == eAddressClassCodeAlternateISA) |
| use_thumb = true; |
| } |
| |
| InstructionSP inst_sp (new InstructionLLVM (inst_addr, |
| inst_address_class, |
| use_thumb ? m_disassembler_thumb : m_disassembler, |
| use_thumb ? llvm::Triple::thumb : m_arch.GetMachine())); |
| |
| size_t inst_byte_size = inst_sp->Decode (*this, data, data_offset); |
| |
| if (inst_byte_size == 0) |
| break; |
| |
| m_instruction_list.Append (inst_sp); |
| |
| total_inst_byte_size += inst_byte_size; |
| data_offset += inst_byte_size; |
| num_instructions--; |
| } |
| |
| return total_inst_byte_size; |
| } |
| |
| void |
| DisassemblerLLVM::Initialize() |
| { |
| PluginManager::RegisterPlugin (GetPluginNameStatic(), |
| GetPluginDescriptionStatic(), |
| CreateInstance); |
| } |
| |
| void |
| DisassemblerLLVM::Terminate() |
| { |
| PluginManager::UnregisterPlugin (CreateInstance); |
| } |
| |
| |
| const char * |
| DisassemblerLLVM::GetPluginNameStatic() |
| { |
| return "llvm-edis"; |
| } |
| |
| const char * |
| DisassemblerLLVM::GetPluginDescriptionStatic() |
| { |
| return "Disassembler that uses the LLVM enhanced disassembler to disassemble i386, x86_64 and ARM."; |
| } |
| |
| //------------------------------------------------------------------ |
| // PluginInterface protocol |
| //------------------------------------------------------------------ |
| const char * |
| DisassemblerLLVM::GetPluginName() |
| { |
| return "DisassemblerLLVM"; |
| } |
| |
| const char * |
| DisassemblerLLVM::GetShortPluginName() |
| { |
| return GetPluginNameStatic(); |
| } |
| |
| uint32_t |
| DisassemblerLLVM::GetPluginVersion() |
| { |
| return 1; |
| } |
| |