//===-- DisassemblerLLVMC.cpp -----------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9
10#include "DisassemblerLLVMC.h"
11
12#include "llvm-c/Disassembler.h"
13#include "llvm/Support/TargetSelect.h"
14
15#include "lldb/Core/Address.h"
16#include "lldb/Core/DataExtractor.h"
17#include "lldb/Core/Stream.h"
18#include "lldb/Symbol/SymbolContext.h"
19#include "lldb/Target/ExecutionContext.h"
20#include "lldb/Target/Process.h"
21#include "lldb/Target/RegisterContext.h"
22#include "lldb/Target/Target.h"
23#include "lldb/Target/StackFrame.h"
24
25#include <regex.h>
26
27using namespace lldb;
28using namespace lldb_private;
29
30class InstructionLLVMC : public lldb_private::Instruction
31{
32public:
33 InstructionLLVMC (DisassemblerLLVMC &disasm,
34 const lldb_private::Address &address,
35 lldb_private::AddressClass addr_class) :
36 Instruction(address, addr_class),
37 m_disasm(disasm),
38 m_is_valid(false),
39 m_no_comments(true),
40 m_comment_stream()
41 {
42 }
43
44 virtual
45 ~InstructionLLVMC ()
46 {
47 }
48
49 static void
50 PadToWidth (lldb_private::StreamString &ss,
51 int new_width)
52 {
53 int old_width = ss.GetSize();
54
55 if (old_width < new_width)
56 {
57 ss.Printf("%*s", new_width - old_width, "");
58 }
59 }
60
61 virtual void
62 Dump (lldb_private::Stream *s,
63 uint32_t max_opcode_byte_size,
64 bool show_address,
65 bool show_bytes,
66 const lldb_private::ExecutionContext* exe_ctx,
67 bool raw)
68 {
69 const size_t opcode_column_width = 7;
70 const size_t operand_column_width = 25;
71
72 StreamString ss;
73
74 ExecutionContextScope *exe_scope = NULL;
75
76 if ((!raw) && exe_ctx)
77 {
78 exe_scope = exe_ctx->GetBestExecutionContextScope();
79
80 DataExtractor extractor(m_raw_bytes.data(),
81 m_raw_bytes.size(),
82 m_disasm.GetArchitecture().GetByteOrder(),
83 m_disasm.GetArchitecture().GetAddressByteSize());
84
85 Parse <true> (m_address,
86 m_address_class,
87 extractor,
88 0,
89 exe_scope);
90 }
91
92 if (show_address)
93 {
94 m_address.Dump(&ss,
95 exe_scope,
96 Address::DumpStyleLoadAddress,
97 Address::DumpStyleModuleWithFileAddress,
98 0);
99
100 ss.PutCString(": ");
101 }
102
103 if (show_bytes)
104 {
105 if (m_opcode.GetType() == Opcode::eTypeBytes)
106 {
107 // x86_64 and i386 are the only ones that use bytes right now so
108 // pad out the byte dump to be able to always show 15 bytes (3 chars each)
109 // plus a space
110 if (max_opcode_byte_size > 0)
111 m_opcode.Dump (&ss, max_opcode_byte_size * 3 + 1);
112 else
113 m_opcode.Dump (&ss, 15 * 3 + 1);
114 }
115 else
116 {
117 // Else, we have ARM which can show up to a uint32_t 0x00000000 (10 spaces)
118 // plus two for padding...
119 if (max_opcode_byte_size > 0)
120 m_opcode.Dump (&ss, max_opcode_byte_size * 3 + 1);
121 else
122 m_opcode.Dump (&ss, 12);
123 }
124 }
125
126 int size_before_inst = ss.GetSize();
127
128 ss.PutCString(m_opcode_name.c_str());
129
130 PadToWidth(ss, size_before_inst + opcode_column_width);
131
132 ss.PutCString(m_mnemocics.c_str());
133
134 PadToWidth(ss, size_before_inst + opcode_column_width + operand_column_width);
135
136 if (!m_comment.empty())
137 {
138 ss.PutCString(" ; ");
139 ss.PutCString(m_comment.c_str());
140 }
141
142 ss.Flush();
143
144 s->PutCString(ss.GetData());
145 }
146
147 virtual bool
148 DoesBranch () const
149 {
150 return false;
151 }
152
153 virtual size_t
154 Decode (const lldb_private::Disassembler &disassembler,
155 const lldb_private::DataExtractor &data,
156 uint32_t data_offset)
157 {
158 Parse <false> (m_address,
159 m_address_class,
160 data,
161 data_offset,
162 NULL);
163
164 return m_opcode.GetByteSize();
165 }
166
167 void
168 AddReferencedAddress (std::string &description)
169 {
170 if (m_no_comments)
171 m_comment_stream.PutCString(", ");
172 else
173 m_no_comments = true;
174
175 m_comment_stream.PutCString(description.c_str());
176 }
177
178 virtual void
179 CalculateMnemonicOperandsAndComment (lldb_private::ExecutionContextScope *exe_scope)
180 {
181 DataExtractor extractor(m_raw_bytes.data(),
182 m_raw_bytes.size(),
183 m_disasm.GetArchitecture().GetByteOrder(),
184 m_disasm.GetArchitecture().GetAddressByteSize());
185
186 Parse <true> (m_address,
187 m_address_class,
188 extractor,
189 0,
190 exe_scope);
191 }
192
193 bool
194 IsValid ()
195 {
196 return m_is_valid;
197 }
198
199 size_t
200 GetByteSize ()
201 {
202 return m_opcode.GetByteSize();
203 }
204protected:
205 void PopulateOpcode (const DataExtractor &extractor,
206 uint32_t offset,
207 size_t inst_size)
208 {
209 llvm::Triple::ArchType arch = m_disasm.GetArchitecture().GetMachine();
210
211 switch (arch)
212 {
213 default:
214 case llvm::Triple::x86:
215 case llvm::Triple::x86_64:
216 m_opcode.SetOpcodeBytes(extractor.PeekData(offset, inst_size), inst_size);
217 break;
218 case llvm::Triple::arm:
219 case llvm::Triple::thumb:
220 switch (inst_size)
221 {
222 case 2:
223 {
224 m_opcode.SetOpcode16 (extractor.GetU16 (&offset));
225 break;
226 }
227 break;
228 case 4:
229 {
230 if (arch == llvm::Triple::arm &&
231 m_address_class == eAddressClassCodeAlternateISA)
232 {
233 // If it is a 32-bit THUMB instruction, we need to swap the upper & lower halves.
234 uint32_t orig_bytes = extractor.GetU32 (&offset);
235 uint16_t upper_bits = (orig_bytes >> 16) & ((1u << 16) - 1);
236 uint16_t lower_bits = orig_bytes & ((1u << 16) - 1);
237 uint32_t swapped = (lower_bits << 16) | upper_bits;
238 m_opcode.SetOpcode32 (swapped);
239 }
240 else
241 {
242 m_opcode.SetOpcode32 (extractor.GetU32 (&offset));
243 }
244 }
245 break;
246 default:
247 assert (!"Invalid ARM opcode size");
248 break;
249 }
250 break;
251 }
252 }
253
254 template <bool Reparse> bool Parse (const lldb_private::Address &address,
255 lldb_private::AddressClass addr_class,
256 const DataExtractor &extractor,
257 uint32_t data_offset,
258 lldb_private::ExecutionContextScope *exe_scope)
259 {
260 std::vector<char> out_string(256);
261
262 const uint8_t *data_start = extractor.GetDataStart();
263
264 m_disasm.Lock(this, exe_scope);
265
266 ::LLVMDisasmContextRef disasm_context;
267
268 if (addr_class == eAddressClassCodeAlternateISA)
269 disasm_context = m_disasm.m_alternate_disasm_context;
270 else
271 disasm_context = m_disasm.m_disasm_context;
272
273 m_comment_stream.Clear();
274
275 size_t inst_size = ::LLVMDisasmInstruction(disasm_context,
276 const_cast<uint8_t*>(data_start) + data_offset,
277 extractor.GetByteSize() - data_offset,
278 address.GetFileAddress(),
279 out_string.data(),
280 out_string.size());
281
282 m_comment_stream.Flush();
283 m_no_comments = false;
284
285 m_comment.swap(m_comment_stream.GetString());
286
287 m_disasm.Unlock();
288
289 if (Reparse)
290 {
291 if (inst_size != m_raw_bytes.size())
292 return false;
293 }
294 else
295 {
296 if (!inst_size)
297 return false;
298
299 PopulateOpcode(extractor, data_offset, inst_size);
300
301 m_raw_bytes.resize(inst_size);
302 memcpy(m_raw_bytes.data(), data_start + data_offset, inst_size);
303
304 if (!s_regex_compiled)
305 {
306 ::regcomp(&s_regex, "[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?", REG_EXTENDED);
307 s_regex_compiled = true;
308 }
309
310 ::regmatch_t matches[3];
311
312 const char *out_data = out_string.data();
313
314 if (!::regexec(&s_regex, out_data, sizeof(matches) / sizeof(::regmatch_t), matches, 0))
315 {
316 if (matches[1].rm_so != -1)
317 m_opcode_name.assign(out_data + matches[1].rm_so, matches[1].rm_eo - matches[1].rm_so);
318 if (matches[2].rm_so != -1)
319 m_mnemocics.assign(out_data + matches[2].rm_so, matches[2].rm_eo - matches[2].rm_so);
320 }
321
322 m_is_valid = true;
323 }
324
325 return true;
326 }
327
328 bool m_is_valid;
329 DisassemblerLLVMC &m_disasm;
330 std::vector<uint8_t> m_raw_bytes;
331
332 bool m_no_comments;
333 StreamString m_comment_stream;
334
335 static bool s_regex_compiled;
336 static ::regex_t s_regex;
337};
338
339bool InstructionLLVMC::s_regex_compiled = false;
340::regex_t InstructionLLVMC::s_regex;
341
342Disassembler *
343DisassemblerLLVMC::CreateInstance (const ArchSpec &arch)
344{
345 std::auto_ptr<DisassemblerLLVMC> disasm_ap (new DisassemblerLLVMC(arch));
346
347 if (disasm_ap.get() && disasm_ap->IsValid())
348 return disasm_ap.release();
349
350 return NULL;
351}
352
353DisassemblerLLVMC::DisassemblerLLVMC (const ArchSpec &arch) :
354 Disassembler(arch),
355 m_disasm_context(NULL),
356 m_alternate_disasm_context(NULL)
357{
358 m_disasm_context = ::LLVMCreateDisasm(arch.GetTriple().getTriple().c_str(),
359 (void*)this,
360 /*TagType=*/1,
361 DisassemblerLLVMC::OpInfoCallback,
362 DisassemblerLLVMC::SymbolLookupCallback);
363
364 if (arch.GetTriple().getArch() == llvm::Triple::arm)
365 {
366 m_alternate_disasm_context = ::LLVMCreateDisasm("thumbv7-apple-darwin",
367 (void*)this,
368 /*TagType=*/1,
369 DisassemblerLLVMC::OpInfoCallback,
370 DisassemblerLLVMC::SymbolLookupCallback);
371 }
372}
373
374DisassemblerLLVMC::~DisassemblerLLVMC()
375{
376}
377
378size_t
379DisassemblerLLVMC::DecodeInstructions (const Address &base_addr,
380 const DataExtractor& data,
381 uint32_t data_offset,
382 uint32_t num_instructions,
383 bool append)
384{
385 if (!append)
386 m_instruction_list.Clear();
387
388 if (!IsValid())
389 return 0;
390
391 uint32_t data_cursor = data_offset;
392 size_t data_byte_size = data.GetByteSize();
393 uint32_t instructions_parsed = 0;
394
395 uint64_t instruction_pointer = base_addr.GetFileAddress();
396
397 std::vector<char> out_string(256);
398
399 while (data_offset < data_byte_size && instructions_parsed < num_instructions)
400 {
401 Address instr_address = base_addr;
402 instr_address.Slide(data_cursor);
403
404 AddressClass address_class = eAddressClassUnknown;
405
406 if (m_alternate_disasm_context)
407 address_class = instr_address.GetAddressClass ();
408
409 InstructionSP inst_sp(new InstructionLLVMC(*this,
410 instr_address,
411 address_class));
412
413 if (!inst_sp)
414 return data_cursor - data_offset;
415
416 uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor);
417
418 if (!inst_size)
419 return data_cursor - data_offset;
420
421 m_instruction_list.Append(inst_sp);
422
423 instruction_pointer += inst_size;
424 data_cursor += inst_size;
425 instructions_parsed++;
426 }
427
428 return data_cursor - data_offset;
429}
430
431void
432DisassemblerLLVMC::Initialize()
433{
434 PluginManager::RegisterPlugin (GetPluginNameStatic(),
435 GetPluginDescriptionStatic(),
436 CreateInstance);
437
438 llvm::InitializeAllTargetInfos();
439 llvm::InitializeAllTargetMCs();
440 llvm::InitializeAllAsmParsers();
441 llvm::InitializeAllDisassemblers();
442}
443
444void
445DisassemblerLLVMC::Terminate()
446{
447 PluginManager::UnregisterPlugin (CreateInstance);
448}
449
450
451const char *
452DisassemblerLLVMC::GetPluginNameStatic()
453{
454 return "llvm";
455}
456
457const char *
458DisassemblerLLVMC::GetPluginDescriptionStatic()
459{
460 return "Disassembler that uses LLVM opcode tables to disassemble i386, x86_64 and ARM.";
461}
462
463int DisassemblerLLVMC::OpInfoCallback (void *DisInfo,
464 uint64_t PC,
465 uint64_t Offset,
466 uint64_t Size,
467 int TagType,
468 void *TagBug)
469{
470 return static_cast<DisassemblerLLVMC*>(DisInfo)->OpInfo(PC,
471 Offset,
472 Size,
473 TagType,
474 TagBug);
475}
476
477const char *DisassemblerLLVMC::SymbolLookupCallback(void *DisInfo,
478 uint64_t ReferenceValue,
479 uint64_t *ReferenceType,
480 uint64_t ReferencePC,
481 const char **ReferenceName)
482{
483 return static_cast<DisassemblerLLVMC*>(DisInfo)->SymbolLookup(ReferenceValue,
484 ReferenceType,
485 ReferencePC,
486 ReferenceName);
487}
488
489int DisassemblerLLVMC::OpInfo (uint64_t PC,
490 uint64_t Offset,
491 uint64_t Size,
492 int TagType,
493 void *TagBug)
494{
495 switch (TagType)
496 {
497 default:
498 break;
499 case 1:
500 bzero (TagBug, sizeof(::LLVMOpInfo1));
501 break;
502 }
503 return 0;
504}
505
506const char *DisassemblerLLVMC::SymbolLookup (uint64_t ReferenceValue,
507 uint64_t *ReferenceType,
508 uint64_t ReferencePC,
509 const char **ReferenceName)
510{
511 const char *result_name = NULL;
512 uint64_t result_reference_type = LLVMDisassembler_ReferenceType_InOut_None;
513 const char *result_referred_name = NULL;
514
515 if (m_exe_scope && m_inst)
516 {
517 Address reference_address;
518
Johnny Chendd5e3632012-02-20 22:05:34 +0000519 TargetSP target_sp (m_exe_scope->CalculateTarget());
520 Target *target = target_sp.get();
Sean Callanan32a56ec2012-02-17 00:53:45 +0000521
522 if (target)
523 {
524 if (!target->GetSectionLoadList().IsEmpty())
525 target->GetSectionLoadList().ResolveLoadAddress(ReferenceValue, reference_address);
526 else
527 target->GetImages().ResolveFileAddress(ReferenceValue, reference_address);
528
529 if (reference_address.IsValid())
530 {
531 SymbolContext reference_sc;
532
533 target->GetImages().ResolveSymbolContextForAddress(reference_address,
534 eSymbolContextFunction | eSymbolContextSymbol,
535 reference_sc);
536
537 StreamString ss;
538
539 const bool show_fullpaths = false;
540 const bool show_module = true;
541 const bool show_inlined_frames = false;
542
543 reference_sc.DumpStopContext(&ss,
544 m_exe_scope,
545 reference_address,
546 show_fullpaths,
547 show_module,
548 show_inlined_frames);
549
550 m_inst->AddReferencedAddress(ss.GetString());
551 }
552 }
553 }
554
555 *ReferenceType = result_reference_type;
556 *ReferenceName = result_referred_name;
557
558 return result_name;
559}
560
//------------------------------------------------------------------
// PluginInterface protocol
//------------------------------------------------------------------
564const char *
565DisassemblerLLVMC::GetPluginName()
566{
567 return "DisassemblerLLVMC";
568}
569
570const char *
571DisassemblerLLVMC::GetShortPluginName()
572{
573 return GetPluginNameStatic();
574}
575
576uint32_t
577DisassemblerLLVMC::GetPluginVersion()
578{
579 return 1;
580}
581