blob: a41ff6e77f37a997b56a9d59de6e68a87eedd12c [file] [log] [blame]
Sean Callanan32a56ec2012-02-17 00:53:45 +00001//===-- DisassemblerLLVMC.cpp -----------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "DisassemblerLLVMC.h"
11
12#include "llvm-c/Disassembler.h"
13#include "llvm/Support/TargetSelect.h"
14
15#include "lldb/Core/Address.h"
16#include "lldb/Core/DataExtractor.h"
17#include "lldb/Core/Stream.h"
18#include "lldb/Symbol/SymbolContext.h"
19#include "lldb/Target/ExecutionContext.h"
20#include "lldb/Target/Process.h"
21#include "lldb/Target/RegisterContext.h"
22#include "lldb/Target/Target.h"
23#include "lldb/Target/StackFrame.h"
24
25#include <regex.h>
26
27using namespace lldb;
28using namespace lldb_private;
29
30class InstructionLLVMC : public lldb_private::Instruction
31{
32public:
33 InstructionLLVMC (DisassemblerLLVMC &disasm,
34 const lldb_private::Address &address,
Greg Clayton7fb14302012-04-13 00:07:34 +000035 AddressClass addr_class) :
Sean Callanan32a56ec2012-02-17 00:53:45 +000036 Instruction(address, addr_class),
Sean Callanan32a56ec2012-02-17 00:53:45 +000037 m_is_valid(false),
Bill Wendling432fe102012-04-06 00:09:59 +000038 m_disasm(disasm),
Sean Callanan32a56ec2012-02-17 00:53:45 +000039 m_no_comments(true),
Sean Callanan5f1b66c2012-03-02 23:22:53 +000040 m_comment_stream(),
41 m_does_branch(eLazyBoolCalculate)
Sean Callanan32a56ec2012-02-17 00:53:45 +000042 {
43 }
44
45 virtual
46 ~InstructionLLVMC ()
47 {
48 }
49
50 static void
51 PadToWidth (lldb_private::StreamString &ss,
52 int new_width)
53 {
54 int old_width = ss.GetSize();
55
56 if (old_width < new_width)
57 {
58 ss.Printf("%*s", new_width - old_width, "");
59 }
60 }
61
62 virtual void
63 Dump (lldb_private::Stream *s,
64 uint32_t max_opcode_byte_size,
65 bool show_address,
66 bool show_bytes,
67 const lldb_private::ExecutionContext* exe_ctx,
68 bool raw)
69 {
70 const size_t opcode_column_width = 7;
71 const size_t operand_column_width = 25;
72
73 StreamString ss;
74
75 ExecutionContextScope *exe_scope = NULL;
76
77 if ((!raw) && exe_ctx)
78 {
79 exe_scope = exe_ctx->GetBestExecutionContextScope();
80
81 DataExtractor extractor(m_raw_bytes.data(),
82 m_raw_bytes.size(),
83 m_disasm.GetArchitecture().GetByteOrder(),
84 m_disasm.GetArchitecture().GetAddressByteSize());
85
86 Parse <true> (m_address,
87 m_address_class,
88 extractor,
89 0,
90 exe_scope);
91 }
92
93 if (show_address)
94 {
95 m_address.Dump(&ss,
96 exe_scope,
97 Address::DumpStyleLoadAddress,
98 Address::DumpStyleModuleWithFileAddress,
99 0);
100
101 ss.PutCString(": ");
102 }
103
104 if (show_bytes)
105 {
106 if (m_opcode.GetType() == Opcode::eTypeBytes)
107 {
108 // x86_64 and i386 are the only ones that use bytes right now so
109 // pad out the byte dump to be able to always show 15 bytes (3 chars each)
110 // plus a space
111 if (max_opcode_byte_size > 0)
112 m_opcode.Dump (&ss, max_opcode_byte_size * 3 + 1);
113 else
114 m_opcode.Dump (&ss, 15 * 3 + 1);
115 }
116 else
117 {
118 // Else, we have ARM which can show up to a uint32_t 0x00000000 (10 spaces)
119 // plus two for padding...
120 if (max_opcode_byte_size > 0)
121 m_opcode.Dump (&ss, max_opcode_byte_size * 3 + 1);
122 else
123 m_opcode.Dump (&ss, 12);
124 }
125 }
126
127 int size_before_inst = ss.GetSize();
128
129 ss.PutCString(m_opcode_name.c_str());
130
131 PadToWidth(ss, size_before_inst + opcode_column_width);
132
133 ss.PutCString(m_mnemocics.c_str());
134
135 PadToWidth(ss, size_before_inst + opcode_column_width + operand_column_width);
136
137 if (!m_comment.empty())
138 {
139 ss.PutCString(" ; ");
140 ss.PutCString(m_comment.c_str());
141 }
142
143 ss.Flush();
144
145 s->PutCString(ss.GetData());
146 }
147
148 virtual bool
149 DoesBranch () const
150 {
Sean Callanan5f1b66c2012-03-02 23:22:53 +0000151 return m_does_branch == eLazyBoolYes;
Sean Callanan32a56ec2012-02-17 00:53:45 +0000152 }
153
154 virtual size_t
155 Decode (const lldb_private::Disassembler &disassembler,
156 const lldb_private::DataExtractor &data,
157 uint32_t data_offset)
158 {
159 Parse <false> (m_address,
160 m_address_class,
161 data,
162 data_offset,
163 NULL);
164
165 return m_opcode.GetByteSize();
166 }
167
168 void
169 AddReferencedAddress (std::string &description)
170 {
171 if (m_no_comments)
172 m_comment_stream.PutCString(", ");
173 else
174 m_no_comments = true;
175
176 m_comment_stream.PutCString(description.c_str());
177 }
178
179 virtual void
180 CalculateMnemonicOperandsAndComment (lldb_private::ExecutionContextScope *exe_scope)
181 {
182 DataExtractor extractor(m_raw_bytes.data(),
183 m_raw_bytes.size(),
184 m_disasm.GetArchitecture().GetByteOrder(),
185 m_disasm.GetArchitecture().GetAddressByteSize());
186
187 Parse <true> (m_address,
188 m_address_class,
189 extractor,
190 0,
191 exe_scope);
192 }
193
194 bool
195 IsValid ()
196 {
197 return m_is_valid;
198 }
199
200 size_t
201 GetByteSize ()
202 {
203 return m_opcode.GetByteSize();
204 }
205protected:
206 void PopulateOpcode (const DataExtractor &extractor,
207 uint32_t offset,
208 size_t inst_size)
209 {
Greg Clayton50561692012-04-11 21:13:31 +0000210 const ArchSpec &arch = m_disasm.GetArchitecture();
211 llvm::Triple::ArchType machine = arch.GetMachine();
Sean Callanan32a56ec2012-02-17 00:53:45 +0000212
Greg Clayton50561692012-04-11 21:13:31 +0000213 switch (machine)
Sean Callanan32a56ec2012-02-17 00:53:45 +0000214 {
Sean Callanan32a56ec2012-02-17 00:53:45 +0000215 case llvm::Triple::x86:
216 case llvm::Triple::x86_64:
217 m_opcode.SetOpcodeBytes(extractor.PeekData(offset, inst_size), inst_size);
Greg Clayton50561692012-04-11 21:13:31 +0000218 return;
219
Sean Callanan32a56ec2012-02-17 00:53:45 +0000220 case llvm::Triple::arm:
221 case llvm::Triple::thumb:
222 switch (inst_size)
223 {
224 case 2:
Greg Clayton50561692012-04-11 21:13:31 +0000225 m_opcode.SetOpcode16 (extractor.GetU16 (&offset));
Sean Callanan32a56ec2012-02-17 00:53:45 +0000226 break;
227 case 4:
Greg Clayton50561692012-04-11 21:13:31 +0000228 if (machine == llvm::Triple::arm && m_address_class == eAddressClassCodeAlternateISA)
Sean Callanan32a56ec2012-02-17 00:53:45 +0000229 {
Greg Clayton50561692012-04-11 21:13:31 +0000230 // If it is a 32-bit THUMB instruction, we need to swap the upper & lower halves.
231 uint32_t orig_bytes = extractor.GetU32 (&offset);
232 uint16_t upper_bits = (orig_bytes >> 16) & ((1u << 16) - 1);
233 uint16_t lower_bits = orig_bytes & ((1u << 16) - 1);
234 uint32_t swapped = (lower_bits << 16) | upper_bits;
235 m_opcode.SetOpcode32 (swapped);
236 }
237 else
238 {
239 m_opcode.SetOpcode32 (extractor.GetU32 (&offset));
Sean Callanan32a56ec2012-02-17 00:53:45 +0000240 }
241 break;
242 default:
243 assert (!"Invalid ARM opcode size");
244 break;
245 }
Greg Clayton50561692012-04-11 21:13:31 +0000246 return;
247
248 default:
Sean Callanan32a56ec2012-02-17 00:53:45 +0000249 break;
250 }
Greg Clayton50561692012-04-11 21:13:31 +0000251 // Handle the default cases here.
252 const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize();
253 const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize();
254 if (min_op_byte_size == max_op_byte_size)
255 {
256 assert (inst_size == min_op_byte_size);
257 switch (inst_size)
258 {
259 case 1: m_opcode.SetOpcode8 (extractor.GetU8 (&offset)); return;
260 case 2: m_opcode.SetOpcode16 (extractor.GetU16 (&offset)); return;
261 case 4: m_opcode.SetOpcode32 (extractor.GetU32 (&offset)); return;
262 case 8: m_opcode.SetOpcode64 (extractor.GetU64 (&offset)); return;
263 default:
264 break;
265 }
266 }
267 m_opcode.SetOpcodeBytes(extractor.PeekData(offset, inst_size), inst_size);
Sean Callanan32a56ec2012-02-17 00:53:45 +0000268 }
269
Sean Callanan5f1b66c2012-03-02 23:22:53 +0000270 bool StringRepresentsBranch (const char *data, size_t size)
271 {
272 const char *cursor = data;
273
274 bool inWhitespace = true;
275
276 while (inWhitespace && cursor < data + size)
277 {
278 switch (*cursor)
279 {
280 default:
281 inWhitespace = false;
282 break;
283 case ' ':
284 break;
285 case '\t':
286 break;
287 }
288
289 if (inWhitespace)
290 ++cursor;
291 }
292
293 if (cursor >= data + size)
294 return false;
295
296 llvm::Triple::ArchType arch = m_disasm.GetArchitecture().GetMachine();
297
298 switch (arch)
299 {
300 default:
301 return false;
302 case llvm::Triple::x86:
303 case llvm::Triple::x86_64:
304 switch (cursor[0])
305 {
306 default:
307 return false;
308 case 'j':
309 return true;
310 case 'c':
311 if (cursor[1] == 'a' &&
312 cursor[2] == 'l' &&
313 cursor[3] == 'l')
314 return true;
315 else
316 return false;
317 }
318 case llvm::Triple::arm:
319 case llvm::Triple::thumb:
320 switch (cursor[0])
321 {
322 default:
323 return false;
324 case 'b':
325 {
326 switch (cursor[1])
327 {
328 default:
Sean Callanan5f1b66c2012-03-02 23:22:53 +0000329 return true;
Sean Callanan561bf582012-04-10 21:51:12 +0000330 case 'f':
331 case 'i':
332 case 'k':
333 return false;
Sean Callanan5f1b66c2012-03-02 23:22:53 +0000334 }
Sean Callanan5f1b66c2012-03-02 23:22:53 +0000335 }
336 case 'c':
337 {
338 switch (cursor[1])
339 {
340 default:
341 return false;
342 case 'b':
343 return true;
344 }
345 }
346 }
347 }
348
349 return false;
350 }
351
Sean Callanan32a56ec2012-02-17 00:53:45 +0000352 template <bool Reparse> bool Parse (const lldb_private::Address &address,
Greg Clayton7fb14302012-04-13 00:07:34 +0000353 AddressClass addr_class,
Sean Callanan32a56ec2012-02-17 00:53:45 +0000354 const DataExtractor &extractor,
355 uint32_t data_offset,
356 lldb_private::ExecutionContextScope *exe_scope)
357 {
358 std::vector<char> out_string(256);
359
360 const uint8_t *data_start = extractor.GetDataStart();
361
362 m_disasm.Lock(this, exe_scope);
363
364 ::LLVMDisasmContextRef disasm_context;
365
366 if (addr_class == eAddressClassCodeAlternateISA)
367 disasm_context = m_disasm.m_alternate_disasm_context;
368 else
369 disasm_context = m_disasm.m_disasm_context;
370
371 m_comment_stream.Clear();
Sean Callanan501e77a2012-03-22 01:10:50 +0000372
373 lldb::addr_t pc = LLDB_INVALID_ADDRESS;
374
375 if (exe_scope)
376 if (TargetSP target_sp = exe_scope->CalculateTarget())
377 pc = m_address.GetLoadAddress(target_sp.get());
378
379 if (pc == LLDB_INVALID_ADDRESS)
380 pc = m_address.GetFileAddress();
381
382 size_t inst_size = ::LLVMDisasmInstruction(disasm_context,
383 const_cast<uint8_t*>(data_start) + data_offset,
384 extractor.GetByteSize() - data_offset,
385 pc,
Sean Callanan32a56ec2012-02-17 00:53:45 +0000386 out_string.data(),
387 out_string.size());
388
Sean Callanan5f1b66c2012-03-02 23:22:53 +0000389 if (m_does_branch == eLazyBoolCalculate)
390 m_does_branch = (StringRepresentsBranch (out_string.data(), out_string.size()) ?
391 eLazyBoolYes : eLazyBoolNo);
392
Sean Callanan32a56ec2012-02-17 00:53:45 +0000393 m_comment_stream.Flush();
394 m_no_comments = false;
395
396 m_comment.swap(m_comment_stream.GetString());
397
398 m_disasm.Unlock();
399
400 if (Reparse)
401 {
402 if (inst_size != m_raw_bytes.size())
403 return false;
404 }
405 else
406 {
407 if (!inst_size)
408 return false;
Sean Callanan32a56ec2012-02-17 00:53:45 +0000409 }
Sean Callanan501e77a2012-03-22 01:10:50 +0000410
411 PopulateOpcode(extractor, data_offset, inst_size);
412
413 m_raw_bytes.resize(inst_size);
414 memcpy(m_raw_bytes.data(), data_start + data_offset, inst_size);
415
416 if (!s_regex_compiled)
417 {
418 ::regcomp(&s_regex, "[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?", REG_EXTENDED);
419 s_regex_compiled = true;
420 }
421
422 ::regmatch_t matches[3];
423
424 const char *out_data = out_string.data();
425
426 if (!::regexec(&s_regex, out_data, sizeof(matches) / sizeof(::regmatch_t), matches, 0))
427 {
428 if (matches[1].rm_so != -1)
429 m_opcode_name.assign(out_data + matches[1].rm_so, matches[1].rm_eo - matches[1].rm_so);
430 if (matches[2].rm_so != -1)
431 m_mnemocics.assign(out_data + matches[2].rm_so, matches[2].rm_eo - matches[2].rm_so);
432 }
433
434 m_is_valid = true;
Sean Callanan32a56ec2012-02-17 00:53:45 +0000435
436 return true;
437 }
438
439 bool m_is_valid;
440 DisassemblerLLVMC &m_disasm;
441 std::vector<uint8_t> m_raw_bytes;
442
443 bool m_no_comments;
444 StreamString m_comment_stream;
Sean Callanan5f1b66c2012-03-02 23:22:53 +0000445 LazyBool m_does_branch;
Sean Callanan32a56ec2012-02-17 00:53:45 +0000446
447 static bool s_regex_compiled;
448 static ::regex_t s_regex;
449};
450
451bool InstructionLLVMC::s_regex_compiled = false;
452::regex_t InstructionLLVMC::s_regex;
453
454Disassembler *
455DisassemblerLLVMC::CreateInstance (const ArchSpec &arch)
456{
457 std::auto_ptr<DisassemblerLLVMC> disasm_ap (new DisassemblerLLVMC(arch));
458
459 if (disasm_ap.get() && disasm_ap->IsValid())
460 return disasm_ap.release();
461
462 return NULL;
463}
464
465DisassemblerLLVMC::DisassemblerLLVMC (const ArchSpec &arch) :
466 Disassembler(arch),
467 m_disasm_context(NULL),
468 m_alternate_disasm_context(NULL)
469{
470 m_disasm_context = ::LLVMCreateDisasm(arch.GetTriple().getTriple().c_str(),
471 (void*)this,
472 /*TagType=*/1,
Sean Callananaa12be42012-02-23 23:43:28 +0000473 NULL,
Sean Callanan32a56ec2012-02-17 00:53:45 +0000474 DisassemblerLLVMC::SymbolLookupCallback);
475
476 if (arch.GetTriple().getArch() == llvm::Triple::arm)
477 {
478 m_alternate_disasm_context = ::LLVMCreateDisasm("thumbv7-apple-darwin",
479 (void*)this,
480 /*TagType=*/1,
Sean Callananaa12be42012-02-23 23:43:28 +0000481 NULL,
Sean Callanan32a56ec2012-02-17 00:53:45 +0000482 DisassemblerLLVMC::SymbolLookupCallback);
483 }
484}
485
486DisassemblerLLVMC::~DisassemblerLLVMC()
487{
Sean Callananeec2f082012-04-06 17:59:49 +0000488 if (m_disasm_context)
489 {
490 ::LLVMDisasmDispose(m_disasm_context);
491 m_disasm_context = NULL;
492 }
493 if (m_alternate_disasm_context)
494 {
495 ::LLVMDisasmDispose(m_alternate_disasm_context);
496 m_alternate_disasm_context = NULL;
497 }
Sean Callanan32a56ec2012-02-17 00:53:45 +0000498}
499
500size_t
501DisassemblerLLVMC::DecodeInstructions (const Address &base_addr,
502 const DataExtractor& data,
503 uint32_t data_offset,
504 uint32_t num_instructions,
505 bool append)
506{
507 if (!append)
508 m_instruction_list.Clear();
509
510 if (!IsValid())
511 return 0;
512
513 uint32_t data_cursor = data_offset;
514 size_t data_byte_size = data.GetByteSize();
515 uint32_t instructions_parsed = 0;
516
517 uint64_t instruction_pointer = base_addr.GetFileAddress();
518
519 std::vector<char> out_string(256);
520
521 while (data_offset < data_byte_size && instructions_parsed < num_instructions)
522 {
523 Address instr_address = base_addr;
524 instr_address.Slide(data_cursor);
525
526 AddressClass address_class = eAddressClassUnknown;
527
528 if (m_alternate_disasm_context)
529 address_class = instr_address.GetAddressClass ();
530
531 InstructionSP inst_sp(new InstructionLLVMC(*this,
532 instr_address,
533 address_class));
534
535 if (!inst_sp)
536 return data_cursor - data_offset;
537
538 uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor);
539
540 if (!inst_size)
541 return data_cursor - data_offset;
542
543 m_instruction_list.Append(inst_sp);
544
545 instruction_pointer += inst_size;
546 data_cursor += inst_size;
547 instructions_parsed++;
548 }
549
550 return data_cursor - data_offset;
551}
552
553void
554DisassemblerLLVMC::Initialize()
555{
556 PluginManager::RegisterPlugin (GetPluginNameStatic(),
557 GetPluginDescriptionStatic(),
558 CreateInstance);
559
560 llvm::InitializeAllTargetInfos();
561 llvm::InitializeAllTargetMCs();
562 llvm::InitializeAllAsmParsers();
563 llvm::InitializeAllDisassemblers();
564}
565
566void
567DisassemblerLLVMC::Terminate()
568{
569 PluginManager::UnregisterPlugin (CreateInstance);
570}
571
572
573const char *
574DisassemblerLLVMC::GetPluginNameStatic()
575{
Greg Clayton34814d62012-03-22 00:49:15 +0000576 return "llvm-mc";
Sean Callanan32a56ec2012-02-17 00:53:45 +0000577}
578
579const char *
580DisassemblerLLVMC::GetPluginDescriptionStatic()
581{
Greg Clayton34814d62012-03-22 00:49:15 +0000582 return "Disassembler that uses LLVM MC to disassemble i386, x86_64 and ARM.";
Sean Callanan32a56ec2012-02-17 00:53:45 +0000583}
584
585int DisassemblerLLVMC::OpInfoCallback (void *DisInfo,
586 uint64_t PC,
587 uint64_t Offset,
588 uint64_t Size,
589 int TagType,
590 void *TagBug)
591{
592 return static_cast<DisassemblerLLVMC*>(DisInfo)->OpInfo(PC,
593 Offset,
594 Size,
595 TagType,
596 TagBug);
597}
598
599const char *DisassemblerLLVMC::SymbolLookupCallback(void *DisInfo,
600 uint64_t ReferenceValue,
601 uint64_t *ReferenceType,
602 uint64_t ReferencePC,
603 const char **ReferenceName)
604{
605 return static_cast<DisassemblerLLVMC*>(DisInfo)->SymbolLookup(ReferenceValue,
606 ReferenceType,
607 ReferencePC,
608 ReferenceName);
609}
610
611int DisassemblerLLVMC::OpInfo (uint64_t PC,
612 uint64_t Offset,
613 uint64_t Size,
614 int TagType,
615 void *TagBug)
616{
617 switch (TagType)
618 {
619 default:
620 break;
621 case 1:
622 bzero (TagBug, sizeof(::LLVMOpInfo1));
623 break;
624 }
625 return 0;
626}
627
628const char *DisassemblerLLVMC::SymbolLookup (uint64_t ReferenceValue,
629 uint64_t *ReferenceType,
630 uint64_t ReferencePC,
631 const char **ReferenceName)
632{
633 const char *result_name = NULL;
634 uint64_t result_reference_type = LLVMDisassembler_ReferenceType_InOut_None;
635 const char *result_referred_name = NULL;
636
637 if (m_exe_scope && m_inst)
638 {
639 Address reference_address;
640
Johnny Chendd5e3632012-02-20 22:05:34 +0000641 TargetSP target_sp (m_exe_scope->CalculateTarget());
642 Target *target = target_sp.get();
Sean Callanan32a56ec2012-02-17 00:53:45 +0000643
644 if (target)
645 {
Sean Callanan685099b2012-03-22 20:04:23 +0000646 if (!target->GetSectionLoadList().ResolveLoadAddress(ReferenceValue, reference_address))
647 {
648 if (ModuleSP module_sp = m_inst->GetAddress().GetModule())
649 module_sp->ResolveFileAddress(ReferenceValue, reference_address);
650 }
Sean Callanan32a56ec2012-02-17 00:53:45 +0000651
Sean Callananaa12be42012-02-23 23:43:28 +0000652 if (reference_address.IsValid() && reference_address.GetSection())
Sean Callanan32a56ec2012-02-17 00:53:45 +0000653 {
Sean Callanan32a56ec2012-02-17 00:53:45 +0000654 StreamString ss;
655
Sean Callananaa12be42012-02-23 23:43:28 +0000656 reference_address.Dump (&ss,
657 target,
658 Address::DumpStyleResolvedDescriptionNoModule,
659 Address::DumpStyleSectionNameOffset);
Sean Callanan32a56ec2012-02-17 00:53:45 +0000660
Sean Callanan685099b2012-03-22 20:04:23 +0000661 if (!ss.GetString().empty())
662 m_inst->AddReferencedAddress(ss.GetString());
Sean Callanan32a56ec2012-02-17 00:53:45 +0000663 }
664 }
665 }
666
667 *ReferenceType = result_reference_type;
668 *ReferenceName = result_referred_name;
669
670 return result_name;
671}
672
673//------------------------------------------------------------------
674// PluginInterface protocol
675//------------------------------------------------------------------
676const char *
677DisassemblerLLVMC::GetPluginName()
678{
679 return "DisassemblerLLVMC";
680}
681
682const char *
683DisassemblerLLVMC::GetShortPluginName()
684{
685 return GetPluginNameStatic();
686}
687
688uint32_t
689DisassemblerLLVMC::GetPluginVersion()
690{
691 return 1;
692}
693