blob: bf323ae7167e37a8f513c7c399e10770444bc66f [file] [log] [blame]
Sean Callanan95e5c632012-02-17 00:53:45 +00001//===-- DisassemblerLLVMC.cpp -----------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "DisassemblerLLVMC.h"
11
12#include "llvm-c/Disassembler.h"
13#include "llvm/Support/TargetSelect.h"
14
15#include "lldb/Core/Address.h"
16#include "lldb/Core/DataExtractor.h"
17#include "lldb/Core/Stream.h"
18#include "lldb/Symbol/SymbolContext.h"
19#include "lldb/Target/ExecutionContext.h"
20#include "lldb/Target/Process.h"
21#include "lldb/Target/RegisterContext.h"
22#include "lldb/Target/Target.h"
23#include "lldb/Target/StackFrame.h"
24
25#include <regex.h>
26
27using namespace lldb;
28using namespace lldb_private;
29
30class InstructionLLVMC : public lldb_private::Instruction
31{
32public:
33 InstructionLLVMC (DisassemblerLLVMC &disasm,
34 const lldb_private::Address &address,
35 lldb_private::AddressClass addr_class) :
36 Instruction(address, addr_class),
Sean Callanan95e5c632012-02-17 00:53:45 +000037 m_is_valid(false),
Bill Wendlinge6eeef02012-04-06 00:09:59 +000038 m_disasm(disasm),
Sean Callanan95e5c632012-02-17 00:53:45 +000039 m_no_comments(true),
Sean Callanan7725a462012-03-02 23:22:53 +000040 m_comment_stream(),
41 m_does_branch(eLazyBoolCalculate)
Sean Callanan95e5c632012-02-17 00:53:45 +000042 {
43 }
44
45 virtual
46 ~InstructionLLVMC ()
47 {
48 }
49
50 static void
51 PadToWidth (lldb_private::StreamString &ss,
52 int new_width)
53 {
54 int old_width = ss.GetSize();
55
56 if (old_width < new_width)
57 {
58 ss.Printf("%*s", new_width - old_width, "");
59 }
60 }
61
62 virtual void
63 Dump (lldb_private::Stream *s,
64 uint32_t max_opcode_byte_size,
65 bool show_address,
66 bool show_bytes,
67 const lldb_private::ExecutionContext* exe_ctx,
68 bool raw)
69 {
70 const size_t opcode_column_width = 7;
71 const size_t operand_column_width = 25;
72
73 StreamString ss;
74
75 ExecutionContextScope *exe_scope = NULL;
76
77 if ((!raw) && exe_ctx)
78 {
79 exe_scope = exe_ctx->GetBestExecutionContextScope();
80
81 DataExtractor extractor(m_raw_bytes.data(),
82 m_raw_bytes.size(),
83 m_disasm.GetArchitecture().GetByteOrder(),
84 m_disasm.GetArchitecture().GetAddressByteSize());
85
86 Parse <true> (m_address,
87 m_address_class,
88 extractor,
89 0,
90 exe_scope);
91 }
92
93 if (show_address)
94 {
95 m_address.Dump(&ss,
96 exe_scope,
97 Address::DumpStyleLoadAddress,
98 Address::DumpStyleModuleWithFileAddress,
99 0);
100
101 ss.PutCString(": ");
102 }
103
104 if (show_bytes)
105 {
106 if (m_opcode.GetType() == Opcode::eTypeBytes)
107 {
108 // x86_64 and i386 are the only ones that use bytes right now so
109 // pad out the byte dump to be able to always show 15 bytes (3 chars each)
110 // plus a space
111 if (max_opcode_byte_size > 0)
112 m_opcode.Dump (&ss, max_opcode_byte_size * 3 + 1);
113 else
114 m_opcode.Dump (&ss, 15 * 3 + 1);
115 }
116 else
117 {
118 // Else, we have ARM which can show up to a uint32_t 0x00000000 (10 spaces)
119 // plus two for padding...
120 if (max_opcode_byte_size > 0)
121 m_opcode.Dump (&ss, max_opcode_byte_size * 3 + 1);
122 else
123 m_opcode.Dump (&ss, 12);
124 }
125 }
126
127 int size_before_inst = ss.GetSize();
128
129 ss.PutCString(m_opcode_name.c_str());
130
131 PadToWidth(ss, size_before_inst + opcode_column_width);
132
133 ss.PutCString(m_mnemocics.c_str());
134
135 PadToWidth(ss, size_before_inst + opcode_column_width + operand_column_width);
136
137 if (!m_comment.empty())
138 {
139 ss.PutCString(" ; ");
140 ss.PutCString(m_comment.c_str());
141 }
142
143 ss.Flush();
144
145 s->PutCString(ss.GetData());
146 }
147
148 virtual bool
149 DoesBranch () const
150 {
Sean Callanan7725a462012-03-02 23:22:53 +0000151 return m_does_branch == eLazyBoolYes;
Sean Callanan95e5c632012-02-17 00:53:45 +0000152 }
153
154 virtual size_t
155 Decode (const lldb_private::Disassembler &disassembler,
156 const lldb_private::DataExtractor &data,
157 uint32_t data_offset)
158 {
159 Parse <false> (m_address,
160 m_address_class,
161 data,
162 data_offset,
163 NULL);
164
165 return m_opcode.GetByteSize();
166 }
167
168 void
169 AddReferencedAddress (std::string &description)
170 {
171 if (m_no_comments)
172 m_comment_stream.PutCString(", ");
173 else
174 m_no_comments = true;
175
176 m_comment_stream.PutCString(description.c_str());
177 }
178
179 virtual void
180 CalculateMnemonicOperandsAndComment (lldb_private::ExecutionContextScope *exe_scope)
181 {
182 DataExtractor extractor(m_raw_bytes.data(),
183 m_raw_bytes.size(),
184 m_disasm.GetArchitecture().GetByteOrder(),
185 m_disasm.GetArchitecture().GetAddressByteSize());
186
187 Parse <true> (m_address,
188 m_address_class,
189 extractor,
190 0,
191 exe_scope);
192 }
193
194 bool
195 IsValid ()
196 {
197 return m_is_valid;
198 }
199
200 size_t
201 GetByteSize ()
202 {
203 return m_opcode.GetByteSize();
204 }
205protected:
206 void PopulateOpcode (const DataExtractor &extractor,
207 uint32_t offset,
208 size_t inst_size)
209 {
210 llvm::Triple::ArchType arch = m_disasm.GetArchitecture().GetMachine();
211
212 switch (arch)
213 {
214 default:
215 case llvm::Triple::x86:
216 case llvm::Triple::x86_64:
217 m_opcode.SetOpcodeBytes(extractor.PeekData(offset, inst_size), inst_size);
218 break;
219 case llvm::Triple::arm:
220 case llvm::Triple::thumb:
221 switch (inst_size)
222 {
223 case 2:
224 {
225 m_opcode.SetOpcode16 (extractor.GetU16 (&offset));
226 break;
227 }
228 break;
229 case 4:
230 {
231 if (arch == llvm::Triple::arm &&
232 m_address_class == eAddressClassCodeAlternateISA)
233 {
234 // If it is a 32-bit THUMB instruction, we need to swap the upper & lower halves.
235 uint32_t orig_bytes = extractor.GetU32 (&offset);
236 uint16_t upper_bits = (orig_bytes >> 16) & ((1u << 16) - 1);
237 uint16_t lower_bits = orig_bytes & ((1u << 16) - 1);
238 uint32_t swapped = (lower_bits << 16) | upper_bits;
239 m_opcode.SetOpcode32 (swapped);
240 }
241 else
242 {
243 m_opcode.SetOpcode32 (extractor.GetU32 (&offset));
244 }
245 }
246 break;
247 default:
248 assert (!"Invalid ARM opcode size");
249 break;
250 }
251 break;
252 }
253 }
254
Sean Callanan7725a462012-03-02 23:22:53 +0000255 bool StringRepresentsBranch (const char *data, size_t size)
256 {
257 const char *cursor = data;
258
259 bool inWhitespace = true;
260
261 while (inWhitespace && cursor < data + size)
262 {
263 switch (*cursor)
264 {
265 default:
266 inWhitespace = false;
267 break;
268 case ' ':
269 break;
270 case '\t':
271 break;
272 }
273
274 if (inWhitespace)
275 ++cursor;
276 }
277
278 if (cursor >= data + size)
279 return false;
280
281 llvm::Triple::ArchType arch = m_disasm.GetArchitecture().GetMachine();
282
283 switch (arch)
284 {
285 default:
286 return false;
287 case llvm::Triple::x86:
288 case llvm::Triple::x86_64:
289 switch (cursor[0])
290 {
291 default:
292 return false;
293 case 'j':
294 return true;
295 case 'c':
296 if (cursor[1] == 'a' &&
297 cursor[2] == 'l' &&
298 cursor[3] == 'l')
299 return true;
300 else
301 return false;
302 }
303 case llvm::Triple::arm:
304 case llvm::Triple::thumb:
305 switch (cursor[0])
306 {
307 default:
308 return false;
309 case 'b':
310 {
311 switch (cursor[1])
312 {
313 default:
Sean Callanan7725a462012-03-02 23:22:53 +0000314 return true;
Sean Callanan62ecb9b2012-04-10 21:51:12 +0000315 case 'f':
316 case 'i':
317 case 'k':
318 return false;
Sean Callanan7725a462012-03-02 23:22:53 +0000319 }
Sean Callanan7725a462012-03-02 23:22:53 +0000320 }
321 case 'c':
322 {
323 switch (cursor[1])
324 {
325 default:
326 return false;
327 case 'b':
328 return true;
329 }
330 }
331 }
332 }
333
334 return false;
335 }
336
Sean Callanan95e5c632012-02-17 00:53:45 +0000337 template <bool Reparse> bool Parse (const lldb_private::Address &address,
338 lldb_private::AddressClass addr_class,
339 const DataExtractor &extractor,
340 uint32_t data_offset,
341 lldb_private::ExecutionContextScope *exe_scope)
342 {
343 std::vector<char> out_string(256);
344
345 const uint8_t *data_start = extractor.GetDataStart();
346
347 m_disasm.Lock(this, exe_scope);
348
349 ::LLVMDisasmContextRef disasm_context;
350
351 if (addr_class == eAddressClassCodeAlternateISA)
352 disasm_context = m_disasm.m_alternate_disasm_context;
353 else
354 disasm_context = m_disasm.m_disasm_context;
355
356 m_comment_stream.Clear();
Sean Callanan3f85b322012-03-22 01:10:50 +0000357
358 lldb::addr_t pc = LLDB_INVALID_ADDRESS;
359
360 if (exe_scope)
361 if (TargetSP target_sp = exe_scope->CalculateTarget())
362 pc = m_address.GetLoadAddress(target_sp.get());
363
364 if (pc == LLDB_INVALID_ADDRESS)
365 pc = m_address.GetFileAddress();
366
367 size_t inst_size = ::LLVMDisasmInstruction(disasm_context,
368 const_cast<uint8_t*>(data_start) + data_offset,
369 extractor.GetByteSize() - data_offset,
370 pc,
Sean Callanan95e5c632012-02-17 00:53:45 +0000371 out_string.data(),
372 out_string.size());
373
Sean Callanan7725a462012-03-02 23:22:53 +0000374 if (m_does_branch == eLazyBoolCalculate)
375 m_does_branch = (StringRepresentsBranch (out_string.data(), out_string.size()) ?
376 eLazyBoolYes : eLazyBoolNo);
377
Sean Callanan95e5c632012-02-17 00:53:45 +0000378 m_comment_stream.Flush();
379 m_no_comments = false;
380
381 m_comment.swap(m_comment_stream.GetString());
382
383 m_disasm.Unlock();
384
385 if (Reparse)
386 {
387 if (inst_size != m_raw_bytes.size())
388 return false;
389 }
390 else
391 {
392 if (!inst_size)
393 return false;
Sean Callanan95e5c632012-02-17 00:53:45 +0000394 }
Sean Callanan3f85b322012-03-22 01:10:50 +0000395
396 PopulateOpcode(extractor, data_offset, inst_size);
397
398 m_raw_bytes.resize(inst_size);
399 memcpy(m_raw_bytes.data(), data_start + data_offset, inst_size);
400
401 if (!s_regex_compiled)
402 {
403 ::regcomp(&s_regex, "[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?", REG_EXTENDED);
404 s_regex_compiled = true;
405 }
406
407 ::regmatch_t matches[3];
408
409 const char *out_data = out_string.data();
410
411 if (!::regexec(&s_regex, out_data, sizeof(matches) / sizeof(::regmatch_t), matches, 0))
412 {
413 if (matches[1].rm_so != -1)
414 m_opcode_name.assign(out_data + matches[1].rm_so, matches[1].rm_eo - matches[1].rm_so);
415 if (matches[2].rm_so != -1)
416 m_mnemocics.assign(out_data + matches[2].rm_so, matches[2].rm_eo - matches[2].rm_so);
417 }
418
419 m_is_valid = true;
Sean Callanan95e5c632012-02-17 00:53:45 +0000420
421 return true;
422 }
423
424 bool m_is_valid;
425 DisassemblerLLVMC &m_disasm;
426 std::vector<uint8_t> m_raw_bytes;
427
428 bool m_no_comments;
429 StreamString m_comment_stream;
Sean Callanan7725a462012-03-02 23:22:53 +0000430 LazyBool m_does_branch;
Sean Callanan95e5c632012-02-17 00:53:45 +0000431
432 static bool s_regex_compiled;
433 static ::regex_t s_regex;
434};
435
436bool InstructionLLVMC::s_regex_compiled = false;
437::regex_t InstructionLLVMC::s_regex;
438
439Disassembler *
440DisassemblerLLVMC::CreateInstance (const ArchSpec &arch)
441{
442 std::auto_ptr<DisassemblerLLVMC> disasm_ap (new DisassemblerLLVMC(arch));
443
444 if (disasm_ap.get() && disasm_ap->IsValid())
445 return disasm_ap.release();
446
447 return NULL;
448}
449
450DisassemblerLLVMC::DisassemblerLLVMC (const ArchSpec &arch) :
451 Disassembler(arch),
452 m_disasm_context(NULL),
453 m_alternate_disasm_context(NULL)
454{
455 m_disasm_context = ::LLVMCreateDisasm(arch.GetTriple().getTriple().c_str(),
456 (void*)this,
457 /*TagType=*/1,
Sean Callanan6f298a62012-02-23 23:43:28 +0000458 NULL,
Sean Callanan95e5c632012-02-17 00:53:45 +0000459 DisassemblerLLVMC::SymbolLookupCallback);
460
461 if (arch.GetTriple().getArch() == llvm::Triple::arm)
462 {
463 m_alternate_disasm_context = ::LLVMCreateDisasm("thumbv7-apple-darwin",
464 (void*)this,
465 /*TagType=*/1,
Sean Callanan6f298a62012-02-23 23:43:28 +0000466 NULL,
Sean Callanan95e5c632012-02-17 00:53:45 +0000467 DisassemblerLLVMC::SymbolLookupCallback);
468 }
469}
470
471DisassemblerLLVMC::~DisassemblerLLVMC()
472{
Sean Callanan2b54db72012-04-06 17:59:49 +0000473 if (m_disasm_context)
474 {
475 ::LLVMDisasmDispose(m_disasm_context);
476 m_disasm_context = NULL;
477 }
478 if (m_alternate_disasm_context)
479 {
480 ::LLVMDisasmDispose(m_alternate_disasm_context);
481 m_alternate_disasm_context = NULL;
482 }
Sean Callanan95e5c632012-02-17 00:53:45 +0000483}
484
485size_t
486DisassemblerLLVMC::DecodeInstructions (const Address &base_addr,
487 const DataExtractor& data,
488 uint32_t data_offset,
489 uint32_t num_instructions,
490 bool append)
491{
492 if (!append)
493 m_instruction_list.Clear();
494
495 if (!IsValid())
496 return 0;
497
498 uint32_t data_cursor = data_offset;
499 size_t data_byte_size = data.GetByteSize();
500 uint32_t instructions_parsed = 0;
501
502 uint64_t instruction_pointer = base_addr.GetFileAddress();
503
504 std::vector<char> out_string(256);
505
506 while (data_offset < data_byte_size && instructions_parsed < num_instructions)
507 {
508 Address instr_address = base_addr;
509 instr_address.Slide(data_cursor);
510
511 AddressClass address_class = eAddressClassUnknown;
512
513 if (m_alternate_disasm_context)
514 address_class = instr_address.GetAddressClass ();
515
516 InstructionSP inst_sp(new InstructionLLVMC(*this,
517 instr_address,
518 address_class));
519
520 if (!inst_sp)
521 return data_cursor - data_offset;
522
523 uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor);
524
525 if (!inst_size)
526 return data_cursor - data_offset;
527
528 m_instruction_list.Append(inst_sp);
529
530 instruction_pointer += inst_size;
531 data_cursor += inst_size;
532 instructions_parsed++;
533 }
534
535 return data_cursor - data_offset;
536}
537
538void
539DisassemblerLLVMC::Initialize()
540{
541 PluginManager::RegisterPlugin (GetPluginNameStatic(),
542 GetPluginDescriptionStatic(),
543 CreateInstance);
544
545 llvm::InitializeAllTargetInfos();
546 llvm::InitializeAllTargetMCs();
547 llvm::InitializeAllAsmParsers();
548 llvm::InitializeAllDisassemblers();
549}
550
551void
552DisassemblerLLVMC::Terminate()
553{
554 PluginManager::UnregisterPlugin (CreateInstance);
555}
556
557
558const char *
559DisassemblerLLVMC::GetPluginNameStatic()
560{
Greg Claytonf8712de2012-03-22 00:49:15 +0000561 return "llvm-mc";
Sean Callanan95e5c632012-02-17 00:53:45 +0000562}
563
564const char *
565DisassemblerLLVMC::GetPluginDescriptionStatic()
566{
Greg Claytonf8712de2012-03-22 00:49:15 +0000567 return "Disassembler that uses LLVM MC to disassemble i386, x86_64 and ARM.";
Sean Callanan95e5c632012-02-17 00:53:45 +0000568}
569
570int DisassemblerLLVMC::OpInfoCallback (void *DisInfo,
571 uint64_t PC,
572 uint64_t Offset,
573 uint64_t Size,
574 int TagType,
575 void *TagBug)
576{
577 return static_cast<DisassemblerLLVMC*>(DisInfo)->OpInfo(PC,
578 Offset,
579 Size,
580 TagType,
581 TagBug);
582}
583
584const char *DisassemblerLLVMC::SymbolLookupCallback(void *DisInfo,
585 uint64_t ReferenceValue,
586 uint64_t *ReferenceType,
587 uint64_t ReferencePC,
588 const char **ReferenceName)
589{
590 return static_cast<DisassemblerLLVMC*>(DisInfo)->SymbolLookup(ReferenceValue,
591 ReferenceType,
592 ReferencePC,
593 ReferenceName);
594}
595
596int DisassemblerLLVMC::OpInfo (uint64_t PC,
597 uint64_t Offset,
598 uint64_t Size,
599 int TagType,
600 void *TagBug)
601{
602 switch (TagType)
603 {
604 default:
605 break;
606 case 1:
607 bzero (TagBug, sizeof(::LLVMOpInfo1));
608 break;
609 }
610 return 0;
611}
612
613const char *DisassemblerLLVMC::SymbolLookup (uint64_t ReferenceValue,
614 uint64_t *ReferenceType,
615 uint64_t ReferencePC,
616 const char **ReferenceName)
617{
618 const char *result_name = NULL;
619 uint64_t result_reference_type = LLVMDisassembler_ReferenceType_InOut_None;
620 const char *result_referred_name = NULL;
621
622 if (m_exe_scope && m_inst)
623 {
624 Address reference_address;
625
Johnny Chen9b44acd2012-02-20 22:05:34 +0000626 TargetSP target_sp (m_exe_scope->CalculateTarget());
627 Target *target = target_sp.get();
Sean Callanan95e5c632012-02-17 00:53:45 +0000628
629 if (target)
630 {
Sean Callanan745af462012-03-22 20:04:23 +0000631 if (!target->GetSectionLoadList().ResolveLoadAddress(ReferenceValue, reference_address))
632 {
633 if (ModuleSP module_sp = m_inst->GetAddress().GetModule())
634 module_sp->ResolveFileAddress(ReferenceValue, reference_address);
635 }
Sean Callanan95e5c632012-02-17 00:53:45 +0000636
Sean Callanan6f298a62012-02-23 23:43:28 +0000637 if (reference_address.IsValid() && reference_address.GetSection())
Sean Callanan95e5c632012-02-17 00:53:45 +0000638 {
Sean Callanan95e5c632012-02-17 00:53:45 +0000639 StreamString ss;
640
Sean Callanan6f298a62012-02-23 23:43:28 +0000641 reference_address.Dump (&ss,
642 target,
643 Address::DumpStyleResolvedDescriptionNoModule,
644 Address::DumpStyleSectionNameOffset);
Sean Callanan95e5c632012-02-17 00:53:45 +0000645
Sean Callanan745af462012-03-22 20:04:23 +0000646 if (!ss.GetString().empty())
647 m_inst->AddReferencedAddress(ss.GetString());
Sean Callanan95e5c632012-02-17 00:53:45 +0000648 }
649 }
650 }
651
652 *ReferenceType = result_reference_type;
653 *ReferenceName = result_referred_name;
654
655 return result_name;
656}
657
658//------------------------------------------------------------------
659// PluginInterface protocol
660//------------------------------------------------------------------
661const char *
662DisassemblerLLVMC::GetPluginName()
663{
664 return "DisassemblerLLVMC";
665}
666
667const char *
668DisassemblerLLVMC::GetShortPluginName()
669{
670 return GetPluginNameStatic();
671}
672
673uint32_t
674DisassemblerLLVMC::GetPluginVersion()
675{
676 return 1;
677}
678