blob: 2667ff0d45666fba23e6f83a66fc86106c5a75dc [file] [log] [blame]
//===-- DisassemblerLLVMC.cpp -----------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9
10#include "DisassemblerLLVMC.h"
11
12#include "llvm-c/Disassembler.h"
13#include "llvm/Support/TargetSelect.h"
14
15#include "lldb/Core/Address.h"
16#include "lldb/Core/DataExtractor.h"
17#include "lldb/Core/Stream.h"
18#include "lldb/Symbol/SymbolContext.h"
19#include "lldb/Target/ExecutionContext.h"
20#include "lldb/Target/Process.h"
21#include "lldb/Target/RegisterContext.h"
22#include "lldb/Target/Target.h"
23#include "lldb/Target/StackFrame.h"
24
25#include <regex.h>
26
27using namespace lldb;
28using namespace lldb_private;
29
30class InstructionLLVMC : public lldb_private::Instruction
31{
32public:
33 InstructionLLVMC (DisassemblerLLVMC &disasm,
34 const lldb_private::Address &address,
35 lldb_private::AddressClass addr_class) :
36 Instruction(address, addr_class),
37 m_disasm(disasm),
38 m_is_valid(false),
39 m_no_comments(true),
Sean Callanan5f1b66c2012-03-02 23:22:53 +000040 m_comment_stream(),
41 m_does_branch(eLazyBoolCalculate)
Sean Callanan32a56ec2012-02-17 00:53:45 +000042 {
43 }
44
45 virtual
46 ~InstructionLLVMC ()
47 {
48 }
49
50 static void
51 PadToWidth (lldb_private::StreamString &ss,
52 int new_width)
53 {
54 int old_width = ss.GetSize();
55
56 if (old_width < new_width)
57 {
58 ss.Printf("%*s", new_width - old_width, "");
59 }
60 }
61
62 virtual void
63 Dump (lldb_private::Stream *s,
64 uint32_t max_opcode_byte_size,
65 bool show_address,
66 bool show_bytes,
67 const lldb_private::ExecutionContext* exe_ctx,
68 bool raw)
69 {
70 const size_t opcode_column_width = 7;
71 const size_t operand_column_width = 25;
72
73 StreamString ss;
74
75 ExecutionContextScope *exe_scope = NULL;
76
77 if ((!raw) && exe_ctx)
78 {
79 exe_scope = exe_ctx->GetBestExecutionContextScope();
80
81 DataExtractor extractor(m_raw_bytes.data(),
82 m_raw_bytes.size(),
83 m_disasm.GetArchitecture().GetByteOrder(),
84 m_disasm.GetArchitecture().GetAddressByteSize());
85
86 Parse <true> (m_address,
87 m_address_class,
88 extractor,
89 0,
90 exe_scope);
91 }
92
93 if (show_address)
94 {
95 m_address.Dump(&ss,
96 exe_scope,
97 Address::DumpStyleLoadAddress,
98 Address::DumpStyleModuleWithFileAddress,
99 0);
100
101 ss.PutCString(": ");
102 }
103
104 if (show_bytes)
105 {
106 if (m_opcode.GetType() == Opcode::eTypeBytes)
107 {
108 // x86_64 and i386 are the only ones that use bytes right now so
109 // pad out the byte dump to be able to always show 15 bytes (3 chars each)
110 // plus a space
111 if (max_opcode_byte_size > 0)
112 m_opcode.Dump (&ss, max_opcode_byte_size * 3 + 1);
113 else
114 m_opcode.Dump (&ss, 15 * 3 + 1);
115 }
116 else
117 {
118 // Else, we have ARM which can show up to a uint32_t 0x00000000 (10 spaces)
119 // plus two for padding...
120 if (max_opcode_byte_size > 0)
121 m_opcode.Dump (&ss, max_opcode_byte_size * 3 + 1);
122 else
123 m_opcode.Dump (&ss, 12);
124 }
125 }
126
127 int size_before_inst = ss.GetSize();
128
129 ss.PutCString(m_opcode_name.c_str());
130
131 PadToWidth(ss, size_before_inst + opcode_column_width);
132
133 ss.PutCString(m_mnemocics.c_str());
134
135 PadToWidth(ss, size_before_inst + opcode_column_width + operand_column_width);
136
137 if (!m_comment.empty())
138 {
139 ss.PutCString(" ; ");
140 ss.PutCString(m_comment.c_str());
141 }
142
143 ss.Flush();
144
145 s->PutCString(ss.GetData());
146 }
147
148 virtual bool
149 DoesBranch () const
150 {
Sean Callanan5f1b66c2012-03-02 23:22:53 +0000151 return m_does_branch == eLazyBoolYes;
Sean Callanan32a56ec2012-02-17 00:53:45 +0000152 }
153
154 virtual size_t
155 Decode (const lldb_private::Disassembler &disassembler,
156 const lldb_private::DataExtractor &data,
157 uint32_t data_offset)
158 {
159 Parse <false> (m_address,
160 m_address_class,
161 data,
162 data_offset,
163 NULL);
164
165 return m_opcode.GetByteSize();
166 }
167
168 void
169 AddReferencedAddress (std::string &description)
170 {
171 if (m_no_comments)
172 m_comment_stream.PutCString(", ");
173 else
174 m_no_comments = true;
175
176 m_comment_stream.PutCString(description.c_str());
177 }
178
179 virtual void
180 CalculateMnemonicOperandsAndComment (lldb_private::ExecutionContextScope *exe_scope)
181 {
182 DataExtractor extractor(m_raw_bytes.data(),
183 m_raw_bytes.size(),
184 m_disasm.GetArchitecture().GetByteOrder(),
185 m_disasm.GetArchitecture().GetAddressByteSize());
186
187 Parse <true> (m_address,
188 m_address_class,
189 extractor,
190 0,
191 exe_scope);
192 }
193
194 bool
195 IsValid ()
196 {
197 return m_is_valid;
198 }
199
200 size_t
201 GetByteSize ()
202 {
203 return m_opcode.GetByteSize();
204 }
205protected:
206 void PopulateOpcode (const DataExtractor &extractor,
207 uint32_t offset,
208 size_t inst_size)
209 {
210 llvm::Triple::ArchType arch = m_disasm.GetArchitecture().GetMachine();
211
212 switch (arch)
213 {
214 default:
215 case llvm::Triple::x86:
216 case llvm::Triple::x86_64:
217 m_opcode.SetOpcodeBytes(extractor.PeekData(offset, inst_size), inst_size);
218 break;
219 case llvm::Triple::arm:
220 case llvm::Triple::thumb:
221 switch (inst_size)
222 {
223 case 2:
224 {
225 m_opcode.SetOpcode16 (extractor.GetU16 (&offset));
226 break;
227 }
228 break;
229 case 4:
230 {
231 if (arch == llvm::Triple::arm &&
232 m_address_class == eAddressClassCodeAlternateISA)
233 {
234 // If it is a 32-bit THUMB instruction, we need to swap the upper & lower halves.
235 uint32_t orig_bytes = extractor.GetU32 (&offset);
236 uint16_t upper_bits = (orig_bytes >> 16) & ((1u << 16) - 1);
237 uint16_t lower_bits = orig_bytes & ((1u << 16) - 1);
238 uint32_t swapped = (lower_bits << 16) | upper_bits;
239 m_opcode.SetOpcode32 (swapped);
240 }
241 else
242 {
243 m_opcode.SetOpcode32 (extractor.GetU32 (&offset));
244 }
245 }
246 break;
247 default:
248 assert (!"Invalid ARM opcode size");
249 break;
250 }
251 break;
252 }
253 }
254
Sean Callanan5f1b66c2012-03-02 23:22:53 +0000255 bool StringRepresentsBranch (const char *data, size_t size)
256 {
257 const char *cursor = data;
258
259 bool inWhitespace = true;
260
261 while (inWhitespace && cursor < data + size)
262 {
263 switch (*cursor)
264 {
265 default:
266 inWhitespace = false;
267 break;
268 case ' ':
269 break;
270 case '\t':
271 break;
272 }
273
274 if (inWhitespace)
275 ++cursor;
276 }
277
278 if (cursor >= data + size)
279 return false;
280
281 llvm::Triple::ArchType arch = m_disasm.GetArchitecture().GetMachine();
282
283 switch (arch)
284 {
285 default:
286 return false;
287 case llvm::Triple::x86:
288 case llvm::Triple::x86_64:
289 switch (cursor[0])
290 {
291 default:
292 return false;
293 case 'j':
294 return true;
295 case 'c':
296 if (cursor[1] == 'a' &&
297 cursor[2] == 'l' &&
298 cursor[3] == 'l')
299 return true;
300 else
301 return false;
302 }
303 case llvm::Triple::arm:
304 case llvm::Triple::thumb:
305 switch (cursor[0])
306 {
307 default:
308 return false;
309 case 'b':
310 {
311 switch (cursor[1])
312 {
313 default:
314 return false;
315 case 'l':
316 case 'x':
317 case ' ':
318 case '\t':
319 return true;
320 }
321 return false;
322 }
323 case 'c':
324 {
325 switch (cursor[1])
326 {
327 default:
328 return false;
329 case 'b':
330 return true;
331 }
332 }
333 }
334 }
335
336 return false;
337 }
338
Sean Callanan32a56ec2012-02-17 00:53:45 +0000339 template <bool Reparse> bool Parse (const lldb_private::Address &address,
340 lldb_private::AddressClass addr_class,
341 const DataExtractor &extractor,
342 uint32_t data_offset,
343 lldb_private::ExecutionContextScope *exe_scope)
344 {
345 std::vector<char> out_string(256);
346
347 const uint8_t *data_start = extractor.GetDataStart();
348
349 m_disasm.Lock(this, exe_scope);
350
351 ::LLVMDisasmContextRef disasm_context;
352
353 if (addr_class == eAddressClassCodeAlternateISA)
354 disasm_context = m_disasm.m_alternate_disasm_context;
355 else
356 disasm_context = m_disasm.m_disasm_context;
357
358 m_comment_stream.Clear();
359
360 size_t inst_size = ::LLVMDisasmInstruction(disasm_context,
361 const_cast<uint8_t*>(data_start) + data_offset,
362 extractor.GetByteSize() - data_offset,
363 address.GetFileAddress(),
364 out_string.data(),
365 out_string.size());
366
Sean Callanan5f1b66c2012-03-02 23:22:53 +0000367 if (m_does_branch == eLazyBoolCalculate)
368 m_does_branch = (StringRepresentsBranch (out_string.data(), out_string.size()) ?
369 eLazyBoolYes : eLazyBoolNo);
370
Sean Callanan32a56ec2012-02-17 00:53:45 +0000371 m_comment_stream.Flush();
372 m_no_comments = false;
373
374 m_comment.swap(m_comment_stream.GetString());
375
376 m_disasm.Unlock();
377
378 if (Reparse)
379 {
380 if (inst_size != m_raw_bytes.size())
381 return false;
382 }
383 else
384 {
385 if (!inst_size)
386 return false;
387
388 PopulateOpcode(extractor, data_offset, inst_size);
389
390 m_raw_bytes.resize(inst_size);
391 memcpy(m_raw_bytes.data(), data_start + data_offset, inst_size);
392
393 if (!s_regex_compiled)
394 {
395 ::regcomp(&s_regex, "[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?", REG_EXTENDED);
396 s_regex_compiled = true;
397 }
398
399 ::regmatch_t matches[3];
400
401 const char *out_data = out_string.data();
402
403 if (!::regexec(&s_regex, out_data, sizeof(matches) / sizeof(::regmatch_t), matches, 0))
404 {
405 if (matches[1].rm_so != -1)
406 m_opcode_name.assign(out_data + matches[1].rm_so, matches[1].rm_eo - matches[1].rm_so);
407 if (matches[2].rm_so != -1)
408 m_mnemocics.assign(out_data + matches[2].rm_so, matches[2].rm_eo - matches[2].rm_so);
409 }
410
411 m_is_valid = true;
412 }
413
414 return true;
415 }
416
417 bool m_is_valid;
418 DisassemblerLLVMC &m_disasm;
419 std::vector<uint8_t> m_raw_bytes;
420
421 bool m_no_comments;
422 StreamString m_comment_stream;
Sean Callanan5f1b66c2012-03-02 23:22:53 +0000423 LazyBool m_does_branch;
Sean Callanan32a56ec2012-02-17 00:53:45 +0000424
425 static bool s_regex_compiled;
426 static ::regex_t s_regex;
427};
428
429bool InstructionLLVMC::s_regex_compiled = false;
430::regex_t InstructionLLVMC::s_regex;
431
432Disassembler *
433DisassemblerLLVMC::CreateInstance (const ArchSpec &arch)
434{
435 std::auto_ptr<DisassemblerLLVMC> disasm_ap (new DisassemblerLLVMC(arch));
436
437 if (disasm_ap.get() && disasm_ap->IsValid())
438 return disasm_ap.release();
439
440 return NULL;
441}
442
443DisassemblerLLVMC::DisassemblerLLVMC (const ArchSpec &arch) :
444 Disassembler(arch),
445 m_disasm_context(NULL),
446 m_alternate_disasm_context(NULL)
447{
448 m_disasm_context = ::LLVMCreateDisasm(arch.GetTriple().getTriple().c_str(),
449 (void*)this,
450 /*TagType=*/1,
Sean Callananaa12be42012-02-23 23:43:28 +0000451 NULL,
Sean Callanan32a56ec2012-02-17 00:53:45 +0000452 DisassemblerLLVMC::SymbolLookupCallback);
453
454 if (arch.GetTriple().getArch() == llvm::Triple::arm)
455 {
456 m_alternate_disasm_context = ::LLVMCreateDisasm("thumbv7-apple-darwin",
457 (void*)this,
458 /*TagType=*/1,
Sean Callananaa12be42012-02-23 23:43:28 +0000459 NULL,
Sean Callanan32a56ec2012-02-17 00:53:45 +0000460 DisassemblerLLVMC::SymbolLookupCallback);
461 }
462}
463
464DisassemblerLLVMC::~DisassemblerLLVMC()
465{
466}
467
468size_t
469DisassemblerLLVMC::DecodeInstructions (const Address &base_addr,
470 const DataExtractor& data,
471 uint32_t data_offset,
472 uint32_t num_instructions,
473 bool append)
474{
475 if (!append)
476 m_instruction_list.Clear();
477
478 if (!IsValid())
479 return 0;
480
481 uint32_t data_cursor = data_offset;
482 size_t data_byte_size = data.GetByteSize();
483 uint32_t instructions_parsed = 0;
484
485 uint64_t instruction_pointer = base_addr.GetFileAddress();
486
487 std::vector<char> out_string(256);
488
489 while (data_offset < data_byte_size && instructions_parsed < num_instructions)
490 {
491 Address instr_address = base_addr;
492 instr_address.Slide(data_cursor);
493
494 AddressClass address_class = eAddressClassUnknown;
495
496 if (m_alternate_disasm_context)
497 address_class = instr_address.GetAddressClass ();
498
499 InstructionSP inst_sp(new InstructionLLVMC(*this,
500 instr_address,
501 address_class));
502
503 if (!inst_sp)
504 return data_cursor - data_offset;
505
506 uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor);
507
508 if (!inst_size)
509 return data_cursor - data_offset;
510
511 m_instruction_list.Append(inst_sp);
512
513 instruction_pointer += inst_size;
514 data_cursor += inst_size;
515 instructions_parsed++;
516 }
517
518 return data_cursor - data_offset;
519}
520
521void
522DisassemblerLLVMC::Initialize()
523{
524 PluginManager::RegisterPlugin (GetPluginNameStatic(),
525 GetPluginDescriptionStatic(),
526 CreateInstance);
527
528 llvm::InitializeAllTargetInfos();
529 llvm::InitializeAllTargetMCs();
530 llvm::InitializeAllAsmParsers();
531 llvm::InitializeAllDisassemblers();
532}
533
534void
535DisassemblerLLVMC::Terminate()
536{
537 PluginManager::UnregisterPlugin (CreateInstance);
538}
539
540
541const char *
542DisassemblerLLVMC::GetPluginNameStatic()
543{
544 return "llvm";
545}
546
547const char *
548DisassemblerLLVMC::GetPluginDescriptionStatic()
549{
550 return "Disassembler that uses LLVM opcode tables to disassemble i386, x86_64 and ARM.";
551}
552
553int DisassemblerLLVMC::OpInfoCallback (void *DisInfo,
554 uint64_t PC,
555 uint64_t Offset,
556 uint64_t Size,
557 int TagType,
558 void *TagBug)
559{
560 return static_cast<DisassemblerLLVMC*>(DisInfo)->OpInfo(PC,
561 Offset,
562 Size,
563 TagType,
564 TagBug);
565}
566
567const char *DisassemblerLLVMC::SymbolLookupCallback(void *DisInfo,
568 uint64_t ReferenceValue,
569 uint64_t *ReferenceType,
570 uint64_t ReferencePC,
571 const char **ReferenceName)
572{
573 return static_cast<DisassemblerLLVMC*>(DisInfo)->SymbolLookup(ReferenceValue,
574 ReferenceType,
575 ReferencePC,
576 ReferenceName);
577}
578
579int DisassemblerLLVMC::OpInfo (uint64_t PC,
580 uint64_t Offset,
581 uint64_t Size,
582 int TagType,
583 void *TagBug)
584{
585 switch (TagType)
586 {
587 default:
588 break;
589 case 1:
590 bzero (TagBug, sizeof(::LLVMOpInfo1));
591 break;
592 }
593 return 0;
594}
595
596const char *DisassemblerLLVMC::SymbolLookup (uint64_t ReferenceValue,
597 uint64_t *ReferenceType,
598 uint64_t ReferencePC,
599 const char **ReferenceName)
600{
601 const char *result_name = NULL;
602 uint64_t result_reference_type = LLVMDisassembler_ReferenceType_InOut_None;
603 const char *result_referred_name = NULL;
604
605 if (m_exe_scope && m_inst)
606 {
607 Address reference_address;
608
Johnny Chendd5e3632012-02-20 22:05:34 +0000609 TargetSP target_sp (m_exe_scope->CalculateTarget());
610 Target *target = target_sp.get();
Sean Callanan32a56ec2012-02-17 00:53:45 +0000611
612 if (target)
613 {
614 if (!target->GetSectionLoadList().IsEmpty())
615 target->GetSectionLoadList().ResolveLoadAddress(ReferenceValue, reference_address);
616 else
617 target->GetImages().ResolveFileAddress(ReferenceValue, reference_address);
618
Sean Callananaa12be42012-02-23 23:43:28 +0000619 if (reference_address.IsValid() && reference_address.GetSection())
Sean Callanan32a56ec2012-02-17 00:53:45 +0000620 {
Sean Callanan32a56ec2012-02-17 00:53:45 +0000621 StreamString ss;
622
Sean Callananaa12be42012-02-23 23:43:28 +0000623 reference_address.Dump (&ss,
624 target,
625 Address::DumpStyleResolvedDescriptionNoModule,
626 Address::DumpStyleSectionNameOffset);
Sean Callanan32a56ec2012-02-17 00:53:45 +0000627
628 m_inst->AddReferencedAddress(ss.GetString());
629 }
630 }
631 }
632
633 *ReferenceType = result_reference_type;
634 *ReferenceName = result_referred_name;
635
636 return result_name;
637}
638
//------------------------------------------------------------------
// PluginInterface protocol
//------------------------------------------------------------------
642const char *
643DisassemblerLLVMC::GetPluginName()
644{
645 return "DisassemblerLLVMC";
646}
647
648const char *
649DisassemblerLLVMC::GetShortPluginName()
650{
651 return GetPluginNameStatic();
652}
653
654uint32_t
655DisassemblerLLVMC::GetPluginVersion()
656{
657 return 1;
658}
659