blob: 2666475adcb6949bdc18d86cf338e436f3a26d0c [file] [log] [blame]
//===-- DisassemblerLLVMC.cpp -----------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9
10#include "DisassemblerLLVMC.h"
11
12#include "llvm-c/Disassembler.h"
13#include "llvm/Support/TargetSelect.h"
14
15#include "lldb/Core/Address.h"
16#include "lldb/Core/DataExtractor.h"
17#include "lldb/Core/Stream.h"
18#include "lldb/Symbol/SymbolContext.h"
19#include "lldb/Target/ExecutionContext.h"
20#include "lldb/Target/Process.h"
21#include "lldb/Target/RegisterContext.h"
22#include "lldb/Target/Target.h"
23#include "lldb/Target/StackFrame.h"
24
25#include <regex.h>
26
27using namespace lldb;
28using namespace lldb_private;
29
30class InstructionLLVMC : public lldb_private::Instruction
31{
32public:
33 InstructionLLVMC (DisassemblerLLVMC &disasm,
34 const lldb_private::Address &address,
35 lldb_private::AddressClass addr_class) :
36 Instruction(address, addr_class),
37 m_disasm(disasm),
38 m_is_valid(false),
39 m_no_comments(true),
Sean Callanan5f1b66c2012-03-02 23:22:53 +000040 m_comment_stream(),
41 m_does_branch(eLazyBoolCalculate)
Sean Callanan32a56ec2012-02-17 00:53:45 +000042 {
43 }
44
45 virtual
46 ~InstructionLLVMC ()
47 {
48 }
49
50 static void
51 PadToWidth (lldb_private::StreamString &ss,
52 int new_width)
53 {
54 int old_width = ss.GetSize();
55
56 if (old_width < new_width)
57 {
58 ss.Printf("%*s", new_width - old_width, "");
59 }
60 }
61
62 virtual void
63 Dump (lldb_private::Stream *s,
64 uint32_t max_opcode_byte_size,
65 bool show_address,
66 bool show_bytes,
67 const lldb_private::ExecutionContext* exe_ctx,
68 bool raw)
69 {
70 const size_t opcode_column_width = 7;
71 const size_t operand_column_width = 25;
72
73 StreamString ss;
74
75 ExecutionContextScope *exe_scope = NULL;
76
77 if ((!raw) && exe_ctx)
78 {
79 exe_scope = exe_ctx->GetBestExecutionContextScope();
80
81 DataExtractor extractor(m_raw_bytes.data(),
82 m_raw_bytes.size(),
83 m_disasm.GetArchitecture().GetByteOrder(),
84 m_disasm.GetArchitecture().GetAddressByteSize());
85
86 Parse <true> (m_address,
87 m_address_class,
88 extractor,
89 0,
90 exe_scope);
91 }
92
93 if (show_address)
94 {
95 m_address.Dump(&ss,
96 exe_scope,
97 Address::DumpStyleLoadAddress,
98 Address::DumpStyleModuleWithFileAddress,
99 0);
100
101 ss.PutCString(": ");
102 }
103
104 if (show_bytes)
105 {
106 if (m_opcode.GetType() == Opcode::eTypeBytes)
107 {
108 // x86_64 and i386 are the only ones that use bytes right now so
109 // pad out the byte dump to be able to always show 15 bytes (3 chars each)
110 // plus a space
111 if (max_opcode_byte_size > 0)
112 m_opcode.Dump (&ss, max_opcode_byte_size * 3 + 1);
113 else
114 m_opcode.Dump (&ss, 15 * 3 + 1);
115 }
116 else
117 {
118 // Else, we have ARM which can show up to a uint32_t 0x00000000 (10 spaces)
119 // plus two for padding...
120 if (max_opcode_byte_size > 0)
121 m_opcode.Dump (&ss, max_opcode_byte_size * 3 + 1);
122 else
123 m_opcode.Dump (&ss, 12);
124 }
125 }
126
127 int size_before_inst = ss.GetSize();
128
129 ss.PutCString(m_opcode_name.c_str());
130
131 PadToWidth(ss, size_before_inst + opcode_column_width);
132
133 ss.PutCString(m_mnemocics.c_str());
134
135 PadToWidth(ss, size_before_inst + opcode_column_width + operand_column_width);
136
137 if (!m_comment.empty())
138 {
139 ss.PutCString(" ; ");
140 ss.PutCString(m_comment.c_str());
141 }
142
143 ss.Flush();
144
145 s->PutCString(ss.GetData());
146 }
147
148 virtual bool
149 DoesBranch () const
150 {
Sean Callanan5f1b66c2012-03-02 23:22:53 +0000151 return m_does_branch == eLazyBoolYes;
Sean Callanan32a56ec2012-02-17 00:53:45 +0000152 }
153
154 virtual size_t
155 Decode (const lldb_private::Disassembler &disassembler,
156 const lldb_private::DataExtractor &data,
157 uint32_t data_offset)
158 {
159 Parse <false> (m_address,
160 m_address_class,
161 data,
162 data_offset,
163 NULL);
164
165 return m_opcode.GetByteSize();
166 }
167
168 void
169 AddReferencedAddress (std::string &description)
170 {
171 if (m_no_comments)
172 m_comment_stream.PutCString(", ");
173 else
174 m_no_comments = true;
175
176 m_comment_stream.PutCString(description.c_str());
177 }
178
179 virtual void
180 CalculateMnemonicOperandsAndComment (lldb_private::ExecutionContextScope *exe_scope)
181 {
182 DataExtractor extractor(m_raw_bytes.data(),
183 m_raw_bytes.size(),
184 m_disasm.GetArchitecture().GetByteOrder(),
185 m_disasm.GetArchitecture().GetAddressByteSize());
186
187 Parse <true> (m_address,
188 m_address_class,
189 extractor,
190 0,
191 exe_scope);
192 }
193
194 bool
195 IsValid ()
196 {
197 return m_is_valid;
198 }
199
200 size_t
201 GetByteSize ()
202 {
203 return m_opcode.GetByteSize();
204 }
205protected:
206 void PopulateOpcode (const DataExtractor &extractor,
207 uint32_t offset,
208 size_t inst_size)
209 {
210 llvm::Triple::ArchType arch = m_disasm.GetArchitecture().GetMachine();
211
212 switch (arch)
213 {
214 default:
215 case llvm::Triple::x86:
216 case llvm::Triple::x86_64:
217 m_opcode.SetOpcodeBytes(extractor.PeekData(offset, inst_size), inst_size);
218 break;
219 case llvm::Triple::arm:
220 case llvm::Triple::thumb:
221 switch (inst_size)
222 {
223 case 2:
224 {
225 m_opcode.SetOpcode16 (extractor.GetU16 (&offset));
226 break;
227 }
228 break;
229 case 4:
230 {
231 if (arch == llvm::Triple::arm &&
232 m_address_class == eAddressClassCodeAlternateISA)
233 {
234 // If it is a 32-bit THUMB instruction, we need to swap the upper & lower halves.
235 uint32_t orig_bytes = extractor.GetU32 (&offset);
236 uint16_t upper_bits = (orig_bytes >> 16) & ((1u << 16) - 1);
237 uint16_t lower_bits = orig_bytes & ((1u << 16) - 1);
238 uint32_t swapped = (lower_bits << 16) | upper_bits;
239 m_opcode.SetOpcode32 (swapped);
240 }
241 else
242 {
243 m_opcode.SetOpcode32 (extractor.GetU32 (&offset));
244 }
245 }
246 break;
247 default:
248 assert (!"Invalid ARM opcode size");
249 break;
250 }
251 break;
252 }
253 }
254
Sean Callanan5f1b66c2012-03-02 23:22:53 +0000255 bool StringRepresentsBranch (const char *data, size_t size)
256 {
257 const char *cursor = data;
258
259 bool inWhitespace = true;
260
261 while (inWhitespace && cursor < data + size)
262 {
263 switch (*cursor)
264 {
265 default:
266 inWhitespace = false;
267 break;
268 case ' ':
269 break;
270 case '\t':
271 break;
272 }
273
274 if (inWhitespace)
275 ++cursor;
276 }
277
278 if (cursor >= data + size)
279 return false;
280
281 llvm::Triple::ArchType arch = m_disasm.GetArchitecture().GetMachine();
282
283 switch (arch)
284 {
285 default:
286 return false;
287 case llvm::Triple::x86:
288 case llvm::Triple::x86_64:
289 switch (cursor[0])
290 {
291 default:
292 return false;
293 case 'j':
294 return true;
295 case 'c':
296 if (cursor[1] == 'a' &&
297 cursor[2] == 'l' &&
298 cursor[3] == 'l')
299 return true;
300 else
301 return false;
302 }
303 case llvm::Triple::arm:
304 case llvm::Triple::thumb:
305 switch (cursor[0])
306 {
307 default:
308 return false;
309 case 'b':
310 {
311 switch (cursor[1])
312 {
313 default:
314 return false;
315 case 'l':
316 case 'x':
317 case ' ':
318 case '\t':
319 return true;
320 }
321 return false;
322 }
323 case 'c':
324 {
325 switch (cursor[1])
326 {
327 default:
328 return false;
329 case 'b':
330 return true;
331 }
332 }
333 }
334 }
335
336 return false;
337 }
338
Sean Callanan32a56ec2012-02-17 00:53:45 +0000339 template <bool Reparse> bool Parse (const lldb_private::Address &address,
340 lldb_private::AddressClass addr_class,
341 const DataExtractor &extractor,
342 uint32_t data_offset,
343 lldb_private::ExecutionContextScope *exe_scope)
344 {
345 std::vector<char> out_string(256);
346
347 const uint8_t *data_start = extractor.GetDataStart();
348
349 m_disasm.Lock(this, exe_scope);
350
351 ::LLVMDisasmContextRef disasm_context;
352
353 if (addr_class == eAddressClassCodeAlternateISA)
354 disasm_context = m_disasm.m_alternate_disasm_context;
355 else
356 disasm_context = m_disasm.m_disasm_context;
357
358 m_comment_stream.Clear();
Sean Callanan501e77a2012-03-22 01:10:50 +0000359
360 lldb::addr_t pc = LLDB_INVALID_ADDRESS;
361
362 if (exe_scope)
363 if (TargetSP target_sp = exe_scope->CalculateTarget())
364 pc = m_address.GetLoadAddress(target_sp.get());
365
366 if (pc == LLDB_INVALID_ADDRESS)
367 pc = m_address.GetFileAddress();
368
369 size_t inst_size = ::LLVMDisasmInstruction(disasm_context,
370 const_cast<uint8_t*>(data_start) + data_offset,
371 extractor.GetByteSize() - data_offset,
372 pc,
Sean Callanan32a56ec2012-02-17 00:53:45 +0000373 out_string.data(),
374 out_string.size());
375
Sean Callanan5f1b66c2012-03-02 23:22:53 +0000376 if (m_does_branch == eLazyBoolCalculate)
377 m_does_branch = (StringRepresentsBranch (out_string.data(), out_string.size()) ?
378 eLazyBoolYes : eLazyBoolNo);
379
Sean Callanan32a56ec2012-02-17 00:53:45 +0000380 m_comment_stream.Flush();
381 m_no_comments = false;
382
383 m_comment.swap(m_comment_stream.GetString());
384
385 m_disasm.Unlock();
386
387 if (Reparse)
388 {
389 if (inst_size != m_raw_bytes.size())
390 return false;
391 }
392 else
393 {
394 if (!inst_size)
395 return false;
Sean Callanan32a56ec2012-02-17 00:53:45 +0000396 }
Sean Callanan501e77a2012-03-22 01:10:50 +0000397
398 PopulateOpcode(extractor, data_offset, inst_size);
399
400 m_raw_bytes.resize(inst_size);
401 memcpy(m_raw_bytes.data(), data_start + data_offset, inst_size);
402
403 if (!s_regex_compiled)
404 {
405 ::regcomp(&s_regex, "[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?", REG_EXTENDED);
406 s_regex_compiled = true;
407 }
408
409 ::regmatch_t matches[3];
410
411 const char *out_data = out_string.data();
412
413 if (!::regexec(&s_regex, out_data, sizeof(matches) / sizeof(::regmatch_t), matches, 0))
414 {
415 if (matches[1].rm_so != -1)
416 m_opcode_name.assign(out_data + matches[1].rm_so, matches[1].rm_eo - matches[1].rm_so);
417 if (matches[2].rm_so != -1)
418 m_mnemocics.assign(out_data + matches[2].rm_so, matches[2].rm_eo - matches[2].rm_so);
419 }
420
421 m_is_valid = true;
Sean Callanan32a56ec2012-02-17 00:53:45 +0000422
423 return true;
424 }
425
426 bool m_is_valid;
427 DisassemblerLLVMC &m_disasm;
428 std::vector<uint8_t> m_raw_bytes;
429
430 bool m_no_comments;
431 StreamString m_comment_stream;
Sean Callanan5f1b66c2012-03-02 23:22:53 +0000432 LazyBool m_does_branch;
Sean Callanan32a56ec2012-02-17 00:53:45 +0000433
434 static bool s_regex_compiled;
435 static ::regex_t s_regex;
436};
437
438bool InstructionLLVMC::s_regex_compiled = false;
439::regex_t InstructionLLVMC::s_regex;
440
441Disassembler *
442DisassemblerLLVMC::CreateInstance (const ArchSpec &arch)
443{
444 std::auto_ptr<DisassemblerLLVMC> disasm_ap (new DisassemblerLLVMC(arch));
445
446 if (disasm_ap.get() && disasm_ap->IsValid())
447 return disasm_ap.release();
448
449 return NULL;
450}
451
452DisassemblerLLVMC::DisassemblerLLVMC (const ArchSpec &arch) :
453 Disassembler(arch),
454 m_disasm_context(NULL),
455 m_alternate_disasm_context(NULL)
456{
457 m_disasm_context = ::LLVMCreateDisasm(arch.GetTriple().getTriple().c_str(),
458 (void*)this,
459 /*TagType=*/1,
Sean Callananaa12be42012-02-23 23:43:28 +0000460 NULL,
Sean Callanan32a56ec2012-02-17 00:53:45 +0000461 DisassemblerLLVMC::SymbolLookupCallback);
462
463 if (arch.GetTriple().getArch() == llvm::Triple::arm)
464 {
465 m_alternate_disasm_context = ::LLVMCreateDisasm("thumbv7-apple-darwin",
466 (void*)this,
467 /*TagType=*/1,
Sean Callananaa12be42012-02-23 23:43:28 +0000468 NULL,
Sean Callanan32a56ec2012-02-17 00:53:45 +0000469 DisassemblerLLVMC::SymbolLookupCallback);
470 }
471}
472
473DisassemblerLLVMC::~DisassemblerLLVMC()
474{
475}
476
477size_t
478DisassemblerLLVMC::DecodeInstructions (const Address &base_addr,
479 const DataExtractor& data,
480 uint32_t data_offset,
481 uint32_t num_instructions,
482 bool append)
483{
484 if (!append)
485 m_instruction_list.Clear();
486
487 if (!IsValid())
488 return 0;
489
490 uint32_t data_cursor = data_offset;
491 size_t data_byte_size = data.GetByteSize();
492 uint32_t instructions_parsed = 0;
493
494 uint64_t instruction_pointer = base_addr.GetFileAddress();
495
496 std::vector<char> out_string(256);
497
498 while (data_offset < data_byte_size && instructions_parsed < num_instructions)
499 {
500 Address instr_address = base_addr;
501 instr_address.Slide(data_cursor);
502
503 AddressClass address_class = eAddressClassUnknown;
504
505 if (m_alternate_disasm_context)
506 address_class = instr_address.GetAddressClass ();
507
508 InstructionSP inst_sp(new InstructionLLVMC(*this,
509 instr_address,
510 address_class));
511
512 if (!inst_sp)
513 return data_cursor - data_offset;
514
515 uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor);
516
517 if (!inst_size)
518 return data_cursor - data_offset;
519
520 m_instruction_list.Append(inst_sp);
521
522 instruction_pointer += inst_size;
523 data_cursor += inst_size;
524 instructions_parsed++;
525 }
526
527 return data_cursor - data_offset;
528}
529
530void
531DisassemblerLLVMC::Initialize()
532{
533 PluginManager::RegisterPlugin (GetPluginNameStatic(),
534 GetPluginDescriptionStatic(),
535 CreateInstance);
536
537 llvm::InitializeAllTargetInfos();
538 llvm::InitializeAllTargetMCs();
539 llvm::InitializeAllAsmParsers();
540 llvm::InitializeAllDisassemblers();
541}
542
543void
544DisassemblerLLVMC::Terminate()
545{
546 PluginManager::UnregisterPlugin (CreateInstance);
547}
548
549
550const char *
551DisassemblerLLVMC::GetPluginNameStatic()
552{
Greg Clayton34814d62012-03-22 00:49:15 +0000553 return "llvm-mc";
Sean Callanan32a56ec2012-02-17 00:53:45 +0000554}
555
556const char *
557DisassemblerLLVMC::GetPluginDescriptionStatic()
558{
Greg Clayton34814d62012-03-22 00:49:15 +0000559 return "Disassembler that uses LLVM MC to disassemble i386, x86_64 and ARM.";
Sean Callanan32a56ec2012-02-17 00:53:45 +0000560}
561
562int DisassemblerLLVMC::OpInfoCallback (void *DisInfo,
563 uint64_t PC,
564 uint64_t Offset,
565 uint64_t Size,
566 int TagType,
567 void *TagBug)
568{
569 return static_cast<DisassemblerLLVMC*>(DisInfo)->OpInfo(PC,
570 Offset,
571 Size,
572 TagType,
573 TagBug);
574}
575
576const char *DisassemblerLLVMC::SymbolLookupCallback(void *DisInfo,
577 uint64_t ReferenceValue,
578 uint64_t *ReferenceType,
579 uint64_t ReferencePC,
580 const char **ReferenceName)
581{
582 return static_cast<DisassemblerLLVMC*>(DisInfo)->SymbolLookup(ReferenceValue,
583 ReferenceType,
584 ReferencePC,
585 ReferenceName);
586}
587
588int DisassemblerLLVMC::OpInfo (uint64_t PC,
589 uint64_t Offset,
590 uint64_t Size,
591 int TagType,
592 void *TagBug)
593{
594 switch (TagType)
595 {
596 default:
597 break;
598 case 1:
599 bzero (TagBug, sizeof(::LLVMOpInfo1));
600 break;
601 }
602 return 0;
603}
604
605const char *DisassemblerLLVMC::SymbolLookup (uint64_t ReferenceValue,
606 uint64_t *ReferenceType,
607 uint64_t ReferencePC,
608 const char **ReferenceName)
609{
610 const char *result_name = NULL;
611 uint64_t result_reference_type = LLVMDisassembler_ReferenceType_InOut_None;
612 const char *result_referred_name = NULL;
613
614 if (m_exe_scope && m_inst)
615 {
616 Address reference_address;
617
Johnny Chendd5e3632012-02-20 22:05:34 +0000618 TargetSP target_sp (m_exe_scope->CalculateTarget());
619 Target *target = target_sp.get();
Sean Callanan32a56ec2012-02-17 00:53:45 +0000620
621 if (target)
622 {
623 if (!target->GetSectionLoadList().IsEmpty())
624 target->GetSectionLoadList().ResolveLoadAddress(ReferenceValue, reference_address);
625 else
626 target->GetImages().ResolveFileAddress(ReferenceValue, reference_address);
627
Sean Callananaa12be42012-02-23 23:43:28 +0000628 if (reference_address.IsValid() && reference_address.GetSection())
Sean Callanan32a56ec2012-02-17 00:53:45 +0000629 {
Sean Callanan32a56ec2012-02-17 00:53:45 +0000630 StreamString ss;
631
Sean Callananaa12be42012-02-23 23:43:28 +0000632 reference_address.Dump (&ss,
633 target,
634 Address::DumpStyleResolvedDescriptionNoModule,
635 Address::DumpStyleSectionNameOffset);
Sean Callanan32a56ec2012-02-17 00:53:45 +0000636
637 m_inst->AddReferencedAddress(ss.GetString());
638 }
639 }
640 }
641
642 *ReferenceType = result_reference_type;
643 *ReferenceName = result_referred_name;
644
645 return result_name;
646}
647
648//------------------------------------------------------------------
649// PluginInterface protocol
650//------------------------------------------------------------------
651const char *
652DisassemblerLLVMC::GetPluginName()
653{
654 return "DisassemblerLLVMC";
655}
656
657const char *
658DisassemblerLLVMC::GetShortPluginName()
659{
660 return GetPluginNameStatic();
661}
662
663uint32_t
664DisassemblerLLVMC::GetPluginVersion()
665{
666 return 1;
667}
668