blob: a027201f6b8bbd6da326fc227651eb22bfc32b83 [file] [log] [blame]
Chris Lattner24943d22010-06-08 16:52:24 +00001//===-- DisassemblerLLVM.cpp ------------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "DisassemblerLLVM.h"
11
12#include "llvm-c/EnhancedDisassembly.h"
13
14#include "lldb/Core/Address.h"
15#include "lldb/Core/DataExtractor.h"
16#include "lldb/Core/Disassembler.h"
17#include "lldb/Core/Module.h"
18#include "lldb/Core/PluginManager.h"
19#include "lldb/Core/Stream.h"
20#include "lldb/Core/StreamString.h"
21#include "lldb/Symbol/SymbolContext.h"
22
23#include "lldb/Target/ExecutionContext.h"
24#include "lldb/Target/Process.h"
25#include "lldb/Target/RegisterContext.h"
26#include "lldb/Target/Target.h"
27
Greg Claytonb01000f2011-01-17 03:46:26 +000028#include <assert.h>
Chris Lattner24943d22010-06-08 16:52:24 +000029
30using namespace lldb;
31using namespace lldb_private;
32
33
Greg Claytonb1888f22011-03-19 01:12:21 +000034static int
Greg Clayton7bc39082011-03-24 23:53:38 +000035DataExtractorByteReader (uint8_t *byte, uint64_t address, void *arg)
Chris Lattner24943d22010-06-08 16:52:24 +000036{
37 DataExtractor &extractor = *((DataExtractor *)arg);
38
39 if (extractor.ValidOffset(address))
40 {
41 *byte = *(extractor.GetDataStart() + address);
42 return 0;
43 }
44 else
45 {
46 return -1;
47 }
48}
49
50namespace {
51 struct RegisterReaderArg {
52 const lldb::addr_t instructionPointer;
53 const EDDisassemblerRef disassembler;
54
55 RegisterReaderArg(lldb::addr_t ip,
56 EDDisassemblerRef dis) :
57 instructionPointer(ip),
58 disassembler(dis)
59 {
60 }
61 };
62}
63
64static int IPRegisterReader(uint64_t *value, unsigned regID, void* arg)
65{
66 uint64_t instructionPointer = ((RegisterReaderArg*)arg)->instructionPointer;
67 EDDisassemblerRef disassembler = ((RegisterReaderArg*)arg)->disassembler;
68
Greg Claytonb1888f22011-03-19 01:12:21 +000069 if (EDRegisterIsProgramCounter(disassembler, regID)) {
Chris Lattner24943d22010-06-08 16:52:24 +000070 *value = instructionPointer;
71 return 0;
72 }
73
74 return -1;
75}
76
Caroline Ticeaf591802011-04-05 23:22:54 +000077InstructionLLVM::InstructionLLVM (const Address &addr,
78 AddressClass addr_class,
Greg Claytonabe0fed2011-04-18 08:33:37 +000079 EDDisassemblerRef disassembler,
Johnny Chen80ab18e2011-05-12 22:25:53 +000080 llvm::Triple::ArchType arch_type) :
Greg Clayton889fbd02011-03-26 19:14:58 +000081 Instruction (addr, addr_class),
Greg Claytonabe0fed2011-04-18 08:33:37 +000082 m_disassembler (disassembler),
Johnny Chen80ab18e2011-05-12 22:25:53 +000083 m_arch_type (arch_type)
Chris Lattner24943d22010-06-08 16:52:24 +000084{
85}
86
Caroline Ticeaf591802011-04-05 23:22:54 +000087InstructionLLVM::~InstructionLLVM()
Chris Lattner24943d22010-06-08 16:52:24 +000088{
89}
90
91static void
92PadString(Stream *s, const std::string &str, size_t width)
93{
94 int diff = width - str.length();
95
96 if (diff > 0)
97 s->Printf("%s%*.*s", str.c_str(), diff, diff, "");
98 else
99 s->Printf("%s ", str.c_str());
100}
Johnny Chend254eb92011-05-23 23:29:23 +0000101static void
102AddSymbolicInfo(const ExecutionContext *exe_ctx, ExecutionContextScope *exe_scope,
103 StreamString &comment, uint64_t operand_value, const Address &inst_addr)
104{
105 Address so_addr;
106 if (exe_ctx && exe_ctx->target && !exe_ctx->target->GetSectionLoadList().IsEmpty())
107 {
108 if (exe_ctx->target->GetSectionLoadList().ResolveLoadAddress(operand_value, so_addr))
109 so_addr.Dump(&comment, exe_scope, Address::DumpStyleResolvedDescriptionNoModule, Address::DumpStyleSectionNameOffset);
110 }
111 else
112 {
113 Module *module = inst_addr.GetModule();
114 if (module)
115 {
116 if (module->ResolveFileAddress(operand_value, so_addr))
117 so_addr.Dump(&comment, exe_scope, Address::DumpStyleResolvedDescriptionNoModule, Address::DumpStyleSectionNameOffset);
118 }
119 }
120}
Chris Lattner24943d22010-06-08 16:52:24 +0000121
Johnny Chen51ff2482011-05-19 01:05:37 +0000122#include "llvm/ADT/StringRef.h"
Johnny Chend254eb92011-05-23 23:29:23 +0000123static inline void StripSpaces(llvm::StringRef &Str)
Johnny Chen51ff2482011-05-19 01:05:37 +0000124{
125 while (!Str.empty() && isspace(Str[0]))
126 Str = Str.substr(1);
127 while (!Str.empty() && isspace(Str.back()))
128 Str = Str.substr(0, Str.size()-1);
129}
Johnny Chend254eb92011-05-23 23:29:23 +0000130static inline void RStrip(llvm::StringRef &Str, char c)
131{
132 if (!Str.empty() && Str.back() == c)
133 Str = Str.substr(0, Str.size()-1);
134}
Johnny Chen3930cbe2011-05-24 20:36:40 +0000135// Aligns the raw disassembly (passed as 'str') with the rest of edis'ed disassembly output.
136// This is called from non-raw mode when edis of the current m_inst fails for some reason.
Johnny Chen84d42e82011-05-21 00:55:57 +0000137static void
Johnny Chend17f8012011-05-23 18:00:40 +0000138Align(Stream *s, const char *str, size_t opcodeColWidth, size_t operandColWidth)
Johnny Chen84d42e82011-05-21 00:55:57 +0000139{
140 llvm::StringRef raw_disasm(str);
141 StripSpaces(raw_disasm);
Johnny Chend17f8012011-05-23 18:00:40 +0000142 // Split the raw disassembly into opcode and operands.
143 std::pair<llvm::StringRef, llvm::StringRef> p = raw_disasm.split('\t');
144 PadString(s, p.first, opcodeColWidth);
145 if (!p.second.empty())
146 PadString(s, p.second, operandColWidth);
Johnny Chen84d42e82011-05-21 00:55:57 +0000147}
Johnny Chen51ff2482011-05-19 01:05:37 +0000148
// Rounds a 32-bit program counter value down to a 4-byte boundary, i.e. the
// Align(PC, 4) step of the Thumb BLX (immediate) target computation.
// The argument is parenthesized so that an expression built from operators
// binding more loosely than '&' (for example 'a | b') is masked as a whole.
#define AlignPC(pc_val) ((pc_val) & 0xFFFFFFFC)
Chris Lattner24943d22010-06-08 16:52:24 +0000150void
Caroline Ticeaf591802011-04-05 23:22:54 +0000151InstructionLLVM::Dump
Chris Lattner24943d22010-06-08 16:52:24 +0000152(
153 Stream *s,
Greg Clayton889fbd02011-03-26 19:14:58 +0000154 uint32_t max_opcode_byte_size,
Greg Clayton5c4c7462010-10-06 03:09:58 +0000155 bool show_address,
Greg Clayton149731c2011-03-25 18:03:16 +0000156 bool show_bytes,
Greg Clayton5c4c7462010-10-06 03:09:58 +0000157 const lldb_private::ExecutionContext* exe_ctx,
Chris Lattner24943d22010-06-08 16:52:24 +0000158 bool raw
159)
160{
161 const size_t opcodeColumnWidth = 7;
162 const size_t operandColumnWidth = 25;
163
Greg Clayton5c4c7462010-10-06 03:09:58 +0000164 ExecutionContextScope *exe_scope = NULL;
165 if (exe_ctx)
166 exe_scope = exe_ctx->GetBestExecutionContextScope();
167
Chris Lattner24943d22010-06-08 16:52:24 +0000168 // If we have an address, print it out
Sean Callanan91557b02010-11-10 01:38:28 +0000169 if (GetAddress().IsValid() && show_address)
Greg Clayton70436352010-06-30 23:03:03 +0000170 {
Greg Clayton5c4c7462010-10-06 03:09:58 +0000171 if (GetAddress().Dump (s,
172 exe_scope,
173 Address::DumpStyleLoadAddress,
174 Address::DumpStyleModuleWithFileAddress,
175 0))
Greg Clayton70436352010-06-30 23:03:03 +0000176 s->PutCString(": ");
177 }
Chris Lattner24943d22010-06-08 16:52:24 +0000178
179 // If we are supposed to show bytes, "bytes" will be non-NULL.
Greg Clayton149731c2011-03-25 18:03:16 +0000180 if (show_bytes)
Chris Lattner24943d22010-06-08 16:52:24 +0000181 {
Greg Clayton149731c2011-03-25 18:03:16 +0000182 if (m_opcode.GetType() == Opcode::eTypeBytes)
Chris Lattner24943d22010-06-08 16:52:24 +0000183 {
Greg Clayton149731c2011-03-25 18:03:16 +0000184 // x86_64 and i386 are the only ones that use bytes right now so
185 // pad out the byte dump to be able to always show 15 bytes (3 chars each)
186 // plus a space
Greg Clayton889fbd02011-03-26 19:14:58 +0000187 if (max_opcode_byte_size > 0)
188 m_opcode.Dump (s, max_opcode_byte_size * 3 + 1);
189 else
190 m_opcode.Dump (s, 15 * 3 + 1);
Greg Clayton149731c2011-03-25 18:03:16 +0000191 }
192 else
193 {
194 // Else, we have ARM which can show up to a uint32_t 0x00000000 (10 spaces)
195 // plus two for padding...
Greg Clayton889fbd02011-03-26 19:14:58 +0000196 if (max_opcode_byte_size > 0)
197 m_opcode.Dump (s, max_opcode_byte_size * 3 + 1);
198 else
199 m_opcode.Dump (s, 12);
Chris Lattner24943d22010-06-08 16:52:24 +0000200 }
201 }
202
Greg Claytonf15996e2011-04-07 22:46:35 +0000203 int numTokens = -1;
204
Johnny Chen80ab18e2011-05-12 22:25:53 +0000205 // FIXME!!!
206 /* Remove the following section of code related to force_raw .... */
Johnny Chend17f8012011-05-23 18:00:40 +0000207 /*
Johnny Chen80ab18e2011-05-12 22:25:53 +0000208 bool force_raw = m_arch_type == llvm::Triple::arm ||
209 m_arch_type == llvm::Triple::thumb;
Greg Claytonf15996e2011-04-07 22:46:35 +0000210 if (!raw)
Johnny Chen80ab18e2011-05-12 22:25:53 +0000211 raw = force_raw;
Johnny Chend17f8012011-05-23 18:00:40 +0000212 */
Johnny Chen80ab18e2011-05-12 22:25:53 +0000213 /* .... when we fix the edis for arm/thumb. */
Greg Claytonabe0fed2011-04-18 08:33:37 +0000214
Johnny Chen611b7e92011-08-19 17:31:59 +0000215 if (!raw)
Greg Claytonf15996e2011-04-07 22:46:35 +0000216 numTokens = EDNumTokens(m_inst);
Chris Lattner24943d22010-06-08 16:52:24 +0000217
218 int currentOpIndex = -1;
219
Greg Claytonf15996e2011-04-07 22:46:35 +0000220 bool printTokenized = false;
221
222 if (numTokens != -1 && !raw)
Sean Callanan8541f2f2010-07-23 02:19:15 +0000223 {
224 addr_t base_addr = LLDB_INVALID_ADDRESS;
Greg Claytonf15996e2011-04-07 22:46:35 +0000225
Greg Clayton5c4c7462010-10-06 03:09:58 +0000226 if (exe_ctx && exe_ctx->target && !exe_ctx->target->GetSectionLoadList().IsEmpty())
227 base_addr = GetAddress().GetLoadAddress (exe_ctx->target);
Sean Callanan8541f2f2010-07-23 02:19:15 +0000228 if (base_addr == LLDB_INVALID_ADDRESS)
Greg Clayton5c4c7462010-10-06 03:09:58 +0000229 base_addr = GetAddress().GetFileAddress ();
Greg Claytonf15996e2011-04-07 22:46:35 +0000230
Johnny Chen80ab18e2011-05-12 22:25:53 +0000231 lldb::addr_t PC = base_addr + EDInstByteSize(m_inst);
232
233 // When executing an ARM instruction, PC reads as the address of the
234 // current instruction plus 8. And for Thumb, it is plus 4.
235 if (m_arch_type == llvm::Triple::arm)
236 PC = base_addr + 8;
237 else if (m_arch_type == llvm::Triple::thumb)
238 PC = base_addr + 4;
239
240 RegisterReaderArg rra(PC, m_disassembler);
Johnny Chenc5272bf2011-05-12 18:48:11 +0000241
Chris Lattner24943d22010-06-08 16:52:24 +0000242 printTokenized = true;
243
244 // Handle the opcode column.
245
246 StreamString opcode;
247
248 int tokenIndex = 0;
249
250 EDTokenRef token;
251 const char *tokenStr;
252
Johnny Chenff8fea62011-05-18 22:48:41 +0000253 if (EDGetToken(&token, m_inst, tokenIndex)) // 0 on success
254 printTokenized = false;
255 else if (!EDTokenIsOpcode(token))
256 printTokenized = false;
257 else if (EDGetTokenString(&tokenStr, token)) // 0 on success
Chris Lattner24943d22010-06-08 16:52:24 +0000258 printTokenized = false;
259
Johnny Chenff8fea62011-05-18 22:48:41 +0000260 if (printTokenized)
Chris Lattner24943d22010-06-08 16:52:24 +0000261 {
Johnny Chenff8fea62011-05-18 22:48:41 +0000262 // Put the token string into our opcode string
263 opcode.PutCString(tokenStr);
Chris Lattner24943d22010-06-08 16:52:24 +0000264
Johnny Chenff8fea62011-05-18 22:48:41 +0000265 // If anything follows, it probably starts with some whitespace. Skip it.
266 if (++tokenIndex < numTokens)
267 {
268 if (EDGetToken(&token, m_inst, tokenIndex)) // 0 on success
269 printTokenized = false;
270 else if (!EDTokenIsWhitespace(token))
271 printTokenized = false;
272 }
273
274 ++tokenIndex;
Chris Lattner24943d22010-06-08 16:52:24 +0000275 }
276
Chris Lattner24943d22010-06-08 16:52:24 +0000277 // Handle the operands and the comment.
Chris Lattner24943d22010-06-08 16:52:24 +0000278 StreamString operands;
279 StreamString comment;
280
281 if (printTokenized)
282 {
Johnny Chen51ff2482011-05-19 01:05:37 +0000283 bool show_token = false;
Chris Lattner24943d22010-06-08 16:52:24 +0000284
285 for (; tokenIndex < numTokens; ++tokenIndex)
286 {
287 if (EDGetToken(&token, m_inst, tokenIndex))
288 return;
289
Johnny Chen6d61ebf2011-05-18 22:08:52 +0000290 int operandIndex = EDOperandIndexForToken(token);
Chris Lattner24943d22010-06-08 16:52:24 +0000291
Johnny Chen6d61ebf2011-05-18 22:08:52 +0000292 if (operandIndex >= 0)
293 {
294 if (operandIndex != currentOpIndex)
Chris Lattner24943d22010-06-08 16:52:24 +0000295 {
Johnny Chen6d61ebf2011-05-18 22:08:52 +0000296 show_token = true;
297
298 currentOpIndex = operandIndex;
299 EDOperandRef operand;
300
301 if (!EDGetOperand(&operand, m_inst, currentOpIndex))
Chris Lattner24943d22010-06-08 16:52:24 +0000302 {
Johnny Chen6d61ebf2011-05-18 22:08:52 +0000303 if (EDOperandIsMemory(operand))
Chris Lattner24943d22010-06-08 16:52:24 +0000304 {
Johnny Chen6d61ebf2011-05-18 22:08:52 +0000305 uint64_t operand_value;
306
307 if (!EDEvaluateOperand(&operand_value, operand, IPRegisterReader, &rra))
Chris Lattner24943d22010-06-08 16:52:24 +0000308 {
Johnny Chen6d61ebf2011-05-18 22:08:52 +0000309 if (EDInstIsBranch(m_inst))
Chris Lattner24943d22010-06-08 16:52:24 +0000310 {
Johnny Chen6d61ebf2011-05-18 22:08:52 +0000311 operands.Printf("0x%llx ", operand_value);
312 show_token = false;
313 }
314 else
315 {
316 // Put the address value into the comment
317 comment.Printf("0x%llx ", operand_value);
318 }
Chris Lattner24943d22010-06-08 16:52:24 +0000319
Johnny Chend254eb92011-05-23 23:29:23 +0000320 AddSymbolicInfo(exe_ctx, exe_scope, comment, operand_value, GetAddress());
Johnny Chen6d61ebf2011-05-18 22:08:52 +0000321 } // EDEvaluateOperand
322 } // EDOperandIsMemory
323 } // EDGetOperand
324 } // operandIndex != currentOpIndex
325 } // operandIndex >= 0
Chris Lattner24943d22010-06-08 16:52:24 +0000326
327 if (show_token)
328 {
Enrico Granata4c3fb4b2011-07-19 18:03:25 +0000329 if (EDGetTokenString(&tokenStr, token))
Chris Lattner24943d22010-06-08 16:52:24 +0000330 {
331 printTokenized = false;
332 break;
333 }
334
335 operands.PutCString(tokenStr);
336 }
337 } // for (tokenIndex)
338
Johnny Chende5cc8c2011-05-20 17:27:37 +0000339 // FIXME!!!
340 // Workaround for llvm::tB's operands not properly parsed by ARMAsmParser.
341 if (m_arch_type == llvm::Triple::thumb && opcode.GetString() == "b") {
342 const char *inst_str;
Peter Collingbourned77c0392011-05-20 22:42:59 +0000343 const char *pos = NULL;
Johnny Chenc298a982011-05-23 19:41:31 +0000344 operands.Clear(); comment.Clear();
Johnny Chende5cc8c2011-05-20 17:27:37 +0000345 if (EDGetInstString(&inst_str, m_inst) == 0 && (pos = strstr(inst_str, "#")) != NULL) {
346 uint64_t operand_value = PC + atoi(++pos);
Johnny Chend254eb92011-05-23 23:29:23 +0000347 // Put the address value into the operands.
Johnny Chende5cc8c2011-05-20 17:27:37 +0000348 operands.Printf("0x%llx ", operand_value);
Johnny Chend254eb92011-05-23 23:29:23 +0000349 AddSymbolicInfo(exe_ctx, exe_scope, comment, operand_value, GetAddress());
Johnny Chende5cc8c2011-05-20 17:27:37 +0000350 }
351 }
Johnny Chenc298a982011-05-23 19:41:31 +0000352 // Yet more workaround for "bl #..." and "blx #...".
353 if ((m_arch_type == llvm::Triple::arm || m_arch_type == llvm::Triple::thumb) &&
354 (opcode.GetString() == "bl" || opcode.GetString() == "blx")) {
355 const char *inst_str;
356 const char *pos = NULL;
357 operands.Clear(); comment.Clear();
358 if (EDGetInstString(&inst_str, m_inst) == 0 && (pos = strstr(inst_str, "#")) != NULL) {
Johnny Chen66786762011-08-03 04:50:37 +0000359 if (m_arch_type == llvm::Triple::thumb && opcode.GetString() == "blx") {
360 // A8.6.23 BLX (immediate)
361 // Target Address = Align(PC,4) + offset value
362 PC = AlignPC(PC);
363 }
Johnny Chenc298a982011-05-23 19:41:31 +0000364 uint64_t operand_value = PC + atoi(++pos);
Johnny Chend254eb92011-05-23 23:29:23 +0000365 // Put the address value into the comment.
Johnny Chenc298a982011-05-23 19:41:31 +0000366 comment.Printf("0x%llx ", operand_value);
Johnny Chend254eb92011-05-23 23:29:23 +0000367 // And the original token string into the operands.
368 llvm::StringRef Str(pos - 1);
369 RStrip(Str, '\n');
370 operands.PutCString(Str.str().c_str());
371 AddSymbolicInfo(exe_ctx, exe_scope, comment, operand_value, GetAddress());
Johnny Chenc298a982011-05-23 19:41:31 +0000372 }
373 }
Johnny Chende5cc8c2011-05-20 17:27:37 +0000374 // END of workaround.
375
Johnny Chen51ff2482011-05-19 01:05:37 +0000376 // If both operands and comment are empty, we will just print out
377 // the raw disassembly.
378 if (operands.GetString().empty() && comment.GetString().empty())
Chris Lattner24943d22010-06-08 16:52:24 +0000379 {
Johnny Chen51ff2482011-05-19 01:05:37 +0000380 const char *str;
381
382 if (EDGetInstString(&str, m_inst))
383 return;
Johnny Chend17f8012011-05-23 18:00:40 +0000384 Align(s, str, opcodeColumnWidth, operandColumnWidth);
Johnny Chen51ff2482011-05-19 01:05:37 +0000385 }
386 else
387 {
388 PadString(s, opcode.GetString(), opcodeColumnWidth);
389
390 if (comment.GetString().empty())
391 s->PutCString(operands.GetString().c_str());
Chris Lattner24943d22010-06-08 16:52:24 +0000392 else
393 {
Johnny Chen51ff2482011-05-19 01:05:37 +0000394 PadString(s, operands.GetString(), operandColumnWidth);
Chris Lattner24943d22010-06-08 16:52:24 +0000395
Johnny Chen51ff2482011-05-19 01:05:37 +0000396 s->PutCString("; ");
397 s->PutCString(comment.GetString().c_str());
398 } // else (comment.GetString().empty())
399 } // else (operands.GetString().empty() && comment.GetString().empty())
400 } // printTokenized
Chris Lattner24943d22010-06-08 16:52:24 +0000401 } // numTokens != -1
402
403 if (!printTokenized)
404 {
405 const char *str;
406
Johnny Chen51ff2482011-05-19 01:05:37 +0000407 if (EDGetInstString(&str, m_inst)) // 0 on success
Chris Lattner24943d22010-06-08 16:52:24 +0000408 return;
Johnny Chen08251ef2011-05-21 00:44:42 +0000409 if (raw)
410 s->Write(str, strlen(str) - 1);
411 else
412 {
413 // EDis fails to parse the tokens of this inst. Need to align this
Johnny Chen84d42e82011-05-21 00:55:57 +0000414 // raw disassembly's opcode with the rest of output.
Johnny Chend17f8012011-05-23 18:00:40 +0000415 Align(s, str, opcodeColumnWidth, operandColumnWidth);
Johnny Chen08251ef2011-05-21 00:44:42 +0000416 }
Chris Lattner24943d22010-06-08 16:52:24 +0000417 }
418}
419
420bool
Caroline Ticeaf591802011-04-05 23:22:54 +0000421InstructionLLVM::DoesBranch() const
Chris Lattner24943d22010-06-08 16:52:24 +0000422{
423 return EDInstIsBranch(m_inst);
424}
425
426size_t
Caroline Ticeaf591802011-04-05 23:22:54 +0000427InstructionLLVM::Decode (const Disassembler &disassembler,
428 const lldb_private::DataExtractor &data,
429 uint32_t data_offset)
Chris Lattner24943d22010-06-08 16:52:24 +0000430{
431 if (EDCreateInsts(&m_inst, 1, m_disassembler, DataExtractorByteReader, data_offset, (void*)(&data)))
Greg Clayton7bc39082011-03-24 23:53:38 +0000432 {
433 const int byte_size = EDInstByteSize(m_inst);
434 uint32_t offset = data_offset;
435 // Make a copy of the opcode in m_opcode
436 switch (disassembler.GetArchitecture().GetMachine())
437 {
438 case llvm::Triple::x86:
439 case llvm::Triple::x86_64:
440 m_opcode.SetOpcodeBytes (data.PeekData (data_offset, byte_size), byte_size);
441 break;
442
443 case llvm::Triple::arm:
Greg Clayton7bc39082011-03-24 23:53:38 +0000444 case llvm::Triple::thumb:
Greg Clayton149731c2011-03-25 18:03:16 +0000445 switch (byte_size)
446 {
447 case 2:
448 m_opcode.SetOpcode16 (data.GetU16 (&offset));
449 break;
450
451 case 4:
Caroline Tice6b8d3b52011-04-19 23:30:03 +0000452 {
453 if (GetAddressClass() == eAddressClassCodeAlternateISA)
454 {
455 // If it is a 32-bit THUMB instruction, we need to swap the upper & lower halves.
456 uint32_t orig_bytes = data.GetU32 (&offset);
457 uint16_t upper_bits = (orig_bytes >> 16) & ((1u << 16) - 1);
458 uint16_t lower_bits = orig_bytes & ((1u << 16) - 1);
459 uint32_t swapped = (lower_bits << 16) | upper_bits;
460 m_opcode.SetOpcode32 (swapped);
461 }
462 else
463 m_opcode.SetOpcode32 (data.GetU32 (&offset));
464 }
Greg Clayton149731c2011-03-25 18:03:16 +0000465 break;
466
467 default:
468 assert (!"Invalid ARM opcode size");
469 break;
470 }
Greg Clayton7bc39082011-03-24 23:53:38 +0000471 break;
472
473 default:
474 assert (!"This shouldn't happen since we control the architecture we allow DisassemblerLLVM to be created for");
475 break;
476 }
477 return byte_size;
478 }
Chris Lattner24943d22010-06-08 16:52:24 +0000479 else
480 return 0;
481}
482
Chris Lattner24943d22010-06-08 16:52:24 +0000483static inline EDAssemblySyntax_t
Greg Claytoncf015052010-06-11 03:25:34 +0000484SyntaxForArchSpec (const ArchSpec &arch)
Chris Lattner24943d22010-06-08 16:52:24 +0000485{
Greg Clayton940b1032011-02-23 00:35:02 +0000486 switch (arch.GetMachine ())
Greg Clayton5e4f4a22011-02-16 00:00:43 +0000487 {
Greg Clayton940b1032011-02-23 00:35:02 +0000488 case llvm::Triple::x86:
489 case llvm::Triple::x86_64:
Chris Lattner24943d22010-06-08 16:52:24 +0000490 return kEDAssemblySyntaxX86ATT;
Sean Callanand151c8a2011-03-09 01:02:51 +0000491 case llvm::Triple::arm:
Greg Clayton889fbd02011-03-26 19:14:58 +0000492 case llvm::Triple::thumb:
Sean Callanand151c8a2011-03-09 01:02:51 +0000493 return kEDAssemblySyntaxARMUAL;
Greg Clayton5e4f4a22011-02-16 00:00:43 +0000494 default:
495 break;
496 }
Greg Claytoncf015052010-06-11 03:25:34 +0000497 return (EDAssemblySyntax_t)0; // default
Chris Lattner24943d22010-06-08 16:52:24 +0000498}
499
500Disassembler *
501DisassemblerLLVM::CreateInstance(const ArchSpec &arch)
502{
Greg Clayton5e4f4a22011-02-16 00:00:43 +0000503 std::auto_ptr<DisassemblerLLVM> disasm_ap (new DisassemblerLLVM(arch));
504
Caroline Tice080bf612011-04-05 18:46:00 +0000505 if (disasm_ap.get() && disasm_ap->IsValid())
Greg Clayton5e4f4a22011-02-16 00:00:43 +0000506 return disasm_ap.release();
Chris Lattner24943d22010-06-08 16:52:24 +0000507
Greg Claytoncf015052010-06-11 03:25:34 +0000508 return NULL;
Chris Lattner24943d22010-06-08 16:52:24 +0000509}
510
511DisassemblerLLVM::DisassemblerLLVM(const ArchSpec &arch) :
Greg Claytonb01000f2011-01-17 03:46:26 +0000512 Disassembler (arch),
Greg Claytonb1888f22011-03-19 01:12:21 +0000513 m_disassembler (NULL),
514 m_disassembler_thumb (NULL) // For ARM only
Chris Lattner24943d22010-06-08 16:52:24 +0000515{
Greg Clayton5e4f4a22011-02-16 00:00:43 +0000516 const std::string &arch_triple = arch.GetTriple().str();
517 if (!arch_triple.empty())
Greg Claytoncf015052010-06-11 03:25:34 +0000518 {
Greg Clayton5e4f4a22011-02-16 00:00:43 +0000519 if (EDGetDisassembler(&m_disassembler, arch_triple.c_str(), SyntaxForArchSpec (arch)))
520 m_disassembler = NULL;
Greg Claytonb1888f22011-03-19 01:12:21 +0000521 llvm::Triple::ArchType llvm_arch = arch.GetTriple().getArch();
Greg Clayton889fbd02011-03-26 19:14:58 +0000522 // Don't have the lldb::Triple::thumb architecture here. If someone specifies
523 // "thumb" as the architecture, we want a thumb only disassembler. But if any
524 // architecture starting with "arm" if specified, we want to auto detect the
525 // arm/thumb code automatically using the AddressClass from section offset
526 // addresses.
Greg Claytonb1888f22011-03-19 01:12:21 +0000527 if (llvm_arch == llvm::Triple::arm)
528 {
529 if (EDGetDisassembler(&m_disassembler_thumb, "thumb-apple-darwin", kEDAssemblySyntaxARMUAL))
530 m_disassembler_thumb = NULL;
531 }
Greg Claytoncf015052010-06-11 03:25:34 +0000532 }
Chris Lattner24943d22010-06-08 16:52:24 +0000533}
534
535DisassemblerLLVM::~DisassemblerLLVM()
536{
537}
538
539size_t
Greg Clayton70436352010-06-30 23:03:03 +0000540DisassemblerLLVM::DecodeInstructions
Chris Lattner24943d22010-06-08 16:52:24 +0000541(
Greg Clayton5c4c7462010-10-06 03:09:58 +0000542 const Address &base_addr,
Chris Lattner24943d22010-06-08 16:52:24 +0000543 const DataExtractor& data,
544 uint32_t data_offset,
Jim Inghamaa3e3e12011-03-22 01:48:42 +0000545 uint32_t num_instructions,
546 bool append
Chris Lattner24943d22010-06-08 16:52:24 +0000547)
548{
Greg Claytonb01000f2011-01-17 03:46:26 +0000549 if (m_disassembler == NULL)
550 return 0;
551
Chris Lattner24943d22010-06-08 16:52:24 +0000552 size_t total_inst_byte_size = 0;
553
Jim Inghamaa3e3e12011-03-22 01:48:42 +0000554 if (!append)
555 m_instruction_list.Clear();
Chris Lattner24943d22010-06-08 16:52:24 +0000556
557 while (data.ValidOffset(data_offset) && num_instructions)
558 {
Greg Clayton5c4c7462010-10-06 03:09:58 +0000559 Address inst_addr (base_addr);
560 inst_addr.Slide(data_offset);
Greg Claytonb1888f22011-03-19 01:12:21 +0000561
562 bool use_thumb = false;
563 // If we have a thumb disassembler, then we have an ARM architecture
564 // so we need to check what the instruction address class is to make
565 // sure we shouldn't be disassembling as thumb...
Greg Clayton889fbd02011-03-26 19:14:58 +0000566 AddressClass inst_address_class = eAddressClassInvalid;
Greg Claytonb1888f22011-03-19 01:12:21 +0000567 if (m_disassembler_thumb)
568 {
Greg Clayton889fbd02011-03-26 19:14:58 +0000569 inst_address_class = inst_addr.GetAddressClass ();
570 if (inst_address_class == eAddressClassCodeAlternateISA)
Greg Claytonb1888f22011-03-19 01:12:21 +0000571 use_thumb = true;
572 }
Johnny Chen80ab18e2011-05-12 22:25:53 +0000573
Greg Clayton7bc39082011-03-24 23:53:38 +0000574 InstructionSP inst_sp (new InstructionLLVM (inst_addr,
Greg Clayton889fbd02011-03-26 19:14:58 +0000575 inst_address_class,
Greg Claytonabe0fed2011-04-18 08:33:37 +0000576 use_thumb ? m_disassembler_thumb : m_disassembler,
Johnny Chen1608c872011-05-18 18:22:16 +0000577 use_thumb ? llvm::Triple::thumb : m_arch.GetMachine()));
Chris Lattner24943d22010-06-08 16:52:24 +0000578
Greg Clayton889fbd02011-03-26 19:14:58 +0000579 size_t inst_byte_size = inst_sp->Decode (*this, data, data_offset);
Chris Lattner24943d22010-06-08 16:52:24 +0000580
581 if (inst_byte_size == 0)
582 break;
583
Greg Clayton5c4c7462010-10-06 03:09:58 +0000584 m_instruction_list.Append (inst_sp);
Chris Lattner24943d22010-06-08 16:52:24 +0000585
586 total_inst_byte_size += inst_byte_size;
587 data_offset += inst_byte_size;
588 num_instructions--;
589 }
590
591 return total_inst_byte_size;
592}
593
594void
595DisassemblerLLVM::Initialize()
596{
597 PluginManager::RegisterPlugin (GetPluginNameStatic(),
598 GetPluginDescriptionStatic(),
599 CreateInstance);
600}
601
602void
603DisassemblerLLVM::Terminate()
604{
605 PluginManager::UnregisterPlugin (CreateInstance);
606}
607
608
609const char *
610DisassemblerLLVM::GetPluginNameStatic()
611{
Greg Clayton149731c2011-03-25 18:03:16 +0000612 return "llvm";
Chris Lattner24943d22010-06-08 16:52:24 +0000613}
614
615const char *
616DisassemblerLLVM::GetPluginDescriptionStatic()
617{
Greg Clayton149731c2011-03-25 18:03:16 +0000618 return "Disassembler that uses LLVM opcode tables to disassemble i386, x86_64 and ARM.";
Chris Lattner24943d22010-06-08 16:52:24 +0000619}
620
621//------------------------------------------------------------------
622// PluginInterface protocol
623//------------------------------------------------------------------
624const char *
625DisassemblerLLVM::GetPluginName()
626{
627 return "DisassemblerLLVM";
628}
629
630const char *
631DisassemblerLLVM::GetShortPluginName()
632{
633 return GetPluginNameStatic();
634}
635
636uint32_t
637DisassemblerLLVM::GetPluginVersion()
638{
639 return 1;
640}
641