blob: 9b381dd3b96cbf88216b711c268dfbf8e40392c6 [file] [log] [blame]
Sean Callanan95e5c632012-02-17 00:53:45 +00001//===-- DisassemblerLLVMC.cpp -----------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
Eugene Zelenko45a40142015-10-22 21:24:37 +000010// C Includes
11// C++ Includes
12// Project includes
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +000013#include "DisassemblerLLVMC.h"
14
15// Other libraries and framework includes
Sean Callanan95e5c632012-02-17 00:53:45 +000016#include "llvm-c/Disassembler.h"
Benjamin Kramer79dad1d2016-01-26 16:45:00 +000017#include "llvm/ADT/SmallString.h"
Jim Ingham0f063ba2013-03-02 00:26:47 +000018#include "llvm/MC/MCAsmInfo.h"
19#include "llvm/MC/MCContext.h"
Benjamin Kramer79dad1d2016-01-26 16:45:00 +000020#include "llvm/MC/MCDisassembler/MCDisassembler.h"
21#include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"
22#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
Jim Ingham0f063ba2013-03-02 00:26:47 +000023#include "llvm/MC/MCInst.h"
24#include "llvm/MC/MCInstPrinter.h"
25#include "llvm/MC/MCInstrInfo.h"
26#include "llvm/MC/MCRegisterInfo.h"
27#include "llvm/MC/MCSubtargetInfo.h"
28#include "llvm/Support/ErrorHandling.h"
Kate Stoneb9c1b512016-09-06 20:57:50 +000029#include "llvm/Support/ScopedPrinter.h"
Jim Ingham0f063ba2013-03-02 00:26:47 +000030#include "llvm/Support/TargetRegistry.h"
Sean Callanan95e5c632012-02-17 00:53:45 +000031#include "llvm/Support/TargetSelect.h"
Jim Ingham0f063ba2013-03-02 00:26:47 +000032
Sean Callanan95e5c632012-02-17 00:53:45 +000033#include "lldb/Core/Address.h"
Greg Clayton1f746072012-08-29 21:13:06 +000034#include "lldb/Core/Module.h"
Sean Callanan95e5c632012-02-17 00:53:45 +000035#include "lldb/Symbol/SymbolContext.h"
36#include "lldb/Target/ExecutionContext.h"
37#include "lldb/Target/Process.h"
38#include "lldb/Target/RegisterContext.h"
Greg Claytond5944cd2013-12-06 01:12:00 +000039#include "lldb/Target/SectionLoadList.h"
Jason Molendab57e4a12013-11-04 09:33:30 +000040#include "lldb/Target/StackFrame.h"
Kate Stoneb9c1b512016-09-06 20:57:50 +000041#include "lldb/Target/Target.h"
Zachary Turner666cc0b2017-03-04 01:30:05 +000042#include "lldb/Utility/DataExtractor.h"
Zachary Turner6f9e6902017-03-03 20:56:28 +000043#include "lldb/Utility/Log.h"
Zachary Turnerbf9a7732017-02-02 21:39:50 +000044#include "lldb/Utility/RegularExpression.h"
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +000045#include "lldb/Utility/Stream.h"
Sean Callanan95e5c632012-02-17 00:53:45 +000046
47using namespace lldb;
48using namespace lldb_private;
49
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +000050class DisassemblerLLVMC::MCDisasmInstance {
51public:
52 static std::unique_ptr<MCDisasmInstance>
53 Create(const char *triple, const char *cpu, const char *features_str,
54 unsigned flavor, DisassemblerLLVMC &owner);
55
56 ~MCDisasmInstance() = default;
57
58 uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len,
59 lldb::addr_t pc, llvm::MCInst &mc_inst) const;
60 void PrintMCInst(llvm::MCInst &mc_inst, std::string &inst_string,
61 std::string &comments_string);
62 void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style);
63 bool CanBranch(llvm::MCInst &mc_inst) const;
64 bool HasDelaySlot(llvm::MCInst &mc_inst) const;
65 bool IsCall(llvm::MCInst &mc_inst) const;
66
67private:
68 MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,
69 std::unique_ptr<llvm::MCRegisterInfo> &&reg_info_up,
70 std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,
71 std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,
72 std::unique_ptr<llvm::MCContext> &&context_up,
73 std::unique_ptr<llvm::MCDisassembler> &&disasm_up,
74 std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up);
75
76 std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up;
77 std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up;
78 std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up;
79 std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up;
80 std::unique_ptr<llvm::MCContext> m_context_up;
81 std::unique_ptr<llvm::MCDisassembler> m_disasm_up;
82 std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up;
83};
84
Kate Stoneb9c1b512016-09-06 20:57:50 +000085class InstructionLLVMC : public lldb_private::Instruction {
Sean Callanan95e5c632012-02-17 00:53:45 +000086public:
Kate Stoneb9c1b512016-09-06 20:57:50 +000087 InstructionLLVMC(DisassemblerLLVMC &disasm,
88 const lldb_private::Address &address,
89 AddressClass addr_class)
90 : Instruction(address, addr_class),
91 m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>(
92 disasm.shared_from_this())),
93 m_does_branch(eLazyBoolCalculate), m_has_delay_slot(eLazyBoolCalculate),
94 m_is_call(eLazyBoolCalculate), m_is_valid(false),
95 m_using_file_addr(false) {}
Sylvestre Ledrua3e4ceb2014-04-15 12:08:57 +000096
Kate Stoneb9c1b512016-09-06 20:57:50 +000097 ~InstructionLLVMC() override = default;
Sylvestre Ledrua3e4ceb2014-04-15 12:08:57 +000098
Kate Stoneb9c1b512016-09-06 20:57:50 +000099 bool DoesBranch() override {
100 if (m_does_branch == eLazyBoolCalculate) {
101 std::shared_ptr<DisassemblerLLVMC> disasm_sp(GetDisassembler());
102 if (disasm_sp) {
103 disasm_sp->Lock(this, NULL);
Greg Claytonba812f42012-05-10 02:52:23 +0000104 DataExtractor data;
Kate Stoneb9c1b512016-09-06 20:57:50 +0000105 if (m_opcode.GetData(data)) {
106 bool is_alternate_isa;
107 lldb::addr_t pc = m_address.GetFileAddress();
Greg Claytonba812f42012-05-10 02:52:23 +0000108
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000109 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
Kate Stoneb9c1b512016-09-06 20:57:50 +0000110 GetDisasmToUse(is_alternate_isa);
111 const uint8_t *opcode_data = data.GetDataStart();
112 const size_t opcode_data_len = data.GetByteSize();
113 llvm::MCInst inst;
114 const size_t inst_size =
115 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
116 // Be conservative, if we didn't understand the instruction, say it
117 // might branch...
118 if (inst_size == 0)
119 m_does_branch = eLazyBoolYes;
120 else {
121 const bool can_branch = mc_disasm_ptr->CanBranch(inst);
122 if (can_branch)
123 m_does_branch = eLazyBoolYes;
124 else
125 m_does_branch = eLazyBoolNo;
126 }
Greg Claytonba812f42012-05-10 02:52:23 +0000127 }
Kate Stoneb9c1b512016-09-06 20:57:50 +0000128 disasm_sp->Unlock();
129 }
Sean Callanan95e5c632012-02-17 00:53:45 +0000130 }
Kate Stoneb9c1b512016-09-06 20:57:50 +0000131 return m_does_branch == eLazyBoolYes;
132 }
Sylvestre Ledrua3e4ceb2014-04-15 12:08:57 +0000133
Kate Stoneb9c1b512016-09-06 20:57:50 +0000134 bool HasDelaySlot() override {
135 if (m_has_delay_slot == eLazyBoolCalculate) {
136 std::shared_ptr<DisassemblerLLVMC> disasm_sp(GetDisassembler());
137 if (disasm_sp) {
138 disasm_sp->Lock(this, NULL);
139 DataExtractor data;
140 if (m_opcode.GetData(data)) {
141 bool is_alternate_isa;
142 lldb::addr_t pc = m_address.GetFileAddress();
Sylvestre Ledrua3e4ceb2014-04-15 12:08:57 +0000143
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000144 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
Kate Stoneb9c1b512016-09-06 20:57:50 +0000145 GetDisasmToUse(is_alternate_isa);
146 const uint8_t *opcode_data = data.GetDataStart();
147 const size_t opcode_data_len = data.GetByteSize();
148 llvm::MCInst inst;
149 const size_t inst_size =
150 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
151 // if we didn't understand the instruction, say it doesn't have a
152 // delay slot...
153 if (inst_size == 0)
154 m_has_delay_slot = eLazyBoolNo;
155 else {
156 const bool has_delay_slot = mc_disasm_ptr->HasDelaySlot(inst);
157 if (has_delay_slot)
158 m_has_delay_slot = eLazyBoolYes;
159 else
160 m_has_delay_slot = eLazyBoolNo;
161 }
Sean Callanan4740a732016-09-06 04:48:36 +0000162 }
Kate Stoneb9c1b512016-09-06 20:57:50 +0000163 disasm_sp->Unlock();
164 }
165 }
166 return m_has_delay_slot == eLazyBoolYes;
167 }
168
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000169 DisassemblerLLVMC::MCDisasmInstance *GetDisasmToUse(bool &is_alternate_isa) {
Kate Stoneb9c1b512016-09-06 20:57:50 +0000170 is_alternate_isa = false;
171 std::shared_ptr<DisassemblerLLVMC> disasm_sp(GetDisassembler());
172 if (disasm_sp) {
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000173 if (disasm_sp->m_alternate_disasm_up) {
Kate Stoneb9c1b512016-09-06 20:57:50 +0000174 const AddressClass address_class = GetAddressClass();
175
Tatyana Krasnukha04803b32018-06-26 13:06:54 +0000176 if (address_class == AddressClass::eCodeAlternateISA) {
Kate Stoneb9c1b512016-09-06 20:57:50 +0000177 is_alternate_isa = true;
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000178 return disasm_sp->m_alternate_disasm_up.get();
Kate Stoneb9c1b512016-09-06 20:57:50 +0000179 }
180 }
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000181 return disasm_sp->m_disasm_up.get();
Kate Stoneb9c1b512016-09-06 20:57:50 +0000182 }
183 return nullptr;
184 }
185
186 size_t Decode(const lldb_private::Disassembler &disassembler,
187 const lldb_private::DataExtractor &data,
188 lldb::offset_t data_offset) override {
189 // All we have to do is read the opcode which can be easy for some
190 // architectures
191 bool got_op = false;
192 std::shared_ptr<DisassemblerLLVMC> disasm_sp(GetDisassembler());
193 if (disasm_sp) {
194 const ArchSpec &arch = disasm_sp->GetArchitecture();
195 const lldb::ByteOrder byte_order = data.GetByteOrder();
196
197 const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize();
198 const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize();
199 if (min_op_byte_size == max_op_byte_size) {
200 // Fixed size instructions, just read that amount of data.
201 if (!data.ValidOffsetForDataOfSize(data_offset, min_op_byte_size))
202 return false;
203
204 switch (min_op_byte_size) {
205 case 1:
206 m_opcode.SetOpcode8(data.GetU8(&data_offset), byte_order);
207 got_op = true;
208 break;
209
210 case 2:
211 m_opcode.SetOpcode16(data.GetU16(&data_offset), byte_order);
212 got_op = true;
213 break;
214
215 case 4:
216 m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order);
217 got_op = true;
218 break;
219
220 case 8:
221 m_opcode.SetOpcode64(data.GetU64(&data_offset), byte_order);
222 got_op = true;
223 break;
224
225 default:
226 m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size),
227 min_op_byte_size);
228 got_op = true;
229 break;
230 }
231 }
232 if (!got_op) {
233 bool is_alternate_isa = false;
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000234 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
Kate Stoneb9c1b512016-09-06 20:57:50 +0000235 GetDisasmToUse(is_alternate_isa);
236
237 const llvm::Triple::ArchType machine = arch.GetMachine();
238 if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb) {
239 if (machine == llvm::Triple::thumb || is_alternate_isa) {
240 uint32_t thumb_opcode = data.GetU16(&data_offset);
241 if ((thumb_opcode & 0xe000) != 0xe000 ||
242 ((thumb_opcode & 0x1800u) == 0)) {
243 m_opcode.SetOpcode16(thumb_opcode, byte_order);
244 m_is_valid = true;
245 } else {
246 thumb_opcode <<= 16;
247 thumb_opcode |= data.GetU16(&data_offset);
248 m_opcode.SetOpcode16_2(thumb_opcode, byte_order);
249 m_is_valid = true;
250 }
251 } else {
252 m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order);
253 m_is_valid = true;
254 }
255 } else {
256 // The opcode isn't evenly sized, so we need to actually use the llvm
257 // disassembler to parse it and get the size.
258 uint8_t *opcode_data =
259 const_cast<uint8_t *>(data.PeekData(data_offset, 1));
260 const size_t opcode_data_len = data.BytesLeft(data_offset);
261 const addr_t pc = m_address.GetFileAddress();
262 llvm::MCInst inst;
263
264 disasm_sp->Lock(this, NULL);
265 const size_t inst_size =
266 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
267 disasm_sp->Unlock();
268 if (inst_size == 0)
269 m_opcode.Clear();
270 else {
271 m_opcode.SetOpcodeBytes(opcode_data, inst_size);
272 m_is_valid = true;
273 }
274 }
275 }
276 return m_opcode.GetByteSize();
277 }
278 return 0;
279 }
280
281 void AppendComment(std::string &description) {
282 if (m_comment.empty())
283 m_comment.swap(description);
284 else {
285 m_comment.append(", ");
286 m_comment.append(description);
287 }
288 }
289
290 void CalculateMnemonicOperandsAndComment(
291 const lldb_private::ExecutionContext *exe_ctx) override {
292 DataExtractor data;
293 const AddressClass address_class = GetAddressClass();
294
295 if (m_opcode.GetData(data)) {
296 std::string out_string;
297 std::string comment_string;
298
299 std::shared_ptr<DisassemblerLLVMC> disasm_sp(GetDisassembler());
300 if (disasm_sp) {
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000301 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr;
Kate Stoneb9c1b512016-09-06 20:57:50 +0000302
Tatyana Krasnukha04803b32018-06-26 13:06:54 +0000303 if (address_class == AddressClass::eCodeAlternateISA)
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000304 mc_disasm_ptr = disasm_sp->m_alternate_disasm_up.get();
Kate Stoneb9c1b512016-09-06 20:57:50 +0000305 else
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000306 mc_disasm_ptr = disasm_sp->m_disasm_up.get();
Kate Stoneb9c1b512016-09-06 20:57:50 +0000307
308 lldb::addr_t pc = m_address.GetFileAddress();
309 m_using_file_addr = true;
310
311 const bool data_from_file = disasm_sp->m_data_from_file;
312 bool use_hex_immediates = true;
313 Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC;
314
315 if (exe_ctx) {
316 Target *target = exe_ctx->GetTargetPtr();
317 if (target) {
318 use_hex_immediates = target->GetUseHexImmediates();
319 hex_style = target->GetHexImmediateStyle();
320
321 if (!data_from_file) {
322 const lldb::addr_t load_addr = m_address.GetLoadAddress(target);
323 if (load_addr != LLDB_INVALID_ADDRESS) {
324 pc = load_addr;
325 m_using_file_addr = false;
326 }
327 }
328 }
329 }
330
331 disasm_sp->Lock(this, exe_ctx);
332
333 const uint8_t *opcode_data = data.GetDataStart();
334 const size_t opcode_data_len = data.GetByteSize();
335 llvm::MCInst inst;
336 size_t inst_size =
337 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
338
339 if (inst_size > 0) {
340 mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style);
341 mc_disasm_ptr->PrintMCInst(inst, out_string, comment_string);
342
343 if (!comment_string.empty()) {
344 AppendComment(comment_string);
345 }
346 }
347
348 disasm_sp->Unlock();
349
350 if (inst_size == 0) {
351 m_comment.assign("unknown opcode");
352 inst_size = m_opcode.GetByteSize();
353 StreamString mnemonic_strm;
354 lldb::offset_t offset = 0;
355 lldb::ByteOrder byte_order = data.GetByteOrder();
356 switch (inst_size) {
357 case 1: {
358 const uint8_t uval8 = data.GetU8(&offset);
359 m_opcode.SetOpcode8(uval8, byte_order);
360 m_opcode_name.assign(".byte");
361 mnemonic_strm.Printf("0x%2.2x", uval8);
362 } break;
363 case 2: {
364 const uint16_t uval16 = data.GetU16(&offset);
365 m_opcode.SetOpcode16(uval16, byte_order);
366 m_opcode_name.assign(".short");
367 mnemonic_strm.Printf("0x%4.4x", uval16);
368 } break;
369 case 4: {
370 const uint32_t uval32 = data.GetU32(&offset);
371 m_opcode.SetOpcode32(uval32, byte_order);
372 m_opcode_name.assign(".long");
373 mnemonic_strm.Printf("0x%8.8x", uval32);
374 } break;
375 case 8: {
376 const uint64_t uval64 = data.GetU64(&offset);
377 m_opcode.SetOpcode64(uval64, byte_order);
378 m_opcode_name.assign(".quad");
379 mnemonic_strm.Printf("0x%16.16" PRIx64, uval64);
380 } break;
381 default:
382 if (inst_size == 0)
383 return;
384 else {
385 const uint8_t *bytes = data.PeekData(offset, inst_size);
386 if (bytes == NULL)
387 return;
388 m_opcode_name.assign(".byte");
389 m_opcode.SetOpcodeBytes(bytes, inst_size);
390 mnemonic_strm.Printf("0x%2.2x", bytes[0]);
391 for (uint32_t i = 1; i < inst_size; ++i)
392 mnemonic_strm.Printf(" 0x%2.2x", bytes[i]);
393 }
394 break;
395 }
Zachary Turnerc1564272016-11-16 21:15:24 +0000396 m_mnemonics = mnemonic_strm.GetString();
Kate Stoneb9c1b512016-09-06 20:57:50 +0000397 return;
398 } else {
399 if (m_does_branch == eLazyBoolCalculate) {
400 const bool can_branch = mc_disasm_ptr->CanBranch(inst);
401 if (can_branch)
402 m_does_branch = eLazyBoolYes;
403 else
404 m_does_branch = eLazyBoolNo;
405 }
406 }
407
Zachary Turner95eae422016-09-21 16:01:28 +0000408 static RegularExpression s_regex(
409 llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?"));
Kate Stoneb9c1b512016-09-06 20:57:50 +0000410
411 RegularExpression::Match matches(3);
412
Zachary Turner95eae422016-09-21 16:01:28 +0000413 if (s_regex.Execute(out_string, &matches)) {
Kate Stoneb9c1b512016-09-06 20:57:50 +0000414 matches.GetMatchAtIndex(out_string.c_str(), 1, m_opcode_name);
415 matches.GetMatchAtIndex(out_string.c_str(), 2, m_mnemonics);
416 }
417 }
418 }
419 }
420
421 bool IsValid() const { return m_is_valid; }
422
423 bool UsingFileAddress() const { return m_using_file_addr; }
424 size_t GetByteSize() const { return m_opcode.GetByteSize(); }
425
426 std::shared_ptr<DisassemblerLLVMC> GetDisassembler() {
427 return m_disasm_wp.lock();
428 }
429
430 static llvm::StringRef::const_iterator
431 ConsumeWhitespace(llvm::StringRef::const_iterator osi,
432 llvm::StringRef::const_iterator ose) {
433 while (osi != ose) {
434 switch (*osi) {
435 default:
Sean Callanan4740a732016-09-06 04:48:36 +0000436 return osi;
Kate Stoneb9c1b512016-09-06 20:57:50 +0000437 case ' ':
438 case '\t':
439 break;
440 }
441 ++osi;
Sean Callanan4740a732016-09-06 04:48:36 +0000442 }
443
Kate Stoneb9c1b512016-09-06 20:57:50 +0000444 return osi;
445 }
Sean Callanan4740a732016-09-06 04:48:36 +0000446
Kate Stoneb9c1b512016-09-06 20:57:50 +0000447 static std::pair<bool, llvm::StringRef::const_iterator>
448 ConsumeChar(llvm::StringRef::const_iterator osi, const char c,
449 llvm::StringRef::const_iterator ose) {
450 bool found = false;
451
452 osi = ConsumeWhitespace(osi, ose);
453 if (osi != ose && *osi == c) {
454 found = true;
455 ++osi;
Sean Callanan4740a732016-09-06 04:48:36 +0000456 }
Kate Stoneb9c1b512016-09-06 20:57:50 +0000457
458 return std::make_pair(found, osi);
459 }
460
461 static std::pair<Operand, llvm::StringRef::const_iterator>
462 ParseRegisterName(llvm::StringRef::const_iterator osi,
463 llvm::StringRef::const_iterator ose) {
464 Operand ret;
465 ret.m_type = Operand::Type::Register;
466 std::string str;
467
468 osi = ConsumeWhitespace(osi, ose);
469
470 while (osi != ose) {
471 if (*osi >= '0' && *osi <= '9') {
472 if (str.empty()) {
473 return std::make_pair(Operand(), osi);
474 } else {
475 str.push_back(*osi);
Sean Callanan4740a732016-09-06 04:48:36 +0000476 }
Kate Stoneb9c1b512016-09-06 20:57:50 +0000477 } else if (*osi >= 'a' && *osi <= 'z') {
478 str.push_back(*osi);
479 } else {
480 switch (*osi) {
481 default:
482 if (str.empty()) {
Sean Callanan4740a732016-09-06 04:48:36 +0000483 return std::make_pair(Operand(), osi);
Kate Stoneb9c1b512016-09-06 20:57:50 +0000484 } else {
485 ret.m_register = ConstString(str);
486 return std::make_pair(ret, osi);
487 }
488 case '%':
489 if (!str.empty()) {
Sean Callanan4740a732016-09-06 04:48:36 +0000490 return std::make_pair(Operand(), osi);
Kate Stoneb9c1b512016-09-06 20:57:50 +0000491 }
492 break;
Sean Callanan4740a732016-09-06 04:48:36 +0000493 }
Kate Stoneb9c1b512016-09-06 20:57:50 +0000494 }
495 ++osi;
Sean Callanan4740a732016-09-06 04:48:36 +0000496 }
Kate Stoneb9c1b512016-09-06 20:57:50 +0000497
498 ret.m_register = ConstString(str);
499 return std::make_pair(ret, osi);
500 }
501
502 static std::pair<Operand, llvm::StringRef::const_iterator>
503 ParseImmediate(llvm::StringRef::const_iterator osi,
504 llvm::StringRef::const_iterator ose) {
505 Operand ret;
506 ret.m_type = Operand::Type::Immediate;
507 std::string str;
508 bool is_hex = false;
509
510 osi = ConsumeWhitespace(osi, ose);
511
512 while (osi != ose) {
513 if (*osi >= '0' && *osi <= '9') {
514 str.push_back(*osi);
515 } else if (*osi >= 'a' && *osi <= 'f') {
516 if (is_hex) {
517 str.push_back(*osi);
518 } else {
519 return std::make_pair(Operand(), osi);
520 }
521 } else {
522 switch (*osi) {
523 default:
524 if (str.empty()) {
Sean Callanan4740a732016-09-06 04:48:36 +0000525 return std::make_pair(Operand(), osi);
Kate Stoneb9c1b512016-09-06 20:57:50 +0000526 } else {
527 ret.m_immediate = strtoull(str.c_str(), nullptr, 0);
528 return std::make_pair(ret, osi);
529 }
530 case 'x':
531 if (!str.compare("0")) {
532 is_hex = true;
533 str.push_back(*osi);
534 } else {
Sean Callanan4740a732016-09-06 04:48:36 +0000535 return std::make_pair(Operand(), osi);
Kate Stoneb9c1b512016-09-06 20:57:50 +0000536 }
537 break;
538 case '#':
539 case '$':
540 if (!str.empty()) {
Sean Callanan4740a732016-09-06 04:48:36 +0000541 return std::make_pair(Operand(), osi);
Kate Stoneb9c1b512016-09-06 20:57:50 +0000542 }
543 break;
544 case '-':
545 if (str.empty()) {
546 ret.m_negative = true;
547 } else {
Sean Callanan4740a732016-09-06 04:48:36 +0000548 return std::make_pair(Operand(), osi);
Kate Stoneb9c1b512016-09-06 20:57:50 +0000549 }
Sean Callanan4740a732016-09-06 04:48:36 +0000550 }
Kate Stoneb9c1b512016-09-06 20:57:50 +0000551 }
552 ++osi;
Sean Callanan4740a732016-09-06 04:48:36 +0000553 }
Kate Stoneb9c1b512016-09-06 20:57:50 +0000554
555 ret.m_immediate = strtoull(str.c_str(), nullptr, 0);
556 return std::make_pair(ret, osi);
557 }
558
559 // -0x5(%rax,%rax,2)
560 static std::pair<Operand, llvm::StringRef::const_iterator>
561 ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi,
562 llvm::StringRef::const_iterator ose) {
563 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
564 ParseImmediate(osi, ose);
565 if (offset_and_iterator.first.IsValid()) {
566 osi = offset_and_iterator.second;
Sean Callanan4740a732016-09-06 04:48:36 +0000567 }
Kate Stoneb9c1b512016-09-06 20:57:50 +0000568
569 bool found = false;
570 std::tie(found, osi) = ConsumeChar(osi, '(', ose);
571 if (!found) {
572 return std::make_pair(Operand(), osi);
Sean Callanan4740a732016-09-06 04:48:36 +0000573 }
Kate Stoneb9c1b512016-09-06 20:57:50 +0000574
575 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
576 ParseRegisterName(osi, ose);
577 if (base_and_iterator.first.IsValid()) {
578 osi = base_and_iterator.second;
579 } else {
580 return std::make_pair(Operand(), osi);
581 }
582
583 std::tie(found, osi) = ConsumeChar(osi, ',', ose);
584 if (!found) {
585 return std::make_pair(Operand(), osi);
586 }
587
588 std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator =
589 ParseRegisterName(osi, ose);
590 if (index_and_iterator.first.IsValid()) {
591 osi = index_and_iterator.second;
592 } else {
593 return std::make_pair(Operand(), osi);
594 }
595
596 std::tie(found, osi) = ConsumeChar(osi, ',', ose);
597 if (!found) {
598 return std::make_pair(Operand(), osi);
599 }
600
601 std::pair<Operand, llvm::StringRef::const_iterator>
602 multiplier_and_iterator = ParseImmediate(osi, ose);
603 if (index_and_iterator.first.IsValid()) {
604 osi = index_and_iterator.second;
605 } else {
606 return std::make_pair(Operand(), osi);
607 }
608
609 std::tie(found, osi) = ConsumeChar(osi, ')', ose);
610 if (!found) {
611 return std::make_pair(Operand(), osi);
612 }
613
614 Operand product;
615 product.m_type = Operand::Type::Product;
616 product.m_children.push_back(index_and_iterator.first);
617 product.m_children.push_back(multiplier_and_iterator.first);
618
619 Operand index;
620 index.m_type = Operand::Type::Sum;
621 index.m_children.push_back(base_and_iterator.first);
622 index.m_children.push_back(product);
623
624 if (offset_and_iterator.first.IsValid()) {
625 Operand offset;
626 offset.m_type = Operand::Type::Sum;
627 offset.m_children.push_back(offset_and_iterator.first);
628 offset.m_children.push_back(index);
629
630 Operand deref;
631 deref.m_type = Operand::Type::Dereference;
632 deref.m_children.push_back(offset);
633 return std::make_pair(deref, osi);
634 } else {
635 Operand deref;
636 deref.m_type = Operand::Type::Dereference;
637 deref.m_children.push_back(index);
638 return std::make_pair(deref, osi);
639 }
640 }
641
642 // -0x10(%rbp)
643 static std::pair<Operand, llvm::StringRef::const_iterator>
644 ParseIntelDerefAccess(llvm::StringRef::const_iterator osi,
645 llvm::StringRef::const_iterator ose) {
646 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
647 ParseImmediate(osi, ose);
648 if (offset_and_iterator.first.IsValid()) {
649 osi = offset_and_iterator.second;
650 }
651
652 bool found = false;
653 std::tie(found, osi) = ConsumeChar(osi, '(', ose);
654 if (!found) {
655 return std::make_pair(Operand(), osi);
656 }
657
658 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
659 ParseRegisterName(osi, ose);
660 if (base_and_iterator.first.IsValid()) {
661 osi = base_and_iterator.second;
662 } else {
663 return std::make_pair(Operand(), osi);
664 }
665
666 std::tie(found, osi) = ConsumeChar(osi, ')', ose);
667 if (!found) {
668 return std::make_pair(Operand(), osi);
669 }
670
671 if (offset_and_iterator.first.IsValid()) {
672 Operand offset;
673 offset.m_type = Operand::Type::Sum;
674 offset.m_children.push_back(offset_and_iterator.first);
675 offset.m_children.push_back(base_and_iterator.first);
676
677 Operand deref;
678 deref.m_type = Operand::Type::Dereference;
679 deref.m_children.push_back(offset);
680 return std::make_pair(deref, osi);
681 } else {
682 Operand deref;
683 deref.m_type = Operand::Type::Dereference;
684 deref.m_children.push_back(base_and_iterator.first);
685 return std::make_pair(deref, osi);
686 }
687 }
688
689 // [sp, #8]!
690 static std::pair<Operand, llvm::StringRef::const_iterator>
691 ParseARMOffsetAccess(llvm::StringRef::const_iterator osi,
692 llvm::StringRef::const_iterator ose) {
693 bool found = false;
694 std::tie(found, osi) = ConsumeChar(osi, '[', ose);
695 if (!found) {
696 return std::make_pair(Operand(), osi);
697 }
698
699 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
700 ParseRegisterName(osi, ose);
701 if (base_and_iterator.first.IsValid()) {
702 osi = base_and_iterator.second;
703 } else {
704 return std::make_pair(Operand(), osi);
705 }
706
707 std::tie(found, osi) = ConsumeChar(osi, ',', ose);
708 if (!found) {
709 return std::make_pair(Operand(), osi);
710 }
711
712 std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
713 ParseImmediate(osi, ose);
714 if (offset_and_iterator.first.IsValid()) {
715 osi = offset_and_iterator.second;
716 }
717
718 std::tie(found, osi) = ConsumeChar(osi, ']', ose);
719 if (!found) {
720 return std::make_pair(Operand(), osi);
721 }
722
723 Operand offset;
724 offset.m_type = Operand::Type::Sum;
725 offset.m_children.push_back(offset_and_iterator.first);
726 offset.m_children.push_back(base_and_iterator.first);
727
728 Operand deref;
729 deref.m_type = Operand::Type::Dereference;
730 deref.m_children.push_back(offset);
731 return std::make_pair(deref, osi);
732 }
733
734 // [sp]
735 static std::pair<Operand, llvm::StringRef::const_iterator>
736 ParseARMDerefAccess(llvm::StringRef::const_iterator osi,
737 llvm::StringRef::const_iterator ose) {
738 bool found = false;
739 std::tie(found, osi) = ConsumeChar(osi, '[', ose);
740 if (!found) {
741 return std::make_pair(Operand(), osi);
742 }
743
744 std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
745 ParseRegisterName(osi, ose);
746 if (base_and_iterator.first.IsValid()) {
747 osi = base_and_iterator.second;
748 } else {
749 return std::make_pair(Operand(), osi);
750 }
751
752 std::tie(found, osi) = ConsumeChar(osi, ']', ose);
753 if (!found) {
754 return std::make_pair(Operand(), osi);
755 }
756
757 Operand deref;
758 deref.m_type = Operand::Type::Dereference;
759 deref.m_children.push_back(base_and_iterator.first);
760 return std::make_pair(deref, osi);
761 }
762
763 static void DumpOperand(const Operand &op, Stream &s) {
764 switch (op.m_type) {
765 case Operand::Type::Dereference:
766 s.PutCString("*");
767 DumpOperand(op.m_children[0], s);
768 break;
769 case Operand::Type::Immediate:
770 if (op.m_negative) {
771 s.PutCString("-");
772 }
Malcolm Parsons771ef6d2016-11-02 20:34:10 +0000773 s.PutCString(llvm::to_string(op.m_immediate));
Kate Stoneb9c1b512016-09-06 20:57:50 +0000774 break;
775 case Operand::Type::Invalid:
776 s.PutCString("Invalid");
777 break;
778 case Operand::Type::Product:
779 s.PutCString("(");
780 DumpOperand(op.m_children[0], s);
781 s.PutCString("*");
782 DumpOperand(op.m_children[1], s);
783 s.PutCString(")");
784 break;
785 case Operand::Type::Register:
786 s.PutCString(op.m_register.AsCString());
787 break;
788 case Operand::Type::Sum:
789 s.PutCString("(");
790 DumpOperand(op.m_children[0], s);
791 s.PutCString("+");
792 DumpOperand(op.m_children[1], s);
793 s.PutCString(")");
794 break;
795 }
796 }
797
798 bool ParseOperands(
799 llvm::SmallVectorImpl<Instruction::Operand> &operands) override {
800 const char *operands_string = GetOperands(nullptr);
801
802 if (!operands_string) {
803 return false;
804 }
805
806 llvm::StringRef operands_ref(operands_string);
807
808 llvm::StringRef::const_iterator osi = operands_ref.begin();
809 llvm::StringRef::const_iterator ose = operands_ref.end();
810
811 while (osi != ose) {
812 Operand operand;
813 llvm::StringRef::const_iterator iter;
814
815 if ((std::tie(operand, iter) = ParseIntelIndexedAccess(osi, ose),
816 operand.IsValid()) ||
817 (std::tie(operand, iter) = ParseIntelDerefAccess(osi, ose),
818 operand.IsValid()) ||
819 (std::tie(operand, iter) = ParseARMOffsetAccess(osi, ose),
820 operand.IsValid()) ||
821 (std::tie(operand, iter) = ParseARMDerefAccess(osi, ose),
822 operand.IsValid()) ||
823 (std::tie(operand, iter) = ParseRegisterName(osi, ose),
824 operand.IsValid()) ||
825 (std::tie(operand, iter) = ParseImmediate(osi, ose),
826 operand.IsValid())) {
827 osi = iter;
828 operands.push_back(operand);
829 } else {
830 return false;
831 }
832
833 std::pair<bool, llvm::StringRef::const_iterator> found_and_iter =
834 ConsumeChar(osi, ',', ose);
835 if (found_and_iter.first) {
836 osi = found_and_iter.second;
837 }
838
839 osi = ConsumeWhitespace(osi, ose);
840 }
841
842 DisassemblerSP disasm_sp = m_disasm_wp.lock();
843
844 if (disasm_sp && operands.size() > 1) {
845 // TODO tie this into the MC Disassembler's notion of clobbers.
846 switch (disasm_sp->GetArchitecture().GetMachine()) {
847 default:
848 break;
849 case llvm::Triple::x86:
850 case llvm::Triple::x86_64:
851 operands[operands.size() - 1].m_clobbered = true;
852 break;
853 case llvm::Triple::arm:
854 operands[0].m_clobbered = true;
855 break;
856 }
857 }
858
859 if (Log *log =
860 lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS)) {
861 StreamString ss;
862
863 ss.Printf("[%s] expands to %zu operands:\n", operands_string,
864 operands.size());
865 for (const Operand &operand : operands) {
866 ss.PutCString(" ");
867 DumpOperand(operand, ss);
868 ss.PutCString("\n");
869 }
870
Zachary Turnerc1564272016-11-16 21:15:24 +0000871 log->PutString(ss.GetString());
Kate Stoneb9c1b512016-09-06 20:57:50 +0000872 }
873
874 return true;
875 }
876
877 bool IsCall() override {
878 if (m_is_call == eLazyBoolCalculate) {
879 std::shared_ptr<DisassemblerLLVMC> disasm_sp(GetDisassembler());
880 if (disasm_sp) {
881 disasm_sp->Lock(this, NULL);
882 DataExtractor data;
883 if (m_opcode.GetData(data)) {
884 bool is_alternate_isa;
885 lldb::addr_t pc = m_address.GetFileAddress();
886
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000887 DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
Kate Stoneb9c1b512016-09-06 20:57:50 +0000888 GetDisasmToUse(is_alternate_isa);
889 const uint8_t *opcode_data = data.GetDataStart();
890 const size_t opcode_data_len = data.GetByteSize();
891 llvm::MCInst inst;
892 const size_t inst_size =
893 mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
894 if (inst_size == 0) {
895 m_is_call = eLazyBoolNo;
896 } else {
897 if (mc_disasm_ptr->IsCall(inst))
898 m_is_call = eLazyBoolYes;
Sean Callanan4740a732016-09-06 04:48:36 +0000899 else
Kate Stoneb9c1b512016-09-06 20:57:50 +0000900 m_is_call = eLazyBoolNo;
901 }
Sean Callanan4740a732016-09-06 04:48:36 +0000902 }
Kate Stoneb9c1b512016-09-06 20:57:50 +0000903 disasm_sp->Unlock();
904 }
Sean Callanan4740a732016-09-06 04:48:36 +0000905 }
Kate Stoneb9c1b512016-09-06 20:57:50 +0000906 return m_is_call == eLazyBoolYes;
907 }
908
protected:
  // Non-owning reference to the disassembler; it is lock()ed before use and
  // may have expired.
  std::weak_ptr<DisassemblerLLVMC> m_disasm_wp;
  // NOTE(review): presumably lazily computed caches like m_is_call below --
  // their uses are outside this part of the file; confirm.
  LazyBool m_does_branch;
  LazyBool m_has_delay_slot;
  // Cached answer for IsCall(); eLazyBoolCalculate until first computed.
  LazyBool m_is_call;
  // NOTE(review): set outside this part of the file; presumably records
  // whether decoding succeeded -- confirm.
  bool m_is_valid;
  // NOTE(review): presumably the value reported by UsingFileAddress(), i.e.
  // whether the instruction address is a file address -- confirm.
  bool m_using_file_addr;
Sean Callanan95e5c632012-02-17 00:53:45 +0000916};
917
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000918std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>
919DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu,
920 const char *features_str,
921 unsigned flavor,
922 DisassemblerLLVMC &owner) {
923 using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>;
924
Zachary Turner97206d52017-05-12 04:51:55 +0000925 std::string Status;
Kate Stoneb9c1b512016-09-06 20:57:50 +0000926 const llvm::Target *curr_target =
Zachary Turner97206d52017-05-12 04:51:55 +0000927 llvm::TargetRegistry::lookupTarget(triple, Status);
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000928 if (!curr_target)
929 return Instance();
Kate Stoneb9c1b512016-09-06 20:57:50 +0000930
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000931 std::unique_ptr<llvm::MCInstrInfo> instr_info_up(
932 curr_target->createMCInstrInfo());
933 if (!instr_info_up)
934 return Instance();
Kate Stoneb9c1b512016-09-06 20:57:50 +0000935
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000936 std::unique_ptr<llvm::MCRegisterInfo> reg_info_up(
Kate Stoneb9c1b512016-09-06 20:57:50 +0000937 curr_target->createMCRegInfo(triple));
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000938 if (!reg_info_up)
939 return Instance();
Kate Stoneb9c1b512016-09-06 20:57:50 +0000940
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000941 std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up(
942 curr_target->createMCSubtargetInfo(triple, cpu, features_str));
943 if (!subtarget_info_up)
944 return Instance();
Kate Stoneb9c1b512016-09-06 20:57:50 +0000945
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000946 std::unique_ptr<llvm::MCAsmInfo> asm_info_up(
947 curr_target->createMCAsmInfo(*reg_info_up, triple));
948 if (!asm_info_up)
949 return Instance();
Kate Stoneb9c1b512016-09-06 20:57:50 +0000950
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000951 std::unique_ptr<llvm::MCContext> context_up(
952 new llvm::MCContext(asm_info_up.get(), reg_info_up.get(), 0));
953 if (!context_up)
954 return Instance();
Kate Stoneb9c1b512016-09-06 20:57:50 +0000955
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000956 std::unique_ptr<llvm::MCDisassembler> disasm_up(
957 curr_target->createMCDisassembler(*subtarget_info_up, *context_up));
958 if (!disasm_up)
959 return Instance();
Sylvestre Ledrua3e4ceb2014-04-15 12:08:57 +0000960
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +0000961 std::unique_ptr<llvm::MCRelocationInfo> rel_info_up(
962 curr_target->createMCRelocationInfo(triple, *context_up));
963 if (!rel_info_up)
964 return Instance();
965
966 std::unique_ptr<llvm::MCSymbolizer> symbolizer_up(
967 curr_target->createMCSymbolizer(
968 triple, nullptr, DisassemblerLLVMC::SymbolLookupCallback, &owner,
969 context_up.get(), std::move(rel_info_up)));
970 disasm_up->setSymbolizer(std::move(symbolizer_up));
971
972 unsigned asm_printer_variant =
973 flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor;
974
975 std::unique_ptr<llvm::MCInstPrinter> instr_printer_up(
976 curr_target->createMCInstPrinter(llvm::Triple{triple},
977 asm_printer_variant, *asm_info_up,
978 *instr_info_up, *reg_info_up));
979 if (!instr_printer_up)
980 return Instance();
981
982 return Instance(
983 new MCDisasmInstance(std::move(instr_info_up), std::move(reg_info_up),
984 std::move(subtarget_info_up), std::move(asm_info_up),
985 std::move(context_up), std::move(disasm_up),
986 std::move(instr_printer_up)));
Jim Ingham0f063ba2013-03-02 00:26:47 +0000987}
988
// Takes ownership of every MC component needed for disassembly and printing.
// All seven pointers must be non-null; this is only checked by the assert
// below.
DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance(
    std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,
    std::unique_ptr<llvm::MCRegisterInfo> &&reg_info_up,
    std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,
    std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,
    std::unique_ptr<llvm::MCContext> &&context_up,
    std::unique_ptr<llvm::MCDisassembler> &&disasm_up,
    std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up)
    : m_instr_info_up(std::move(instr_info_up)),
      m_reg_info_up(std::move(reg_info_up)),
      m_subtarget_info_up(std::move(subtarget_info_up)),
      m_asm_info_up(std::move(asm_info_up)),
      m_context_up(std::move(context_up)), m_disasm_up(std::move(disasm_up)),
      m_instr_printer_up(std::move(instr_printer_up)) {
  assert(m_instr_info_up && m_reg_info_up && m_subtarget_info_up &&
         m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up);
}
Eugene Zelenko8dd3fdb2015-10-21 01:42:15 +00001006
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +00001007uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst(
Kate Stoneb9c1b512016-09-06 20:57:50 +00001008 const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc,
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +00001009 llvm::MCInst &mc_inst) const {
Kate Stoneb9c1b512016-09-06 20:57:50 +00001010 llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len);
1011 llvm::MCDisassembler::DecodeStatus status;
Jim Ingham0f063ba2013-03-02 00:26:47 +00001012
Kate Stoneb9c1b512016-09-06 20:57:50 +00001013 uint64_t new_inst_size;
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +00001014 status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc,
Kate Stoneb9c1b512016-09-06 20:57:50 +00001015 llvm::nulls(), llvm::nulls());
1016 if (status == llvm::MCDisassembler::Success)
1017 return new_inst_size;
1018 else
1019 return 0;
Jim Ingham0f063ba2013-03-02 00:26:47 +00001020}
1021
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +00001022void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst(
Kate Stoneb9c1b512016-09-06 20:57:50 +00001023 llvm::MCInst &mc_inst, std::string &inst_string,
1024 std::string &comments_string) {
1025 llvm::raw_string_ostream inst_stream(inst_string);
1026 llvm::raw_string_ostream comments_stream(comments_string);
Sylvestre Ledrua3e4ceb2014-04-15 12:08:57 +00001027
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +00001028 m_instr_printer_up->setCommentStream(comments_stream);
1029 m_instr_printer_up->printInst(&mc_inst, inst_stream, llvm::StringRef(),
1030 *m_subtarget_info_up);
1031 m_instr_printer_up->setCommentStream(llvm::nulls());
Kate Stoneb9c1b512016-09-06 20:57:50 +00001032 comments_stream.flush();
1033
1034 static std::string g_newlines("\r\n");
1035
1036 for (size_t newline_pos = 0;
1037 (newline_pos = comments_string.find_first_of(g_newlines, newline_pos)) !=
1038 comments_string.npos;
1039 /**/) {
1040 comments_string.replace(comments_string.begin() + newline_pos,
1041 comments_string.begin() + newline_pos + 1, 1, ' ');
1042 }
Jim Ingham0f063ba2013-03-02 00:26:47 +00001043}
1044
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +00001045void DisassemblerLLVMC::MCDisasmInstance::SetStyle(
Kate Stoneb9c1b512016-09-06 20:57:50 +00001046 bool use_hex_immed, HexImmediateStyle hex_style) {
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +00001047 m_instr_printer_up->setPrintImmHex(use_hex_immed);
Kate Stoneb9c1b512016-09-06 20:57:50 +00001048 switch (hex_style) {
1049 case eHexStyleC:
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +00001050 m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C);
Kate Stoneb9c1b512016-09-06 20:57:50 +00001051 break;
1052 case eHexStyleAsm:
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +00001053 m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm);
Kate Stoneb9c1b512016-09-06 20:57:50 +00001054 break;
1055 }
Daniel Malead79ae052013-08-07 21:54:09 +00001056}
1057
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +00001058bool DisassemblerLLVMC::MCDisasmInstance::CanBranch(
1059 llvm::MCInst &mc_inst) const {
1060 return m_instr_info_up->get(mc_inst.getOpcode())
1061 .mayAffectControlFlow(mc_inst, *m_reg_info_up);
Jim Ingham0f063ba2013-03-02 00:26:47 +00001062}
1063
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +00001064bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot(
1065 llvm::MCInst &mc_inst) const {
1066 return m_instr_info_up->get(mc_inst.getOpcode()).hasDelaySlot();
Bhushan D. Attarde7f3daed2015-08-26 06:04:54 +00001067}
1068
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +00001069bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const {
1070 return m_instr_info_up->get(mc_inst.getOpcode()).isCall();
Sean Callanan4740a732016-09-06 04:48:36 +00001071}
1072
// Builds the MC disassembler(s) for the given architecture.
//
// The constructor derives the triple, CPU name and feature string from arch,
// creates the primary MCDisasmInstance and, for ARM and MIPS, a second
// "alternate" instance (Thumb, or MIPS16/microMIPS). If anything fails the
// primary instance is left empty, which makes IsValid() return false.
DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch,
                                     const char *flavor_string)
    : Disassembler(arch, flavor_string), m_exe_ctx(NULL), m_inst(NULL),
      m_data_from_file(false) {
  // Fall back to the default flavor if the requested one does not apply to
  // this architecture.
  if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) {
    m_flavor.assign("default");
  }

  // ~0U tells MCDisasmInstance::Create to use the target's own default
  // assembler dialect.
  unsigned flavor = ~0U;
  llvm::Triple triple = arch.GetTriple();

  // The only flavors handled here are "intel" and "att", and only on x86.
  // The base class will set m_flavor correctly coming in.
  if (triple.getArch() == llvm::Triple::x86 ||
      triple.getArch() == llvm::Triple::x86_64) {
    if (m_flavor == "intel") {
      flavor = 1;
    } else if (m_flavor == "att") {
      flavor = 0;
    }
  }

  ArchSpec thumb_arch(arch);
  if (triple.getArch() == llvm::Triple::arm) {
    std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str());
    // Replace "arm" with "thumb" so we get all thumb variants correct. A name
    // of three characters or fewer carries no version suffix, so use the most
    // recent variant instead.
    if (thumb_arch_name.size() > 3) {
      thumb_arch_name.erase(0, 3);
      thumb_arch_name.insert(0, "thumb");
    } else {
      thumb_arch_name = "thumbv8.2a";
    }
    thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name));
  }

  // If no sub architecture is specified then use the most recent arm
  // architecture so the disassembler will return all instructions. Without it
  // we will see a lot of unknown opcodes in case the code uses instructions
  // which are not available in the oldest arm version (used when no sub
  // architecture is specified).
  if (triple.getArch() == llvm::Triple::arm &&
      triple.getSubArch() == llvm::Triple::NoSubArch)
    triple.setArchName("armv8.2a");

  // Points into storage owned by `triple` (or by `thumb_arch` below); both
  // outlive every use of triple_str in this constructor.
  const char *triple_str = triple.getTriple().c_str();

  // ARM Cortex M0-M7 devices only execute thumb instructions
  if (arch.IsAlwaysThumbInstructions()) {
    triple_str = thumb_arch.GetTriple().getTriple().c_str();
  }

  const char *cpu = "";

  // Only MIPS cores select an explicit CPU name; everything else uses the
  // target default ("").
  switch (arch.GetCore()) {
  case ArchSpec::eCore_mips32:
  case ArchSpec::eCore_mips32el:
    cpu = "mips32";
    break;
  case ArchSpec::eCore_mips32r2:
  case ArchSpec::eCore_mips32r2el:
    cpu = "mips32r2";
    break;
  case ArchSpec::eCore_mips32r3:
  case ArchSpec::eCore_mips32r3el:
    cpu = "mips32r3";
    break;
  case ArchSpec::eCore_mips32r5:
  case ArchSpec::eCore_mips32r5el:
    cpu = "mips32r5";
    break;
  case ArchSpec::eCore_mips32r6:
  case ArchSpec::eCore_mips32r6el:
    cpu = "mips32r6";
    break;
  case ArchSpec::eCore_mips64:
  case ArchSpec::eCore_mips64el:
    cpu = "mips64";
    break;
  case ArchSpec::eCore_mips64r2:
  case ArchSpec::eCore_mips64r2el:
    cpu = "mips64r2";
    break;
  case ArchSpec::eCore_mips64r3:
  case ArchSpec::eCore_mips64r3el:
    cpu = "mips64r3";
    break;
  case ArchSpec::eCore_mips64r5:
  case ArchSpec::eCore_mips64r5el:
    cpu = "mips64r5";
    break;
  case ArchSpec::eCore_mips64r6:
  case ArchSpec::eCore_mips64r6el:
    cpu = "mips64r6";
    break;
  default:
    cpu = "";
    break;
  }

  // Translate the MIPS application-specific extension flags into LLVM
  // feature-string entries.
  std::string features_str = "";
  if (triple.getArch() == llvm::Triple::mips ||
      triple.getArch() == llvm::Triple::mipsel ||
      triple.getArch() == llvm::Triple::mips64 ||
      triple.getArch() == llvm::Triple::mips64el) {
    uint32_t arch_flags = arch.GetFlags();
    if (arch_flags & ArchSpec::eMIPSAse_msa)
      features_str += "+msa,";
    if (arch_flags & ArchSpec::eMIPSAse_dsp)
      features_str += "+dsp,";
    if (arch_flags & ArchSpec::eMIPSAse_dspr2)
      features_str += "+dspr2,";
  }

  // If any AArch64 variant, enable the ARMv8.2 ISA extensions so we can
  // disassemble newer instructions.
  if (triple.getArch() == llvm::Triple::aarch64)
    features_str += "+v8.2a";

  // IsValid() tests m_disasm_up to tell whether we are valid or not, so if
  // this isn't good for some reason, we won't be valid and FindPlugin will
  // fail and we won't get used.
  m_disasm_up = MCDisasmInstance::Create(triple_str, cpu, features_str.c_str(),
                                         flavor, *this);

  llvm::Triple::ArchType llvm_arch = triple.getArch();

  // For arm CPUs that can execute arm or thumb instructions, also create a
  // thumb instruction disassembler.
  if (llvm_arch == llvm::Triple::arm) {
    std::string thumb_triple(thumb_arch.GetTriple().getTriple());
    m_alternate_disasm_up =
        MCDisasmInstance::Create(thumb_triple.c_str(), "", "", flavor, *this);
    // Without the alternate disassembler, invalidate the whole object.
    if (!m_alternate_disasm_up)
      m_disasm_up.reset();

  } else if (llvm_arch == llvm::Triple::mips ||
             llvm_arch == llvm::Triple::mipsel ||
             llvm_arch == llvm::Triple::mips64 ||
             llvm_arch == llvm::Triple::mips64el) {
    /* Create alternate disassembler for MIPS16 and microMIPS */
    uint32_t arch_flags = arch.GetFlags();
    if (arch_flags & ArchSpec::eMIPSAse_mips16)
      features_str += "+mips16,";
    else if (arch_flags & ArchSpec::eMIPSAse_micromips)
      features_str += "+micromips,";

    m_alternate_disasm_up = MCDisasmInstance::Create(
        triple_str, cpu, features_str.c_str(), flavor, *this);
    // Without the alternate disassembler, invalidate the whole object.
    if (!m_alternate_disasm_up)
      m_disasm_up.reset();
  }
}
1225
// Nothing to release by hand: the destructor is compiler-generated.
DisassemblerLLVMC::~DisassemblerLLVMC() = default;
1227
Kate Stoneb9c1b512016-09-06 20:57:50 +00001228Disassembler *DisassemblerLLVMC::CreateInstance(const ArchSpec &arch,
1229 const char *flavor) {
1230 if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch) {
1231 std::unique_ptr<DisassemblerLLVMC> disasm_ap(
1232 new DisassemblerLLVMC(arch, flavor));
Eugene Zelenko45a40142015-10-22 21:24:37 +00001233
Kate Stoneb9c1b512016-09-06 20:57:50 +00001234 if (disasm_ap.get() && disasm_ap->IsValid())
1235 return disasm_ap.release();
1236 }
1237 return NULL;
Eugene Zelenko8dd3fdb2015-10-21 01:42:15 +00001238}
1239
Kate Stoneb9c1b512016-09-06 20:57:50 +00001240size_t DisassemblerLLVMC::DecodeInstructions(const Address &base_addr,
1241 const DataExtractor &data,
1242 lldb::offset_t data_offset,
1243 size_t num_instructions,
1244 bool append, bool data_from_file) {
1245 if (!append)
1246 m_instruction_list.Clear();
Sylvestre Ledrua3e4ceb2014-04-15 12:08:57 +00001247
Kate Stoneb9c1b512016-09-06 20:57:50 +00001248 if (!IsValid())
Sean Callanan95e5c632012-02-17 00:53:45 +00001249 return 0;
Kate Stoneb9c1b512016-09-06 20:57:50 +00001250
1251 m_data_from_file = data_from_file;
1252 uint32_t data_cursor = data_offset;
1253 const size_t data_byte_size = data.GetByteSize();
1254 uint32_t instructions_parsed = 0;
1255 Address inst_addr(base_addr);
1256
1257 while (data_cursor < data_byte_size &&
1258 instructions_parsed < num_instructions) {
1259
Tatyana Krasnukha04803b32018-06-26 13:06:54 +00001260 AddressClass address_class = AddressClass::eCode;
Kate Stoneb9c1b512016-09-06 20:57:50 +00001261
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +00001262 if (m_alternate_disasm_up)
Kate Stoneb9c1b512016-09-06 20:57:50 +00001263 address_class = inst_addr.GetAddressClass();
1264
1265 InstructionSP inst_sp(
1266 new InstructionLLVMC(*this, inst_addr, address_class));
1267
1268 if (!inst_sp)
1269 break;
1270
1271 uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor);
1272
1273 if (inst_size == 0)
1274 break;
1275
1276 m_instruction_list.Append(inst_sp);
1277 data_cursor += inst_size;
1278 inst_addr.Slide(inst_size);
1279 instructions_parsed++;
1280 }
1281
1282 return data_cursor - data_offset;
Sean Callanan95e5c632012-02-17 00:53:45 +00001283}
1284
Kate Stoneb9c1b512016-09-06 20:57:50 +00001285void DisassemblerLLVMC::Initialize() {
1286 PluginManager::RegisterPlugin(GetPluginNameStatic(),
1287 "Disassembler that uses LLVM MC to disassemble "
1288 "i386, x86_64, ARM, and ARM64.",
1289 CreateInstance);
Jason Molendac980fa92015-02-13 23:24:21 +00001290
Kate Stoneb9c1b512016-09-06 20:57:50 +00001291 llvm::InitializeAllTargetInfos();
1292 llvm::InitializeAllTargetMCs();
1293 llvm::InitializeAllAsmParsers();
1294 llvm::InitializeAllDisassemblers();
1295}
Sylvestre Ledrua3e4ceb2014-04-15 12:08:57 +00001296
// Undoes the plugin registration made by Initialize(), identifying the
// plugin by its factory function.
void DisassemblerLLVMC::Terminate() {
  PluginManager::UnregisterPlugin(CreateInstance);
}
Sylvestre Ledrua3e4ceb2014-04-15 12:08:57 +00001300
// Returns the name under which this plugin is registered ("llvm-mc").
ConstString DisassemblerLLVMC::GetPluginNameStatic() {
  // Constructed once, on first call.
  static ConstString g_name("llvm-mc");
  return g_name;
}
Sylvestre Ledrua3e4ceb2014-04-15 12:08:57 +00001305
Kate Stoneb9c1b512016-09-06 20:57:50 +00001306int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc,
1307 uint64_t offset, uint64_t size,
1308 int tag_type, void *tag_bug) {
1309 return static_cast<DisassemblerLLVMC *>(disassembler)
1310 ->OpInfo(pc, offset, size, tag_type, tag_bug);
1311}
1312
1313const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler,
1314 uint64_t value,
1315 uint64_t *type, uint64_t pc,
1316 const char **name) {
1317 return static_cast<DisassemblerLLVMC *>(disassembler)
1318 ->SymbolLookup(value, type, pc, name);
1319}
1320
1321bool DisassemblerLLVMC::FlavorValidForArchSpec(
1322 const lldb_private::ArchSpec &arch, const char *flavor) {
1323 llvm::Triple triple = arch.GetTriple();
1324 if (flavor == NULL || strcmp(flavor, "default") == 0)
1325 return true;
1326
1327 if (triple.getArch() == llvm::Triple::x86 ||
1328 triple.getArch() == llvm::Triple::x86_64) {
1329 if (strcmp(flavor, "intel") == 0 || strcmp(flavor, "att") == 0)
1330 return true;
1331 else
1332 return false;
1333 } else
1334 return false;
1335}
1336
Tatyana Krasnukha6c2c08f2018-01-11 12:06:22 +00001337bool DisassemblerLLVMC::IsValid() const { return m_disasm_up.operator bool(); }
1338
Kate Stoneb9c1b512016-09-06 20:57:50 +00001339int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size,
1340 int tag_type, void *tag_bug) {
1341 switch (tag_type) {
1342 default:
1343 break;
1344 case 1:
1345 memset(tag_bug, 0, sizeof(::LLVMOpInfo1));
1346 break;
1347 }
1348 return 0;
1349}
1350
1351const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
1352 uint64_t pc, const char **name) {
1353 if (*type_ptr) {
1354 if (m_exe_ctx && m_inst) {
1355 // std::string remove_this_prior_to_checkin;
1356 Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : NULL;
1357 Address value_so_addr;
1358 Address pc_so_addr;
1359 if (m_inst->UsingFileAddress()) {
1360 ModuleSP module_sp(m_inst->GetAddress().GetModule());
1361 if (module_sp) {
1362 module_sp->ResolveFileAddress(value, value_so_addr);
1363 module_sp->ResolveFileAddress(pc, pc_so_addr);
Sean Callanan95e5c632012-02-17 00:53:45 +00001364 }
Kate Stoneb9c1b512016-09-06 20:57:50 +00001365 } else if (target && !target->GetSectionLoadList().IsEmpty()) {
1366 target->GetSectionLoadList().ResolveLoadAddress(value, value_so_addr);
1367 target->GetSectionLoadList().ResolveLoadAddress(pc, pc_so_addr);
1368 }
Greg Claytonba812f42012-05-10 02:52:23 +00001369
Kate Stoneb9c1b512016-09-06 20:57:50 +00001370 SymbolContext sym_ctx;
1371 const uint32_t resolve_scope =
1372 eSymbolContextFunction | eSymbolContextSymbol;
1373 if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) {
1374 pc_so_addr.GetModule()->ResolveSymbolContextForAddress(
1375 pc_so_addr, resolve_scope, sym_ctx);
1376 }
1377
1378 if (value_so_addr.IsValid() && value_so_addr.GetSection()) {
1379 StreamString ss;
1380
1381 bool format_omitting_current_func_name = false;
1382 if (sym_ctx.symbol || sym_ctx.function) {
1383 AddressRange range;
1384 if (sym_ctx.GetAddressRange(resolve_scope, 0, false, range) &&
1385 range.GetBaseAddress().IsValid() &&
1386 range.ContainsLoadAddress(value_so_addr, target)) {
1387 format_omitting_current_func_name = true;
1388 }
1389 }
1390
Adrian Prantl05097242018-04-30 16:49:04 +00001391 // If the "value" address (the target address we're symbolicating) is
1392 // inside the same SymbolContext as the current instruction pc
Kate Stoneb9c1b512016-09-06 20:57:50 +00001393 // (pc_so_addr), don't print the full function name - just print it
1394 // with DumpStyleNoFunctionName style, e.g. "<+36>".
1395 if (format_omitting_current_func_name) {
1396 value_so_addr.Dump(&ss, target, Address::DumpStyleNoFunctionName,
1397 Address::DumpStyleSectionNameOffset);
1398 } else {
1399 value_so_addr.Dump(
1400 &ss, target,
1401 Address::DumpStyleResolvedDescriptionNoFunctionArguments,
1402 Address::DumpStyleSectionNameOffset);
1403 }
1404
1405 if (!ss.GetString().empty()) {
1406 // If Address::Dump returned a multi-line description, most commonly
Adrian Prantl05097242018-04-30 16:49:04 +00001407 // seen when we have multiple levels of inlined functions at an
1408 // address, only show the first line.
Zachary Turnerc1564272016-11-16 21:15:24 +00001409 std::string str = ss.GetString();
Kate Stoneb9c1b512016-09-06 20:57:50 +00001410 size_t first_eol_char = str.find_first_of("\r\n");
1411 if (first_eol_char != std::string::npos) {
1412 str.erase(first_eol_char);
1413 }
Zachary Turnerc1564272016-11-16 21:15:24 +00001414 m_inst->AppendComment(str);
Kate Stoneb9c1b512016-09-06 20:57:50 +00001415 }
1416 }
1417 }
1418 }
1419
1420 *type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
1421 *name = NULL;
1422 return NULL;
Sean Callanan95e5c632012-02-17 00:53:45 +00001423}
1424
1425//------------------------------------------------------------------
1426// PluginInterface protocol
1427//------------------------------------------------------------------
// Instance-level accessor for the plugin name; same value as
// GetPluginNameStatic().
ConstString DisassemblerLLVMC::GetPluginName() { return GetPluginNameStatic(); }
Sean Callanan95e5c632012-02-17 00:53:45 +00001429
// Version number reported for this plugin (always 1).
uint32_t DisassemblerLLVMC::GetPluginVersion() { return 1; }