blob: 62e7357b8f34b1d76572342e8e41b380991058ab [file] [log] [blame]
//===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler.
// It contains code to translate the data produced by the decoder into
// MCInsts.
// Documentation for the disassembler can be found in X86Disassembler.h.
//
//===----------------------------------------------------------------------===//
16
17#include "X86Disassembler.h"
18#include "X86DisassemblerDecoder.h"
Daniel Dunbar5f9b9ef2009-11-25 06:53:08 +000019
Sean Callanan9899f702010-04-13 21:21:57 +000020#include "llvm/MC/EDInstInfo.h"
Daniel Dunbar5f9b9ef2009-11-25 06:53:08 +000021#include "llvm/MC/MCDisassembler.h"
Sean Callanan8ed9f512009-12-19 02:59:52 +000022#include "llvm/MC/MCDisassembler.h"
23#include "llvm/MC/MCInst.h"
Daniel Dunbar5f9b9ef2009-11-25 06:53:08 +000024#include "llvm/Target/TargetRegistry.h"
Sean Callanana144c3f2010-04-02 21:23:51 +000025#include "llvm/Support/Debug.h"
Sean Callanan8ed9f512009-12-19 02:59:52 +000026#include "llvm/Support/MemoryObject.h"
Sean Callanan8ed9f512009-12-19 02:59:52 +000027#include "llvm/Support/raw_ostream.h"
Sean Callanan0122c902009-12-22 01:11:26 +000028
Douglas Gregor3dac3b72009-12-22 17:25:11 +000029#include "X86GenRegisterNames.inc"
Sean Callanan9899f702010-04-13 21:21:57 +000030#include "X86GenEDInfo.inc"
Sean Callanan0122c902009-12-22 01:11:26 +000031
Daniel Dunbar5f9b9ef2009-11-25 06:53:08 +000032using namespace llvm;
Sean Callanan8ed9f512009-12-19 02:59:52 +000033using namespace llvm::X86Disassembler;
34
Sean Callanana144c3f2010-04-02 21:23:51 +000035void x86DisassemblerDebug(const char *file,
36 unsigned line,
37 const char *s) {
38 dbgs() << file << ":" << line << ": " << s;
39}
40
41#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s));
42
Sean Callanan8ed9f512009-12-19 02:59:52 +000043namespace llvm {
44
45// Fill-ins to make the compiler happy. These constants are never actually
46// assigned; they are just filler to make an automatically-generated switch
47// statement work.
48namespace X86 {
49 enum {
50 BX_SI = 500,
51 BX_DI = 501,
52 BP_SI = 502,
53 BP_DI = 503,
54 sib = 504,
55 sib64 = 505
56 };
57}
58
Sean Callanan0122c902009-12-22 01:11:26 +000059extern Target TheX86_32Target, TheX86_64Target;
60
Sean Callanan8ed9f512009-12-19 02:59:52 +000061}
62
Sean Callanana144c3f2010-04-02 21:23:51 +000063static bool translateInstruction(MCInst &target,
64 InternalInstruction &source);
Sean Callanan8ed9f512009-12-19 02:59:52 +000065
66X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) :
67 MCDisassembler(),
68 fMode(mode) {
69}
70
71X86GenericDisassembler::~X86GenericDisassembler() {
72}
73
Sean Callanan9899f702010-04-13 21:21:57 +000074EDInstInfo *X86GenericDisassembler::getEDInfo() const {
75 return instInfoX86;
76}
77
Sean Callanan8ed9f512009-12-19 02:59:52 +000078/// regionReader - a callback function that wraps the readByte method from
79/// MemoryObject.
80///
81/// @param arg - The generic callback parameter. In this case, this should
82/// be a pointer to a MemoryObject.
83/// @param byte - A pointer to the byte to be read.
84/// @param address - The address to be read.
85static int regionReader(void* arg, uint8_t* byte, uint64_t address) {
86 MemoryObject* region = static_cast<MemoryObject*>(arg);
87 return region->readByte(address, byte);
88}
89
90/// logger - a callback function that wraps the operator<< method from
91/// raw_ostream.
92///
93/// @param arg - The generic callback parameter. This should be a pointe
94/// to a raw_ostream.
95/// @param log - A string to be logged. logger() adds a newline.
96static void logger(void* arg, const char* log) {
97 if (!arg)
98 return;
99
100 raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
101 vStream << log << "\n";
102}
103
104//
105// Public interface for the disassembler
106//
107
108bool X86GenericDisassembler::getInstruction(MCInst &instr,
109 uint64_t &size,
110 const MemoryObject &region,
111 uint64_t address,
112 raw_ostream &vStream) const {
113 InternalInstruction internalInstr;
114
115 int ret = decodeInstruction(&internalInstr,
116 regionReader,
117 (void*)&region,
118 logger,
119 (void*)&vStream,
120 address,
121 fMode);
122
Sean Callanana144c3f2010-04-02 21:23:51 +0000123 if (ret) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000124 size = internalInstr.readerCursor - address;
125 return false;
126 }
127 else {
128 size = internalInstr.length;
Sean Callanana144c3f2010-04-02 21:23:51 +0000129 return !translateInstruction(instr, internalInstr);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000130 }
131}
132
133//
134// Private code that translates from struct InternalInstructions to MCInsts.
135//
136
137/// translateRegister - Translates an internal register to the appropriate LLVM
138/// register, and appends it as an operand to an MCInst.
139///
140/// @param mcInst - The MCInst to append to.
141/// @param reg - The Reg to append.
142static void translateRegister(MCInst &mcInst, Reg reg) {
143#define ENTRY(x) X86::x,
144 uint8_t llvmRegnums[] = {
145 ALL_REGS
146 0
147 };
148#undef ENTRY
149
150 uint8_t llvmRegnum = llvmRegnums[reg];
151 mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
152}
153
154/// translateImmediate - Appends an immediate operand to an MCInst.
155///
156/// @param mcInst - The MCInst to append to.
157/// @param immediate - The immediate value to append.
158static void translateImmediate(MCInst &mcInst, uint64_t immediate) {
159 mcInst.addOperand(MCOperand::CreateImm(immediate));
160}
161
162/// translateRMRegister - Translates a register stored in the R/M field of the
163/// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
164/// @param mcInst - The MCInst to append to.
165/// @param insn - The internal instruction to extract the R/M field
166/// from.
Sean Callanana144c3f2010-04-02 21:23:51 +0000167/// @return - 0 on success; -1 otherwise
168static bool translateRMRegister(MCInst &mcInst,
Sean Callanan8ed9f512009-12-19 02:59:52 +0000169 InternalInstruction &insn) {
Sean Callanana144c3f2010-04-02 21:23:51 +0000170 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
171 debug("A R/M register operand may not have a SIB byte");
172 return true;
173 }
Sean Callanan8ed9f512009-12-19 02:59:52 +0000174
175 switch (insn.eaBase) {
Sean Callanana144c3f2010-04-02 21:23:51 +0000176 default:
177 debug("Unexpected EA base register");
178 return true;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000179 case EA_BASE_NONE:
Sean Callanana144c3f2010-04-02 21:23:51 +0000180 debug("EA_BASE_NONE for ModR/M base");
181 return true;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000182#define ENTRY(x) case EA_BASE_##x:
183 ALL_EA_BASES
184#undef ENTRY
Sean Callanana144c3f2010-04-02 21:23:51 +0000185 debug("A R/M register operand may not have a base; "
186 "the operand must be a register.");
187 return true;
188#define ENTRY(x) \
Sean Callanan8ed9f512009-12-19 02:59:52 +0000189 case EA_REG_##x: \
190 mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
191 ALL_REGS
192#undef ENTRY
Sean Callanan8ed9f512009-12-19 02:59:52 +0000193 }
Sean Callanana144c3f2010-04-02 21:23:51 +0000194
195 return false;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000196}
197
198/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
199/// fields of an internal instruction (and possibly its SIB byte) to a memory
200/// operand in LLVM's format, and appends it to an MCInst.
201///
202/// @param mcInst - The MCInst to append to.
203/// @param insn - The instruction to extract Mod, R/M, and SIB fields
204/// from.
Sean Callanan7fb35a22009-12-22 21:12:55 +0000205/// @param sr - Whether or not to emit the segment register. The
206/// LEA instruction does not expect a segment-register
207/// operand.
Sean Callanana144c3f2010-04-02 21:23:51 +0000208/// @return - 0 on success; nonzero otherwise
209static bool translateRMMemory(MCInst &mcInst,
Sean Callanan7fb35a22009-12-22 21:12:55 +0000210 InternalInstruction &insn,
211 bool sr) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000212 // Addresses in an MCInst are represented as five operands:
213 // 1. basereg (register) The R/M base, or (if there is a SIB) the
214 // SIB base
215 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
216 // scale amount
217 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
218 // the index (which is multiplied by the
219 // scale amount)
220 // 4. displacement (immediate) 0, or the displacement if there is one
221 // 5. segmentreg (register) x86_registerNONE for now, but could be set
222 // if we have segment overrides
223
224 MCOperand baseReg;
225 MCOperand scaleAmount;
226 MCOperand indexReg;
227 MCOperand displacement;
228 MCOperand segmentReg;
229
230 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
231 if (insn.sibBase != SIB_BASE_NONE) {
232 switch (insn.sibBase) {
233 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000234 debug("Unexpected sibBase");
235 return true;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000236#define ENTRY(x) \
Sean Callanan7fb35a22009-12-22 21:12:55 +0000237 case SIB_BASE_##x: \
Sean Callanan8ed9f512009-12-19 02:59:52 +0000238 baseReg = MCOperand::CreateReg(X86::x); break;
239 ALL_SIB_BASES
240#undef ENTRY
241 }
242 } else {
243 baseReg = MCOperand::CreateReg(0);
244 }
245
246 if (insn.sibIndex != SIB_INDEX_NONE) {
247 switch (insn.sibIndex) {
248 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000249 debug("Unexpected sibIndex");
250 return true;
Sean Callanan7fb35a22009-12-22 21:12:55 +0000251#define ENTRY(x) \
Sean Callanan8ed9f512009-12-19 02:59:52 +0000252 case SIB_INDEX_##x: \
253 indexReg = MCOperand::CreateReg(X86::x); break;
254 EA_BASES_32BIT
255 EA_BASES_64BIT
256#undef ENTRY
257 }
258 } else {
259 indexReg = MCOperand::CreateReg(0);
260 }
261
262 scaleAmount = MCOperand::CreateImm(insn.sibScale);
263 } else {
264 switch (insn.eaBase) {
265 case EA_BASE_NONE:
Sean Callanana144c3f2010-04-02 21:23:51 +0000266 if (insn.eaDisplacement == EA_DISP_NONE) {
267 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
268 return true;
269 }
Sean Callanan8ed9f512009-12-19 02:59:52 +0000270 if (insn.mode == MODE_64BIT)
271 baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
272 else
273 baseReg = MCOperand::CreateReg(0);
274
275 indexReg = MCOperand::CreateReg(0);
276 break;
277 case EA_BASE_BX_SI:
278 baseReg = MCOperand::CreateReg(X86::BX);
279 indexReg = MCOperand::CreateReg(X86::SI);
280 break;
281 case EA_BASE_BX_DI:
282 baseReg = MCOperand::CreateReg(X86::BX);
283 indexReg = MCOperand::CreateReg(X86::DI);
284 break;
285 case EA_BASE_BP_SI:
286 baseReg = MCOperand::CreateReg(X86::BP);
287 indexReg = MCOperand::CreateReg(X86::SI);
288 break;
289 case EA_BASE_BP_DI:
290 baseReg = MCOperand::CreateReg(X86::BP);
291 indexReg = MCOperand::CreateReg(X86::DI);
292 break;
293 default:
294 indexReg = MCOperand::CreateReg(0);
295 switch (insn.eaBase) {
296 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000297 debug("Unexpected eaBase");
298 return true;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000299 // Here, we will use the fill-ins defined above. However,
300 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
301 // sib and sib64 were handled in the top-level if, so they're only
302 // placeholders to keep the compiler happy.
303#define ENTRY(x) \
304 case EA_BASE_##x: \
305 baseReg = MCOperand::CreateReg(X86::x); break;
306 ALL_EA_BASES
307#undef ENTRY
308#define ENTRY(x) case EA_REG_##x:
309 ALL_REGS
310#undef ENTRY
Sean Callanana144c3f2010-04-02 21:23:51 +0000311 debug("A R/M memory operand may not be a register; "
312 "the base field must be a base.");
313 return true;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000314 }
315 }
Sean Callanan7fb35a22009-12-22 21:12:55 +0000316
317 scaleAmount = MCOperand::CreateImm(1);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000318 }
319
320 displacement = MCOperand::CreateImm(insn.displacement);
321
322 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
323 0, // SEG_OVERRIDE_NONE
324 X86::CS,
325 X86::SS,
326 X86::DS,
327 X86::ES,
328 X86::FS,
329 X86::GS
330 };
331
332 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
333
334 mcInst.addOperand(baseReg);
335 mcInst.addOperand(scaleAmount);
336 mcInst.addOperand(indexReg);
337 mcInst.addOperand(displacement);
Sean Callanan7fb35a22009-12-22 21:12:55 +0000338
339 if (sr)
340 mcInst.addOperand(segmentReg);
Sean Callanana144c3f2010-04-02 21:23:51 +0000341
342 return false;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000343}
344
345/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
346/// byte of an instruction to LLVM form, and appends it to an MCInst.
347///
348/// @param mcInst - The MCInst to append to.
349/// @param operand - The operand, as stored in the descriptor table.
350/// @param insn - The instruction to extract Mod, R/M, and SIB fields
351/// from.
Sean Callanana144c3f2010-04-02 21:23:51 +0000352/// @return - 0 on success; nonzero otherwise
353static bool translateRM(MCInst &mcInst,
354 OperandSpecifier &operand,
355 InternalInstruction &insn) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000356 switch (operand.type) {
357 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000358 debug("Unexpected type for a R/M operand");
359 return true;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000360 case TYPE_R8:
361 case TYPE_R16:
362 case TYPE_R32:
363 case TYPE_R64:
364 case TYPE_Rv:
365 case TYPE_MM:
366 case TYPE_MM32:
367 case TYPE_MM64:
368 case TYPE_XMM:
369 case TYPE_XMM32:
370 case TYPE_XMM64:
371 case TYPE_XMM128:
372 case TYPE_DEBUGREG:
373 case TYPE_CR32:
374 case TYPE_CR64:
Sean Callanana144c3f2010-04-02 21:23:51 +0000375 return translateRMRegister(mcInst, insn);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000376 case TYPE_M:
377 case TYPE_M8:
378 case TYPE_M16:
379 case TYPE_M32:
380 case TYPE_M64:
381 case TYPE_M128:
382 case TYPE_M512:
383 case TYPE_Mv:
384 case TYPE_M32FP:
385 case TYPE_M64FP:
386 case TYPE_M80FP:
387 case TYPE_M16INT:
388 case TYPE_M32INT:
389 case TYPE_M64INT:
390 case TYPE_M1616:
391 case TYPE_M1632:
392 case TYPE_M1664:
Sean Callanana144c3f2010-04-02 21:23:51 +0000393 return translateRMMemory(mcInst, insn, true);
Sean Callanan7fb35a22009-12-22 21:12:55 +0000394 case TYPE_LEA:
Sean Callanana144c3f2010-04-02 21:23:51 +0000395 return translateRMMemory(mcInst, insn, false);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000396 }
397}
398
399/// translateFPRegister - Translates a stack position on the FPU stack to its
400/// LLVM form, and appends it to an MCInst.
401///
402/// @param mcInst - The MCInst to append to.
403/// @param stackPos - The stack position to translate.
Sean Callanana144c3f2010-04-02 21:23:51 +0000404/// @return - 0 on success; nonzero otherwise.
405static bool translateFPRegister(MCInst &mcInst,
406 uint8_t stackPos) {
407 if (stackPos >= 8) {
408 debug("Invalid FP stack position");
409 return true;
410 }
Sean Callanan8ed9f512009-12-19 02:59:52 +0000411
412 mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
Sean Callanana144c3f2010-04-02 21:23:51 +0000413
414 return false;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000415}
416
417/// translateOperand - Translates an operand stored in an internal instruction
418/// to LLVM's format and appends it to an MCInst.
419///
420/// @param mcInst - The MCInst to append to.
421/// @param operand - The operand, as stored in the descriptor table.
422/// @param insn - The internal instruction.
Sean Callanana144c3f2010-04-02 21:23:51 +0000423/// @return - false on success; true otherwise.
424static bool translateOperand(MCInst &mcInst,
425 OperandSpecifier &operand,
426 InternalInstruction &insn) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000427 switch (operand.encoding) {
428 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000429 debug("Unhandled operand encoding during translation");
430 return true;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000431 case ENCODING_REG:
432 translateRegister(mcInst, insn.reg);
Sean Callanana144c3f2010-04-02 21:23:51 +0000433 return false;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000434 case ENCODING_RM:
Sean Callanana144c3f2010-04-02 21:23:51 +0000435 return translateRM(mcInst, operand, insn);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000436 case ENCODING_CB:
437 case ENCODING_CW:
438 case ENCODING_CD:
439 case ENCODING_CP:
440 case ENCODING_CO:
441 case ENCODING_CT:
Sean Callanana144c3f2010-04-02 21:23:51 +0000442 debug("Translation of code offsets isn't supported.");
443 return true;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000444 case ENCODING_IB:
445 case ENCODING_IW:
446 case ENCODING_ID:
447 case ENCODING_IO:
448 case ENCODING_Iv:
449 case ENCODING_Ia:
450 translateImmediate(mcInst,
451 insn.immediates[insn.numImmediatesTranslated++]);
Sean Callanana144c3f2010-04-02 21:23:51 +0000452 return false;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000453 case ENCODING_RB:
454 case ENCODING_RW:
455 case ENCODING_RD:
456 case ENCODING_RO:
457 translateRegister(mcInst, insn.opcodeRegister);
Sean Callanana144c3f2010-04-02 21:23:51 +0000458 return false;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000459 case ENCODING_I:
Sean Callanana144c3f2010-04-02 21:23:51 +0000460 return translateFPRegister(mcInst, insn.opcodeModifier);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000461 case ENCODING_Rv:
462 translateRegister(mcInst, insn.opcodeRegister);
Sean Callanana144c3f2010-04-02 21:23:51 +0000463 return false;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000464 case ENCODING_DUP:
Sean Callanana144c3f2010-04-02 21:23:51 +0000465 return translateOperand(mcInst,
466 insn.spec->operands[operand.type - TYPE_DUP0],
467 insn);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000468 }
469}
470
471/// translateInstruction - Translates an internal instruction and all its
472/// operands to an MCInst.
473///
474/// @param mcInst - The MCInst to populate with the instruction's data.
475/// @param insn - The internal instruction.
Sean Callanana144c3f2010-04-02 21:23:51 +0000476/// @return - false on success; true otherwise.
477static bool translateInstruction(MCInst &mcInst,
478 InternalInstruction &insn) {
479 if (!insn.spec) {
480 debug("Instruction has no specification");
481 return true;
482 }
Sean Callanan8ed9f512009-12-19 02:59:52 +0000483
484 mcInst.setOpcode(insn.instructionID);
485
486 int index;
487
488 insn.numImmediatesTranslated = 0;
489
490 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
Sean Callanana144c3f2010-04-02 21:23:51 +0000491 if (insn.spec->operands[index].encoding != ENCODING_NONE) {
492 if (translateOperand(mcInst, insn.spec->operands[index], insn)) {
493 return true;
494 }
495 }
Sean Callanan8ed9f512009-12-19 02:59:52 +0000496 }
Sean Callanana144c3f2010-04-02 21:23:51 +0000497
498 return false;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000499}
Daniel Dunbar5f9b9ef2009-11-25 06:53:08 +0000500
Daniel Dunbar5d067fe2010-03-20 22:36:22 +0000501static MCDisassembler *createX86_32Disassembler(const Target &T) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000502 return new X86Disassembler::X86_32Disassembler;
Daniel Dunbar5f9b9ef2009-11-25 06:53:08 +0000503}
504
Daniel Dunbar5d067fe2010-03-20 22:36:22 +0000505static MCDisassembler *createX86_64Disassembler(const Target &T) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000506 return new X86Disassembler::X86_64Disassembler;
Daniel Dunbar5f9b9ef2009-11-25 06:53:08 +0000507}
508
509extern "C" void LLVMInitializeX86Disassembler() {
510 // Register the disassembler.
511 TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
512 createX86_32Disassembler);
513 TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
514 createX86_64Disassembler);
515}