blob: 7328dc0ba8d760cde8152505787d2b97fdbaa3c0 [file] [log] [blame]
Daniel Dunbar5f9b9ef2009-11-25 06:53:08 +00001//===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
Sean Callanan8ed9f512009-12-19 02:59:52 +00009//
10// This file is part of the X86 Disassembler.
11// It contains code to translate the data produced by the decoder into
12// MCInsts.
13// Documentation for the disassembler can be found in X86Disassembler.h.
14//
15//===----------------------------------------------------------------------===//
16
17#include "X86Disassembler.h"
18#include "X86DisassemblerDecoder.h"
Daniel Dunbar5f9b9ef2009-11-25 06:53:08 +000019
20#include "llvm/MC/MCDisassembler.h"
Sean Callanan8ed9f512009-12-19 02:59:52 +000021#include "llvm/MC/MCDisassembler.h"
22#include "llvm/MC/MCInst.h"
Daniel Dunbar5f9b9ef2009-11-25 06:53:08 +000023#include "llvm/Target/TargetRegistry.h"
Sean Callanana144c3f2010-04-02 21:23:51 +000024#include "llvm/Support/Debug.h"
Sean Callanan8ed9f512009-12-19 02:59:52 +000025#include "llvm/Support/MemoryObject.h"
Sean Callanan8ed9f512009-12-19 02:59:52 +000026#include "llvm/Support/raw_ostream.h"
Sean Callanan0122c902009-12-22 01:11:26 +000027
Douglas Gregor3dac3b72009-12-22 17:25:11 +000028#include "X86GenRegisterNames.inc"
Sean Callanan0122c902009-12-22 01:11:26 +000029
Daniel Dunbar5f9b9ef2009-11-25 06:53:08 +000030using namespace llvm;
Sean Callanan8ed9f512009-12-19 02:59:52 +000031using namespace llvm::X86Disassembler;
32
Sean Callanana144c3f2010-04-02 21:23:51 +000033void x86DisassemblerDebug(const char *file,
34 unsigned line,
35 const char *s) {
36 dbgs() << file << ":" << line << ": " << s;
37}
38
// debug - Reports a diagnostic message tagged with the current file and line
// by routing a call to x86DisassemblerDebug() through the DEBUG macro.
//
// The expansion deliberately carries no trailing semicolon: every call site
// already writes `debug("...");`, so the macro now expands to exactly one
// statement instead of a statement followed by a stray empty statement (which
// would break an unbraced if/else).
// NOTE(review): this assumes DEBUG(...) itself expands to a single
// statement-like construct -- confirm against llvm/Support/Debug.h.
#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s))
40
Sean Callanan8ed9f512009-12-19 02:59:52 +000041namespace llvm {
42
43// Fill-ins to make the compiler happy. These constants are never actually
44// assigned; they are just filler to make an automatically-generated switch
45// statement work.
46namespace X86 {
47 enum {
48 BX_SI = 500,
49 BX_DI = 501,
50 BP_SI = 502,
51 BP_DI = 503,
52 sib = 504,
53 sib64 = 505
54 };
55}
56
Sean Callanan0122c902009-12-22 01:11:26 +000057extern Target TheX86_32Target, TheX86_64Target;
58
Sean Callanan8ed9f512009-12-19 02:59:52 +000059}
60
Sean Callanana144c3f2010-04-02 21:23:51 +000061static bool translateInstruction(MCInst &target,
62 InternalInstruction &source);
Sean Callanan8ed9f512009-12-19 02:59:52 +000063
64X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) :
65 MCDisassembler(),
66 fMode(mode) {
67}
68
69X86GenericDisassembler::~X86GenericDisassembler() {
70}
71
72/// regionReader - a callback function that wraps the readByte method from
73/// MemoryObject.
74///
75/// @param arg - The generic callback parameter. In this case, this should
76/// be a pointer to a MemoryObject.
77/// @param byte - A pointer to the byte to be read.
78/// @param address - The address to be read.
79static int regionReader(void* arg, uint8_t* byte, uint64_t address) {
80 MemoryObject* region = static_cast<MemoryObject*>(arg);
81 return region->readByte(address, byte);
82}
83
84/// logger - a callback function that wraps the operator<< method from
85/// raw_ostream.
86///
87/// @param arg - The generic callback parameter. This should be a pointe
88/// to a raw_ostream.
89/// @param log - A string to be logged. logger() adds a newline.
90static void logger(void* arg, const char* log) {
91 if (!arg)
92 return;
93
94 raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
95 vStream << log << "\n";
96}
97
98//
99// Public interface for the disassembler
100//
101
102bool X86GenericDisassembler::getInstruction(MCInst &instr,
103 uint64_t &size,
104 const MemoryObject &region,
105 uint64_t address,
106 raw_ostream &vStream) const {
107 InternalInstruction internalInstr;
108
109 int ret = decodeInstruction(&internalInstr,
110 regionReader,
111 (void*)&region,
112 logger,
113 (void*)&vStream,
114 address,
115 fMode);
116
Sean Callanana144c3f2010-04-02 21:23:51 +0000117 if (ret) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000118 size = internalInstr.readerCursor - address;
119 return false;
120 }
121 else {
122 size = internalInstr.length;
Sean Callanana144c3f2010-04-02 21:23:51 +0000123 return !translateInstruction(instr, internalInstr);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000124 }
125}
126
127//
128// Private code that translates from struct InternalInstructions to MCInsts.
129//
130
131/// translateRegister - Translates an internal register to the appropriate LLVM
132/// register, and appends it as an operand to an MCInst.
133///
134/// @param mcInst - The MCInst to append to.
135/// @param reg - The Reg to append.
136static void translateRegister(MCInst &mcInst, Reg reg) {
137#define ENTRY(x) X86::x,
138 uint8_t llvmRegnums[] = {
139 ALL_REGS
140 0
141 };
142#undef ENTRY
143
144 uint8_t llvmRegnum = llvmRegnums[reg];
145 mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
146}
147
148/// translateImmediate - Appends an immediate operand to an MCInst.
149///
150/// @param mcInst - The MCInst to append to.
151/// @param immediate - The immediate value to append.
152static void translateImmediate(MCInst &mcInst, uint64_t immediate) {
153 mcInst.addOperand(MCOperand::CreateImm(immediate));
154}
155
156/// translateRMRegister - Translates a register stored in the R/M field of the
157/// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
158/// @param mcInst - The MCInst to append to.
159/// @param insn - The internal instruction to extract the R/M field
160/// from.
Sean Callanana144c3f2010-04-02 21:23:51 +0000161/// @return - 0 on success; -1 otherwise
162static bool translateRMRegister(MCInst &mcInst,
Sean Callanan8ed9f512009-12-19 02:59:52 +0000163 InternalInstruction &insn) {
Sean Callanana144c3f2010-04-02 21:23:51 +0000164 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
165 debug("A R/M register operand may not have a SIB byte");
166 return true;
167 }
Sean Callanan8ed9f512009-12-19 02:59:52 +0000168
169 switch (insn.eaBase) {
Sean Callanana144c3f2010-04-02 21:23:51 +0000170 default:
171 debug("Unexpected EA base register");
172 return true;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000173 case EA_BASE_NONE:
Sean Callanana144c3f2010-04-02 21:23:51 +0000174 debug("EA_BASE_NONE for ModR/M base");
175 return true;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000176#define ENTRY(x) case EA_BASE_##x:
177 ALL_EA_BASES
178#undef ENTRY
Sean Callanana144c3f2010-04-02 21:23:51 +0000179 debug("A R/M register operand may not have a base; "
180 "the operand must be a register.");
181 return true;
182#define ENTRY(x) \
Sean Callanan8ed9f512009-12-19 02:59:52 +0000183 case EA_REG_##x: \
184 mcInst.addOperand(MCOperand::CreateReg(X86::x)); break;
185 ALL_REGS
186#undef ENTRY
Sean Callanan8ed9f512009-12-19 02:59:52 +0000187 }
Sean Callanana144c3f2010-04-02 21:23:51 +0000188
189 return false;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000190}
191
192/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
193/// fields of an internal instruction (and possibly its SIB byte) to a memory
194/// operand in LLVM's format, and appends it to an MCInst.
195///
196/// @param mcInst - The MCInst to append to.
197/// @param insn - The instruction to extract Mod, R/M, and SIB fields
198/// from.
Sean Callanan7fb35a22009-12-22 21:12:55 +0000199/// @param sr - Whether or not to emit the segment register. The
200/// LEA instruction does not expect a segment-register
201/// operand.
Sean Callanana144c3f2010-04-02 21:23:51 +0000202/// @return - 0 on success; nonzero otherwise
203static bool translateRMMemory(MCInst &mcInst,
Sean Callanan7fb35a22009-12-22 21:12:55 +0000204 InternalInstruction &insn,
205 bool sr) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000206 // Addresses in an MCInst are represented as five operands:
207 // 1. basereg (register) The R/M base, or (if there is a SIB) the
208 // SIB base
209 // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
210 // scale amount
211 // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
212 // the index (which is multiplied by the
213 // scale amount)
214 // 4. displacement (immediate) 0, or the displacement if there is one
215 // 5. segmentreg (register) x86_registerNONE for now, but could be set
216 // if we have segment overrides
217
218 MCOperand baseReg;
219 MCOperand scaleAmount;
220 MCOperand indexReg;
221 MCOperand displacement;
222 MCOperand segmentReg;
223
224 if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
225 if (insn.sibBase != SIB_BASE_NONE) {
226 switch (insn.sibBase) {
227 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000228 debug("Unexpected sibBase");
229 return true;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000230#define ENTRY(x) \
Sean Callanan7fb35a22009-12-22 21:12:55 +0000231 case SIB_BASE_##x: \
Sean Callanan8ed9f512009-12-19 02:59:52 +0000232 baseReg = MCOperand::CreateReg(X86::x); break;
233 ALL_SIB_BASES
234#undef ENTRY
235 }
236 } else {
237 baseReg = MCOperand::CreateReg(0);
238 }
239
240 if (insn.sibIndex != SIB_INDEX_NONE) {
241 switch (insn.sibIndex) {
242 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000243 debug("Unexpected sibIndex");
244 return true;
Sean Callanan7fb35a22009-12-22 21:12:55 +0000245#define ENTRY(x) \
Sean Callanan8ed9f512009-12-19 02:59:52 +0000246 case SIB_INDEX_##x: \
247 indexReg = MCOperand::CreateReg(X86::x); break;
248 EA_BASES_32BIT
249 EA_BASES_64BIT
250#undef ENTRY
251 }
252 } else {
253 indexReg = MCOperand::CreateReg(0);
254 }
255
256 scaleAmount = MCOperand::CreateImm(insn.sibScale);
257 } else {
258 switch (insn.eaBase) {
259 case EA_BASE_NONE:
Sean Callanana144c3f2010-04-02 21:23:51 +0000260 if (insn.eaDisplacement == EA_DISP_NONE) {
261 debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
262 return true;
263 }
Sean Callanan8ed9f512009-12-19 02:59:52 +0000264 if (insn.mode == MODE_64BIT)
265 baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
266 else
267 baseReg = MCOperand::CreateReg(0);
268
269 indexReg = MCOperand::CreateReg(0);
270 break;
271 case EA_BASE_BX_SI:
272 baseReg = MCOperand::CreateReg(X86::BX);
273 indexReg = MCOperand::CreateReg(X86::SI);
274 break;
275 case EA_BASE_BX_DI:
276 baseReg = MCOperand::CreateReg(X86::BX);
277 indexReg = MCOperand::CreateReg(X86::DI);
278 break;
279 case EA_BASE_BP_SI:
280 baseReg = MCOperand::CreateReg(X86::BP);
281 indexReg = MCOperand::CreateReg(X86::SI);
282 break;
283 case EA_BASE_BP_DI:
284 baseReg = MCOperand::CreateReg(X86::BP);
285 indexReg = MCOperand::CreateReg(X86::DI);
286 break;
287 default:
288 indexReg = MCOperand::CreateReg(0);
289 switch (insn.eaBase) {
290 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000291 debug("Unexpected eaBase");
292 return true;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000293 // Here, we will use the fill-ins defined above. However,
294 // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
295 // sib and sib64 were handled in the top-level if, so they're only
296 // placeholders to keep the compiler happy.
297#define ENTRY(x) \
298 case EA_BASE_##x: \
299 baseReg = MCOperand::CreateReg(X86::x); break;
300 ALL_EA_BASES
301#undef ENTRY
302#define ENTRY(x) case EA_REG_##x:
303 ALL_REGS
304#undef ENTRY
Sean Callanana144c3f2010-04-02 21:23:51 +0000305 debug("A R/M memory operand may not be a register; "
306 "the base field must be a base.");
307 return true;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000308 }
309 }
Sean Callanan7fb35a22009-12-22 21:12:55 +0000310
311 scaleAmount = MCOperand::CreateImm(1);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000312 }
313
314 displacement = MCOperand::CreateImm(insn.displacement);
315
316 static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
317 0, // SEG_OVERRIDE_NONE
318 X86::CS,
319 X86::SS,
320 X86::DS,
321 X86::ES,
322 X86::FS,
323 X86::GS
324 };
325
326 segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
327
328 mcInst.addOperand(baseReg);
329 mcInst.addOperand(scaleAmount);
330 mcInst.addOperand(indexReg);
331 mcInst.addOperand(displacement);
Sean Callanan7fb35a22009-12-22 21:12:55 +0000332
333 if (sr)
334 mcInst.addOperand(segmentReg);
Sean Callanana144c3f2010-04-02 21:23:51 +0000335
336 return false;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000337}
338
339/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
340/// byte of an instruction to LLVM form, and appends it to an MCInst.
341///
342/// @param mcInst - The MCInst to append to.
343/// @param operand - The operand, as stored in the descriptor table.
344/// @param insn - The instruction to extract Mod, R/M, and SIB fields
345/// from.
Sean Callanana144c3f2010-04-02 21:23:51 +0000346/// @return - 0 on success; nonzero otherwise
347static bool translateRM(MCInst &mcInst,
348 OperandSpecifier &operand,
349 InternalInstruction &insn) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000350 switch (operand.type) {
351 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000352 debug("Unexpected type for a R/M operand");
353 return true;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000354 case TYPE_R8:
355 case TYPE_R16:
356 case TYPE_R32:
357 case TYPE_R64:
358 case TYPE_Rv:
359 case TYPE_MM:
360 case TYPE_MM32:
361 case TYPE_MM64:
362 case TYPE_XMM:
363 case TYPE_XMM32:
364 case TYPE_XMM64:
365 case TYPE_XMM128:
366 case TYPE_DEBUGREG:
367 case TYPE_CR32:
368 case TYPE_CR64:
Sean Callanana144c3f2010-04-02 21:23:51 +0000369 return translateRMRegister(mcInst, insn);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000370 case TYPE_M:
371 case TYPE_M8:
372 case TYPE_M16:
373 case TYPE_M32:
374 case TYPE_M64:
375 case TYPE_M128:
376 case TYPE_M512:
377 case TYPE_Mv:
378 case TYPE_M32FP:
379 case TYPE_M64FP:
380 case TYPE_M80FP:
381 case TYPE_M16INT:
382 case TYPE_M32INT:
383 case TYPE_M64INT:
384 case TYPE_M1616:
385 case TYPE_M1632:
386 case TYPE_M1664:
Sean Callanana144c3f2010-04-02 21:23:51 +0000387 return translateRMMemory(mcInst, insn, true);
Sean Callanan7fb35a22009-12-22 21:12:55 +0000388 case TYPE_LEA:
Sean Callanana144c3f2010-04-02 21:23:51 +0000389 return translateRMMemory(mcInst, insn, false);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000390 }
391}
392
393/// translateFPRegister - Translates a stack position on the FPU stack to its
394/// LLVM form, and appends it to an MCInst.
395///
396/// @param mcInst - The MCInst to append to.
397/// @param stackPos - The stack position to translate.
Sean Callanana144c3f2010-04-02 21:23:51 +0000398/// @return - 0 on success; nonzero otherwise.
399static bool translateFPRegister(MCInst &mcInst,
400 uint8_t stackPos) {
401 if (stackPos >= 8) {
402 debug("Invalid FP stack position");
403 return true;
404 }
Sean Callanan8ed9f512009-12-19 02:59:52 +0000405
406 mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos));
Sean Callanana144c3f2010-04-02 21:23:51 +0000407
408 return false;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000409}
410
411/// translateOperand - Translates an operand stored in an internal instruction
412/// to LLVM's format and appends it to an MCInst.
413///
414/// @param mcInst - The MCInst to append to.
415/// @param operand - The operand, as stored in the descriptor table.
416/// @param insn - The internal instruction.
Sean Callanana144c3f2010-04-02 21:23:51 +0000417/// @return - false on success; true otherwise.
418static bool translateOperand(MCInst &mcInst,
419 OperandSpecifier &operand,
420 InternalInstruction &insn) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000421 switch (operand.encoding) {
422 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000423 debug("Unhandled operand encoding during translation");
424 return true;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000425 case ENCODING_REG:
426 translateRegister(mcInst, insn.reg);
Sean Callanana144c3f2010-04-02 21:23:51 +0000427 return false;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000428 case ENCODING_RM:
Sean Callanana144c3f2010-04-02 21:23:51 +0000429 return translateRM(mcInst, operand, insn);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000430 case ENCODING_CB:
431 case ENCODING_CW:
432 case ENCODING_CD:
433 case ENCODING_CP:
434 case ENCODING_CO:
435 case ENCODING_CT:
Sean Callanana144c3f2010-04-02 21:23:51 +0000436 debug("Translation of code offsets isn't supported.");
437 return true;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000438 case ENCODING_IB:
439 case ENCODING_IW:
440 case ENCODING_ID:
441 case ENCODING_IO:
442 case ENCODING_Iv:
443 case ENCODING_Ia:
444 translateImmediate(mcInst,
445 insn.immediates[insn.numImmediatesTranslated++]);
Sean Callanana144c3f2010-04-02 21:23:51 +0000446 return false;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000447 case ENCODING_RB:
448 case ENCODING_RW:
449 case ENCODING_RD:
450 case ENCODING_RO:
451 translateRegister(mcInst, insn.opcodeRegister);
Sean Callanana144c3f2010-04-02 21:23:51 +0000452 return false;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000453 case ENCODING_I:
Sean Callanana144c3f2010-04-02 21:23:51 +0000454 return translateFPRegister(mcInst, insn.opcodeModifier);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000455 case ENCODING_Rv:
456 translateRegister(mcInst, insn.opcodeRegister);
Sean Callanana144c3f2010-04-02 21:23:51 +0000457 return false;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000458 case ENCODING_DUP:
Sean Callanana144c3f2010-04-02 21:23:51 +0000459 return translateOperand(mcInst,
460 insn.spec->operands[operand.type - TYPE_DUP0],
461 insn);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000462 }
463}
464
465/// translateInstruction - Translates an internal instruction and all its
466/// operands to an MCInst.
467///
468/// @param mcInst - The MCInst to populate with the instruction's data.
469/// @param insn - The internal instruction.
Sean Callanana144c3f2010-04-02 21:23:51 +0000470/// @return - false on success; true otherwise.
471static bool translateInstruction(MCInst &mcInst,
472 InternalInstruction &insn) {
473 if (!insn.spec) {
474 debug("Instruction has no specification");
475 return true;
476 }
Sean Callanan8ed9f512009-12-19 02:59:52 +0000477
478 mcInst.setOpcode(insn.instructionID);
479
480 int index;
481
482 insn.numImmediatesTranslated = 0;
483
484 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
Sean Callanana144c3f2010-04-02 21:23:51 +0000485 if (insn.spec->operands[index].encoding != ENCODING_NONE) {
486 if (translateOperand(mcInst, insn.spec->operands[index], insn)) {
487 return true;
488 }
489 }
Sean Callanan8ed9f512009-12-19 02:59:52 +0000490 }
Sean Callanana144c3f2010-04-02 21:23:51 +0000491
492 return false;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000493}
Daniel Dunbar5f9b9ef2009-11-25 06:53:08 +0000494
Daniel Dunbar5d067fe2010-03-20 22:36:22 +0000495static MCDisassembler *createX86_32Disassembler(const Target &T) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000496 return new X86Disassembler::X86_32Disassembler;
Daniel Dunbar5f9b9ef2009-11-25 06:53:08 +0000497}
498
Daniel Dunbar5d067fe2010-03-20 22:36:22 +0000499static MCDisassembler *createX86_64Disassembler(const Target &T) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000500 return new X86Disassembler::X86_64Disassembler;
Daniel Dunbar5f9b9ef2009-11-25 06:53:08 +0000501}
502
503extern "C" void LLVMInitializeX86Disassembler() {
504 // Register the disassembler.
505 TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
506 createX86_32Disassembler);
507 TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
508 createX86_64Disassembler);
509}