/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===*
 *
 * This file is part of the X86 Disassembler.
 * It contains the implementation of the instruction decoder.
 * Documentation for the disassembler can be found in X86Disassembler.h.
 *
 *===----------------------------------------------------------------------===*/
15
Sean Callanan04cc3072009-12-19 02:59:52 +000016#include <stdarg.h> /* for va_*() */
17#include <stdio.h> /* for vsnprintf() */
18#include <stdlib.h> /* for exit() */
Daniel Dunbarc745a622009-12-19 03:31:50 +000019#include <string.h> /* for memset() */
Sean Callanan04cc3072009-12-19 02:59:52 +000020
21#include "X86DisassemblerDecoder.h"
22
23#include "X86GenDisassemblerTables.inc"
24
/* Integer truth values used by the decoder's boolean-returning helpers. */
#define FALSE 0
#define TRUE  1

/* File-local boolean type (one signed byte). */
typedef int8_t bool;

/*
 * debug - Reports an internal decoder problem together with the source
 *   location of the report.  Expands to an empty statement when NDEBUG is
 *   defined; otherwise forwards the message to x86DisassemblerDebug().
 *
 * @param s - The message string to report.
 */
#ifdef NDEBUG
#define debug(s) do { } while (0)
#else
#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#endif
35
Sean Callanan04cc3072009-12-19 02:59:52 +000036
37/*
38 * contextForAttrs - Client for the instruction context table. Takes a set of
39 * attributes and returns the appropriate decode context.
40 *
41 * @param attrMask - Attributes, from the enumeration attributeBits.
42 * @return - The InstructionContext to use when looking up an
43 * an instruction with these attributes.
44 */
Sean Callanan588785c2009-12-22 22:51:40 +000045static InstructionContext contextForAttrs(uint8_t attrMask) {
Sean Callanan04cc3072009-12-19 02:59:52 +000046 return CONTEXTS_SYM[attrMask];
47}
48
49/*
50 * modRMRequired - Reads the appropriate instruction table to determine whether
51 * the ModR/M byte is required to decode a particular instruction.
52 *
53 * @param type - The opcode type (i.e., how many bytes it has).
54 * @param insnContext - The context for the instruction, as returned by
55 * contextForAttrs.
56 * @param opcode - The last byte of the instruction's opcode, not counting
57 * ModR/M extensions and escapes.
58 * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
59 */
Sean Callanan588785c2009-12-22 22:51:40 +000060static int modRMRequired(OpcodeType type,
Craig Topper21c33652011-10-02 16:56:09 +000061 InstructionContext insnContext,
62 uint8_t opcode) {
Daniel Dunbar8b532de2009-12-22 01:41:37 +000063 const struct ContextDecision* decision = 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +000064
Sean Callanan04cc3072009-12-19 02:59:52 +000065 switch (type) {
66 case ONEBYTE:
67 decision = &ONEBYTE_SYM;
68 break;
69 case TWOBYTE:
70 decision = &TWOBYTE_SYM;
71 break;
72 case THREEBYTE_38:
73 decision = &THREEBYTE38_SYM;
74 break;
75 case THREEBYTE_3A:
76 decision = &THREEBYTE3A_SYM;
77 break;
Joerg Sonnenbergerfc4789d2011-04-04 16:58:13 +000078 case THREEBYTE_A6:
79 decision = &THREEBYTEA6_SYM;
80 break;
81 case THREEBYTE_A7:
82 decision = &THREEBYTEA7_SYM;
83 break;
Sean Callanan04cc3072009-12-19 02:59:52 +000084 }
Ahmed Charles636a3d62012-02-19 11:37:01 +000085
Sean Callanan04cc3072009-12-19 02:59:52 +000086 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
87 modrm_type != MODRM_ONEENTRY;
Sean Callanan04cc3072009-12-19 02:59:52 +000088}
89
90/*
91 * decode - Reads the appropriate instruction table to obtain the unique ID of
92 * an instruction.
93 *
94 * @param type - See modRMRequired().
95 * @param insnContext - See modRMRequired().
96 * @param opcode - See modRMRequired().
97 * @param modRM - The ModR/M byte if required, or any value if not.
Sean Callanan010b3732010-04-02 21:23:51 +000098 * @return - The UID of the instruction, or 0 on failure.
Sean Callanan04cc3072009-12-19 02:59:52 +000099 */
Sean Callanan588785c2009-12-22 22:51:40 +0000100static InstrUID decode(OpcodeType type,
Sean Callanan010b3732010-04-02 21:23:51 +0000101 InstructionContext insnContext,
102 uint8_t opcode,
103 uint8_t modRM) {
Duncan Sandsae22c602012-02-05 14:20:11 +0000104 const struct ModRMDecision* dec = 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000105
Sean Callanan04cc3072009-12-19 02:59:52 +0000106 switch (type) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000107 case ONEBYTE:
108 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
109 break;
110 case TWOBYTE:
111 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
112 break;
113 case THREEBYTE_38:
114 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
115 break;
116 case THREEBYTE_3A:
117 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
118 break;
Joerg Sonnenbergerfc4789d2011-04-04 16:58:13 +0000119 case THREEBYTE_A6:
120 dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
121 break;
122 case THREEBYTE_A7:
123 dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
124 break;
Sean Callanan04cc3072009-12-19 02:59:52 +0000125 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000126
Sean Callanan04cc3072009-12-19 02:59:52 +0000127 switch (dec->modrm_type) {
128 default:
Sean Callanan010b3732010-04-02 21:23:51 +0000129 debug("Corrupt table! Unknown modrm_type");
130 return 0;
Sean Callanan04cc3072009-12-19 02:59:52 +0000131 case MODRM_ONEENTRY:
Craig Topper487e7442012-02-09 07:45:30 +0000132 return modRMTable[dec->instructionIDs];
Sean Callanan04cc3072009-12-19 02:59:52 +0000133 case MODRM_SPLITRM:
134 if (modFromModRM(modRM) == 0x3)
Craig Topper487e7442012-02-09 07:45:30 +0000135 return modRMTable[dec->instructionIDs+1];
136 return modRMTable[dec->instructionIDs];
Craig Toppera0cd9702012-02-09 08:58:07 +0000137 case MODRM_SPLITREG:
138 if (modFromModRM(modRM) == 0x3)
139 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
140 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
Craig Topper963305b2012-09-13 05:45:42 +0000141 case MODRM_SPLITMISC:
142 if (modFromModRM(modRM) == 0x3)
143 return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
144 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
Sean Callanan04cc3072009-12-19 02:59:52 +0000145 case MODRM_FULL:
Craig Topper487e7442012-02-09 07:45:30 +0000146 return modRMTable[dec->instructionIDs+modRM];
Sean Callanan04cc3072009-12-19 02:59:52 +0000147 }
Sean Callanan04cc3072009-12-19 02:59:52 +0000148}
149
150/*
151 * specifierForUID - Given a UID, returns the name and operand specification for
152 * that instruction.
153 *
154 * @param uid - The unique ID for the instruction. This should be returned by
155 * decode(); specifierForUID will not check bounds.
156 * @return - A pointer to the specification for that instruction.
157 */
Benjamin Kramerde0a4fb2010-10-23 09:10:44 +0000158static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000159 return &INSTRUCTIONS_SYM[uid];
160}
161
162/*
163 * consumeByte - Uses the reader function provided by the user to consume one
164 * byte from the instruction's memory and advance the cursor.
165 *
166 * @param insn - The instruction with the reader function to use. The cursor
167 * for this instruction is advanced.
168 * @param byte - A pointer to a pre-allocated memory buffer to be populated
169 * with the data read.
170 * @return - 0 if the read was successful; nonzero otherwise.
171 */
Sean Callanan588785c2009-12-22 22:51:40 +0000172static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000173 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000174
Sean Callanan04cc3072009-12-19 02:59:52 +0000175 if (!ret)
176 ++(insn->readerCursor);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000177
Sean Callanan04cc3072009-12-19 02:59:52 +0000178 return ret;
179}
180
181/*
182 * lookAtByte - Like consumeByte, but does not advance the cursor.
183 *
184 * @param insn - See consumeByte().
185 * @param byte - See consumeByte().
186 * @return - See consumeByte().
187 */
Sean Callanan588785c2009-12-22 22:51:40 +0000188static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000189 return insn->reader(insn->readerArg, byte, insn->readerCursor);
190}
191
Sean Callanan588785c2009-12-22 22:51:40 +0000192static void unconsumeByte(struct InternalInstruction* insn) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000193 insn->readerCursor--;
194}
195
Sean Callanan588785c2009-12-22 22:51:40 +0000196#define CONSUME_FUNC(name, type) \
197 static int name(struct InternalInstruction* insn, type* ptr) { \
198 type combined = 0; \
199 unsigned offset; \
200 for (offset = 0; offset < sizeof(type); ++offset) { \
201 uint8_t byte; \
202 int ret = insn->reader(insn->readerArg, \
203 &byte, \
204 insn->readerCursor + offset); \
205 if (ret) \
206 return ret; \
Richard Smith228e6d42012-08-24 23:29:28 +0000207 combined = combined | ((uint64_t)byte << (offset * 8)); \
Sean Callanan588785c2009-12-22 22:51:40 +0000208 } \
209 *ptr = combined; \
210 insn->readerCursor += sizeof(type); \
211 return 0; \
Sean Callanan04cc3072009-12-19 02:59:52 +0000212 }
213
214/*
215 * consume* - Use the reader function provided by the user to consume data
216 * values of various sizes from the instruction's memory and advance the
217 * cursor appropriately. These readers perform endian conversion.
218 *
219 * @param insn - See consumeByte().
220 * @param ptr - A pointer to a pre-allocated memory of appropriate size to
221 * be populated with the data read.
222 * @return - See consumeByte().
223 */
224CONSUME_FUNC(consumeInt8, int8_t)
225CONSUME_FUNC(consumeInt16, int16_t)
226CONSUME_FUNC(consumeInt32, int32_t)
227CONSUME_FUNC(consumeUInt16, uint16_t)
228CONSUME_FUNC(consumeUInt32, uint32_t)
229CONSUME_FUNC(consumeUInt64, uint64_t)
230
231/*
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000232 * dbgprintf - Uses the logging function provided by the user to log a single
Sean Callanan04cc3072009-12-19 02:59:52 +0000233 * message, typically without a carriage-return.
234 *
235 * @param insn - The instruction containing the logging function.
236 * @param format - See printf().
237 * @param ... - See printf().
238 */
Sean Callanan588785c2009-12-22 22:51:40 +0000239static void dbgprintf(struct InternalInstruction* insn,
240 const char* format,
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000241 ...) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000242 char buffer[256];
243 va_list ap;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000244
Sean Callanan04cc3072009-12-19 02:59:52 +0000245 if (!insn->dlog)
246 return;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000247
Sean Callanan04cc3072009-12-19 02:59:52 +0000248 va_start(ap, format);
249 (void)vsnprintf(buffer, sizeof(buffer), format, ap);
250 va_end(ap);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000251
Sean Callanan04cc3072009-12-19 02:59:52 +0000252 insn->dlog(insn->dlogArg, buffer);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000253
Sean Callanan04cc3072009-12-19 02:59:52 +0000254 return;
255}
256
257/*
258 * setPrefixPresent - Marks that a particular prefix is present at a particular
259 * location.
260 *
261 * @param insn - The instruction to be marked as having the prefix.
262 * @param prefix - The prefix that is present.
263 * @param location - The location where the prefix is located (in the address
264 * space of the instruction's reader).
265 */
Sean Callanan588785c2009-12-22 22:51:40 +0000266static void setPrefixPresent(struct InternalInstruction* insn,
Sean Callanan04cc3072009-12-19 02:59:52 +0000267 uint8_t prefix,
268 uint64_t location)
269{
270 insn->prefixPresent[prefix] = 1;
271 insn->prefixLocations[prefix] = location;
272}
273
274/*
275 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
276 * present at a given location.
277 *
278 * @param insn - The instruction to be queried.
279 * @param prefix - The prefix.
280 * @param location - The location to query.
281 * @return - Whether the prefix is at that location.
282 */
Sean Callanan588785c2009-12-22 22:51:40 +0000283static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
284 uint8_t prefix,
285 uint64_t location)
Sean Callanan04cc3072009-12-19 02:59:52 +0000286{
287 if (insn->prefixPresent[prefix] == 1 &&
288 insn->prefixLocations[prefix] == location)
289 return TRUE;
290 else
291 return FALSE;
292}
293
294/*
295 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
296 * instruction as having them. Also sets the instruction's default operand,
297 * address, and other relevant data sizes to report operands correctly.
298 *
299 * @param insn - The instruction whose prefixes are to be read.
300 * @return - 0 if the instruction could be read until the end of the prefix
301 * bytes, and no prefixes conflicted; nonzero otherwise.
302 */
303static int readPrefixes(struct InternalInstruction* insn) {
304 BOOL isPrefix = TRUE;
305 BOOL prefixGroups[4] = { FALSE };
306 uint64_t prefixLocation;
Ted Kremenek3c4408c2011-01-23 17:05:06 +0000307 uint8_t byte = 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000308
Sean Callanan04cc3072009-12-19 02:59:52 +0000309 BOOL hasAdSize = FALSE;
310 BOOL hasOpSize = FALSE;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000311
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000312 dbgprintf(insn, "readPrefixes()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000313
Sean Callanan04cc3072009-12-19 02:59:52 +0000314 while (isPrefix) {
315 prefixLocation = insn->readerCursor;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000316
Sean Callanan04cc3072009-12-19 02:59:52 +0000317 if (consumeByte(insn, &byte))
318 return -1;
Kevin Enderby014e1cd2012-03-09 17:52:49 +0000319
Benjamin Krameradfc73d2012-03-10 15:10:06 +0000320 /*
Dave Zarzycki07fabee2013-03-25 18:59:38 +0000321 * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
322 * break and let it be disassembled as a normal "instruction".
Benjamin Krameradfc73d2012-03-10 15:10:06 +0000323 */
Dave Zarzycki07fabee2013-03-25 18:59:38 +0000324 if (insn->readerCursor - 1 == insn->startLocation
325 && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) {
326 if (byte == 0xf0)
327 break;
328 uint8_t nextByte;
329 if (lookAtByte(insn, &nextByte))
330 return -1;
331 if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
332 if (consumeByte(insn, &nextByte))
333 return -1;
334 if (lookAtByte(insn, &nextByte))
335 return -1;
336 unconsumeByte(insn);
337 }
338 if (nextByte != 0x0f && nextByte != 0x90)
339 break;
340 }
341
Sean Callanan04cc3072009-12-19 02:59:52 +0000342 switch (byte) {
343 case 0xf0: /* LOCK */
344 case 0xf2: /* REPNE/REPNZ */
345 case 0xf3: /* REP or REPE/REPZ */
346 if (prefixGroups[0])
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000347 dbgprintf(insn, "Redundant Group 1 prefix");
Sean Callanan04cc3072009-12-19 02:59:52 +0000348 prefixGroups[0] = TRUE;
349 setPrefixPresent(insn, byte, prefixLocation);
350 break;
351 case 0x2e: /* CS segment override -OR- Branch not taken */
352 case 0x36: /* SS segment override -OR- Branch taken */
353 case 0x3e: /* DS segment override */
354 case 0x26: /* ES segment override */
355 case 0x64: /* FS segment override */
356 case 0x65: /* GS segment override */
357 switch (byte) {
358 case 0x2e:
359 insn->segmentOverride = SEG_OVERRIDE_CS;
360 break;
361 case 0x36:
362 insn->segmentOverride = SEG_OVERRIDE_SS;
363 break;
364 case 0x3e:
365 insn->segmentOverride = SEG_OVERRIDE_DS;
366 break;
367 case 0x26:
368 insn->segmentOverride = SEG_OVERRIDE_ES;
369 break;
370 case 0x64:
371 insn->segmentOverride = SEG_OVERRIDE_FS;
372 break;
373 case 0x65:
374 insn->segmentOverride = SEG_OVERRIDE_GS;
375 break;
376 default:
Sean Callanan010b3732010-04-02 21:23:51 +0000377 debug("Unhandled override");
378 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +0000379 }
380 if (prefixGroups[1])
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000381 dbgprintf(insn, "Redundant Group 2 prefix");
Sean Callanan04cc3072009-12-19 02:59:52 +0000382 prefixGroups[1] = TRUE;
383 setPrefixPresent(insn, byte, prefixLocation);
384 break;
385 case 0x66: /* Operand-size override */
386 if (prefixGroups[2])
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000387 dbgprintf(insn, "Redundant Group 3 prefix");
Sean Callanan04cc3072009-12-19 02:59:52 +0000388 prefixGroups[2] = TRUE;
389 hasOpSize = TRUE;
390 setPrefixPresent(insn, byte, prefixLocation);
391 break;
392 case 0x67: /* Address-size override */
393 if (prefixGroups[3])
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000394 dbgprintf(insn, "Redundant Group 4 prefix");
Sean Callanan04cc3072009-12-19 02:59:52 +0000395 prefixGroups[3] = TRUE;
396 hasAdSize = TRUE;
397 setPrefixPresent(insn, byte, prefixLocation);
398 break;
399 default: /* Not a prefix byte */
400 isPrefix = FALSE;
401 break;
402 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000403
Sean Callanan04cc3072009-12-19 02:59:52 +0000404 if (isPrefix)
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000405 dbgprintf(insn, "Found prefix 0x%hhx", byte);
Sean Callanan04cc3072009-12-19 02:59:52 +0000406 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000407
Sean Callananc3fd5232011-03-15 01:23:15 +0000408 insn->vexSize = 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000409
Sean Callananc3fd5232011-03-15 01:23:15 +0000410 if (byte == 0xc4) {
411 uint8_t byte1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000412
Sean Callananc3fd5232011-03-15 01:23:15 +0000413 if (lookAtByte(insn, &byte1)) {
414 dbgprintf(insn, "Couldn't read second byte of VEX");
415 return -1;
416 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000417
Craig Topper45faba92011-09-26 05:12:43 +0000418 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
Sean Callananc3fd5232011-03-15 01:23:15 +0000419 insn->vexSize = 3;
420 insn->necessaryPrefixLocation = insn->readerCursor - 1;
421 }
422 else {
Sean Callanan04cc3072009-12-19 02:59:52 +0000423 unconsumeByte(insn);
424 insn->necessaryPrefixLocation = insn->readerCursor - 1;
425 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000426
Sean Callananc3fd5232011-03-15 01:23:15 +0000427 if (insn->vexSize == 3) {
428 insn->vexPrefix[0] = byte;
429 consumeByte(insn, &insn->vexPrefix[1]);
430 consumeByte(insn, &insn->vexPrefix[2]);
431
432 /* We simulate the REX prefix for simplicity's sake */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000433
Craig Topper31854ba2011-10-03 07:51:09 +0000434 if (insn->mode == MODE_64BIT) {
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000435 insn->rexPrefix = 0x40
Craig Topper31854ba2011-10-03 07:51:09 +0000436 | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
437 | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
438 | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
439 | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
440 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000441
Sean Callananc3fd5232011-03-15 01:23:15 +0000442 switch (ppFromVEX3of3(insn->vexPrefix[2]))
443 {
444 default:
445 break;
446 case VEX_PREFIX_66:
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000447 hasOpSize = TRUE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000448 break;
449 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000450
Sean Callananc3fd5232011-03-15 01:23:15 +0000451 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
452 }
Sean Callanan04cc3072009-12-19 02:59:52 +0000453 }
Sean Callananc3fd5232011-03-15 01:23:15 +0000454 else if (byte == 0xc5) {
455 uint8_t byte1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000456
Sean Callananc3fd5232011-03-15 01:23:15 +0000457 if (lookAtByte(insn, &byte1)) {
458 dbgprintf(insn, "Couldn't read second byte of VEX");
459 return -1;
460 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000461
Craig Topper45faba92011-09-26 05:12:43 +0000462 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
Sean Callananc3fd5232011-03-15 01:23:15 +0000463 insn->vexSize = 2;
464 }
465 else {
466 unconsumeByte(insn);
467 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000468
Sean Callananc3fd5232011-03-15 01:23:15 +0000469 if (insn->vexSize == 2) {
470 insn->vexPrefix[0] = byte;
471 consumeByte(insn, &insn->vexPrefix[1]);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000472
Craig Topper31854ba2011-10-03 07:51:09 +0000473 if (insn->mode == MODE_64BIT) {
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000474 insn->rexPrefix = 0x40
Craig Topper31854ba2011-10-03 07:51:09 +0000475 | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
476 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000477
Sean Callananc3fd5232011-03-15 01:23:15 +0000478 switch (ppFromVEX2of2(insn->vexPrefix[1]))
479 {
480 default:
481 break;
482 case VEX_PREFIX_66:
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000483 hasOpSize = TRUE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000484 break;
485 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000486
Sean Callananc3fd5232011-03-15 01:23:15 +0000487 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
488 }
489 }
490 else {
491 if (insn->mode == MODE_64BIT) {
492 if ((byte & 0xf0) == 0x40) {
493 uint8_t opcodeByte;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000494
Sean Callananc3fd5232011-03-15 01:23:15 +0000495 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
496 dbgprintf(insn, "Redundant REX prefix");
497 return -1;
498 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000499
Sean Callananc3fd5232011-03-15 01:23:15 +0000500 insn->rexPrefix = byte;
501 insn->necessaryPrefixLocation = insn->readerCursor - 2;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000502
Sean Callananc3fd5232011-03-15 01:23:15 +0000503 dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000504 } else {
Sean Callananc3fd5232011-03-15 01:23:15 +0000505 unconsumeByte(insn);
506 insn->necessaryPrefixLocation = insn->readerCursor - 1;
507 }
508 } else {
509 unconsumeByte(insn);
510 insn->necessaryPrefixLocation = insn->readerCursor - 1;
511 }
512 }
513
Sean Callanan04cc3072009-12-19 02:59:52 +0000514 if (insn->mode == MODE_16BIT) {
515 insn->registerSize = (hasOpSize ? 4 : 2);
516 insn->addressSize = (hasAdSize ? 4 : 2);
517 insn->displacementSize = (hasAdSize ? 4 : 2);
518 insn->immediateSize = (hasOpSize ? 4 : 2);
519 } else if (insn->mode == MODE_32BIT) {
520 insn->registerSize = (hasOpSize ? 2 : 4);
521 insn->addressSize = (hasAdSize ? 2 : 4);
522 insn->displacementSize = (hasAdSize ? 2 : 4);
Sean Callanan9f6c6222010-10-22 01:24:11 +0000523 insn->immediateSize = (hasOpSize ? 2 : 4);
Sean Callanan04cc3072009-12-19 02:59:52 +0000524 } else if (insn->mode == MODE_64BIT) {
525 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
526 insn->registerSize = 8;
527 insn->addressSize = (hasAdSize ? 4 : 8);
528 insn->displacementSize = 4;
529 insn->immediateSize = 4;
530 } else if (insn->rexPrefix) {
531 insn->registerSize = (hasOpSize ? 2 : 4);
532 insn->addressSize = (hasAdSize ? 4 : 8);
533 insn->displacementSize = (hasOpSize ? 2 : 4);
534 insn->immediateSize = (hasOpSize ? 2 : 4);
535 } else {
536 insn->registerSize = (hasOpSize ? 2 : 4);
537 insn->addressSize = (hasAdSize ? 4 : 8);
538 insn->displacementSize = (hasOpSize ? 2 : 4);
539 insn->immediateSize = (hasOpSize ? 2 : 4);
540 }
541 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000542
Sean Callanan04cc3072009-12-19 02:59:52 +0000543 return 0;
544}
545
546/*
547 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
548 * extended or escape opcodes).
549 *
550 * @param insn - The instruction whose opcode is to be read.
551 * @return - 0 if the opcode could be read successfully; nonzero otherwise.
552 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000553static int readOpcode(struct InternalInstruction* insn) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000554 /* Determine the length of the primary opcode */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000555
Sean Callanan04cc3072009-12-19 02:59:52 +0000556 uint8_t current;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000557
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000558 dbgprintf(insn, "readOpcode()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000559
Sean Callanan04cc3072009-12-19 02:59:52 +0000560 insn->opcodeType = ONEBYTE;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000561
Sean Callananc3fd5232011-03-15 01:23:15 +0000562 if (insn->vexSize == 3)
563 {
564 switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
565 {
566 default:
567 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000568 return -1;
Sean Callananc3fd5232011-03-15 01:23:15 +0000569 case 0:
570 break;
571 case VEX_LOB_0F:
572 insn->twoByteEscape = 0x0f;
573 insn->opcodeType = TWOBYTE;
574 return consumeByte(insn, &insn->opcode);
575 case VEX_LOB_0F38:
576 insn->twoByteEscape = 0x0f;
577 insn->threeByteEscape = 0x38;
578 insn->opcodeType = THREEBYTE_38;
579 return consumeByte(insn, &insn->opcode);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000580 case VEX_LOB_0F3A:
Sean Callananc3fd5232011-03-15 01:23:15 +0000581 insn->twoByteEscape = 0x0f;
582 insn->threeByteEscape = 0x3a;
583 insn->opcodeType = THREEBYTE_3A;
584 return consumeByte(insn, &insn->opcode);
585 }
586 }
587 else if (insn->vexSize == 2)
588 {
589 insn->twoByteEscape = 0x0f;
590 insn->opcodeType = TWOBYTE;
591 return consumeByte(insn, &insn->opcode);
592 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000593
Sean Callanan04cc3072009-12-19 02:59:52 +0000594 if (consumeByte(insn, &current))
595 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000596
Sean Callanan04cc3072009-12-19 02:59:52 +0000597 if (current == 0x0f) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000598 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000599
Sean Callanan04cc3072009-12-19 02:59:52 +0000600 insn->twoByteEscape = current;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000601
Sean Callanan04cc3072009-12-19 02:59:52 +0000602 if (consumeByte(insn, &current))
603 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000604
Sean Callanan04cc3072009-12-19 02:59:52 +0000605 if (current == 0x38) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000606 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000607
Sean Callanan04cc3072009-12-19 02:59:52 +0000608 insn->threeByteEscape = current;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000609
Sean Callanan04cc3072009-12-19 02:59:52 +0000610 if (consumeByte(insn, &current))
611 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000612
Sean Callanan04cc3072009-12-19 02:59:52 +0000613 insn->opcodeType = THREEBYTE_38;
614 } else if (current == 0x3a) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000615 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000616
Sean Callanan04cc3072009-12-19 02:59:52 +0000617 insn->threeByteEscape = current;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000618
Sean Callanan04cc3072009-12-19 02:59:52 +0000619 if (consumeByte(insn, &current))
620 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000621
Sean Callanan04cc3072009-12-19 02:59:52 +0000622 insn->opcodeType = THREEBYTE_3A;
Joerg Sonnenbergerfc4789d2011-04-04 16:58:13 +0000623 } else if (current == 0xa6) {
624 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000625
Joerg Sonnenbergerfc4789d2011-04-04 16:58:13 +0000626 insn->threeByteEscape = current;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000627
Joerg Sonnenbergerfc4789d2011-04-04 16:58:13 +0000628 if (consumeByte(insn, &current))
629 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000630
Joerg Sonnenbergerfc4789d2011-04-04 16:58:13 +0000631 insn->opcodeType = THREEBYTE_A6;
632 } else if (current == 0xa7) {
633 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000634
Joerg Sonnenbergerfc4789d2011-04-04 16:58:13 +0000635 insn->threeByteEscape = current;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000636
Joerg Sonnenbergerfc4789d2011-04-04 16:58:13 +0000637 if (consumeByte(insn, &current))
638 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000639
Joerg Sonnenbergerfc4789d2011-04-04 16:58:13 +0000640 insn->opcodeType = THREEBYTE_A7;
Sean Callanan04cc3072009-12-19 02:59:52 +0000641 } else {
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000642 dbgprintf(insn, "Didn't find a three-byte escape prefix");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000643
Sean Callanan04cc3072009-12-19 02:59:52 +0000644 insn->opcodeType = TWOBYTE;
645 }
646 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000647
Sean Callanan04cc3072009-12-19 02:59:52 +0000648 /*
649 * At this point we have consumed the full opcode.
650 * Anything we consume from here on must be unconsumed.
651 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000652
Sean Callanan04cc3072009-12-19 02:59:52 +0000653 insn->opcode = current;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000654
Sean Callanan04cc3072009-12-19 02:59:52 +0000655 return 0;
656}
657
658static int readModRM(struct InternalInstruction* insn);
659
660/*
661 * getIDWithAttrMask - Determines the ID of an instruction, consuming
662 * the ModR/M byte as appropriate for extended and escape opcodes,
663 * and using a supplied attribute mask.
664 *
665 * @param instructionID - A pointer whose target is filled in with the ID of the
666 * instruction.
667 * @param insn - The instruction whose ID is to be determined.
668 * @param attrMask - The attribute mask to search.
669 * @return - 0 if the ModR/M could be read when needed or was not
670 * needed; nonzero otherwise.
671 */
672static int getIDWithAttrMask(uint16_t* instructionID,
673 struct InternalInstruction* insn,
674 uint8_t attrMask) {
675 BOOL hasModRMExtension;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000676
Sean Callanan04cc3072009-12-19 02:59:52 +0000677 uint8_t instructionClass;
678
679 instructionClass = contextForAttrs(attrMask);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000680
Sean Callanan04cc3072009-12-19 02:59:52 +0000681 hasModRMExtension = modRMRequired(insn->opcodeType,
682 instructionClass,
683 insn->opcode);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000684
Sean Callanan04cc3072009-12-19 02:59:52 +0000685 if (hasModRMExtension) {
Rafael Espindola9f9a1062011-01-06 16:48:42 +0000686 if (readModRM(insn))
687 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000688
Sean Callanan04cc3072009-12-19 02:59:52 +0000689 *instructionID = decode(insn->opcodeType,
690 instructionClass,
691 insn->opcode,
692 insn->modRM);
693 } else {
694 *instructionID = decode(insn->opcodeType,
695 instructionClass,
696 insn->opcode,
697 0);
698 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000699
Sean Callanan04cc3072009-12-19 02:59:52 +0000700 return 0;
701}
702
703/*
704 * is16BitEquivalent - Determines whether two instruction names refer to
705 * equivalent instructions but one is 16-bit whereas the other is not.
706 *
707 * @param orig - The instruction that is not 16-bit
708 * @param equiv - The instruction that is 16-bit
709 */
Joerg Sonnenberger2b86e482012-10-29 17:56:15 +0000710static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000711 off_t i;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000712
Sean Callanan010b3732010-04-02 21:23:51 +0000713 for (i = 0;; i++) {
714 if (orig[i] == '\0' && equiv[i] == '\0')
Sean Callanan04cc3072009-12-19 02:59:52 +0000715 return TRUE;
Sean Callanan010b3732010-04-02 21:23:51 +0000716 if (orig[i] == '\0' || equiv[i] == '\0')
Sean Callanan04cc3072009-12-19 02:59:52 +0000717 return FALSE;
Sean Callanan010b3732010-04-02 21:23:51 +0000718 if (orig[i] != equiv[i]) {
719 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
Sean Callanan04cc3072009-12-19 02:59:52 +0000720 continue;
Sean Callanan010b3732010-04-02 21:23:51 +0000721 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
Sean Callanan04cc3072009-12-19 02:59:52 +0000722 continue;
Sean Callanan010b3732010-04-02 21:23:51 +0000723 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
Sean Callanan04cc3072009-12-19 02:59:52 +0000724 continue;
725 return FALSE;
726 }
727 }
728}
729
730/*
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000731 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
732 * appropriate for extended and escape opcodes. Determines the attributes and
Sean Callanan04cc3072009-12-19 02:59:52 +0000733 * context for the instruction before doing so.
734 *
735 * @param insn - The instruction whose ID is to be determined.
736 * @return - 0 if the ModR/M could be read when needed or was not needed;
737 * nonzero otherwise.
738 */
Roman Divacky67923802012-09-05 21:17:34 +0000739static int getID(struct InternalInstruction* insn, const void *miiArg) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000740 uint8_t attrMask;
741 uint16_t instructionID;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000742
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000743 dbgprintf(insn, "getID()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000744
Sean Callanan04cc3072009-12-19 02:59:52 +0000745 attrMask = ATTR_NONE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000746
Sean Callanan04cc3072009-12-19 02:59:52 +0000747 if (insn->mode == MODE_64BIT)
748 attrMask |= ATTR_64BIT;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000749
Sean Callananc3fd5232011-03-15 01:23:15 +0000750 if (insn->vexSize) {
751 attrMask |= ATTR_VEX;
752
753 if (insn->vexSize == 3) {
754 switch (ppFromVEX3of3(insn->vexPrefix[2])) {
755 case VEX_PREFIX_66:
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000756 attrMask |= ATTR_OPSIZE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000757 break;
758 case VEX_PREFIX_F3:
759 attrMask |= ATTR_XS;
760 break;
761 case VEX_PREFIX_F2:
762 attrMask |= ATTR_XD;
763 break;
764 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000765
Sean Callananc3fd5232011-03-15 01:23:15 +0000766 if (lFromVEX3of3(insn->vexPrefix[2]))
767 attrMask |= ATTR_VEXL;
768 }
769 else if (insn->vexSize == 2) {
770 switch (ppFromVEX2of2(insn->vexPrefix[1])) {
771 case VEX_PREFIX_66:
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000772 attrMask |= ATTR_OPSIZE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000773 break;
774 case VEX_PREFIX_F3:
775 attrMask |= ATTR_XS;
776 break;
777 case VEX_PREFIX_F2:
778 attrMask |= ATTR_XD;
779 break;
780 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000781
Sean Callananc3fd5232011-03-15 01:23:15 +0000782 if (lFromVEX2of2(insn->vexPrefix[1]))
783 attrMask |= ATTR_VEXL;
784 }
785 else {
786 return -1;
787 }
788 }
789 else {
Sean Callananc3fd5232011-03-15 01:23:15 +0000790 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
791 attrMask |= ATTR_OPSIZE;
Craig Topper6491c802012-02-27 01:54:29 +0000792 else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
793 attrMask |= ATTR_ADSIZE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000794 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
795 attrMask |= ATTR_XS;
796 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
797 attrMask |= ATTR_XD;
Sean Callananc3fd5232011-03-15 01:23:15 +0000798 }
799
Craig Topperf18c8962011-10-04 06:30:42 +0000800 if (insn->rexPrefix & 0x08)
801 attrMask |= ATTR_REXW;
Craig Topperf01f1b52011-11-06 23:04:08 +0000802
Sean Callanan010b3732010-04-02 21:23:51 +0000803 if (getIDWithAttrMask(&instructionID, insn, attrMask))
Sean Callanan04cc3072009-12-19 02:59:52 +0000804 return -1;
Craig Topperf01f1b52011-11-06 23:04:08 +0000805
Sean Callanan04cc3072009-12-19 02:59:52 +0000806 /* The following clauses compensate for limitations of the tables. */
Craig Topperf01f1b52011-11-06 23:04:08 +0000807
808 if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) &&
809 !(attrMask & ATTR_OPSIZE)) {
Craig Topperf18c8962011-10-04 06:30:42 +0000810 /*
811 * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
812 * has precedence since there are no L-bit with W-bit entries in the tables.
813 * So if the L-bit isn't significant we should use the W-bit instead.
Craig Topperf01f1b52011-11-06 23:04:08 +0000814 * We only need to do this if the instruction doesn't specify OpSize since
815 * there is a VEX_L_W_OPSIZE table.
Craig Topperf18c8962011-10-04 06:30:42 +0000816 */
817
818 const struct InstructionSpecifier *spec;
819 uint16_t instructionIDWithWBit;
820 const struct InstructionSpecifier *specWithWBit;
821
822 spec = specifierForUID(instructionID);
823
824 if (getIDWithAttrMask(&instructionIDWithWBit,
825 insn,
826 (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) {
827 insn->instructionID = instructionID;
828 insn->spec = spec;
829 return 0;
830 }
831
832 specWithWBit = specifierForUID(instructionIDWithWBit);
833
834 if (instructionID != instructionIDWithWBit) {
835 insn->instructionID = instructionIDWithWBit;
836 insn->spec = specWithWBit;
837 } else {
838 insn->instructionID = instructionID;
839 insn->spec = spec;
840 }
841 return 0;
842 }
843
Sean Callanan04cc3072009-12-19 02:59:52 +0000844 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
845 /*
846 * The instruction tables make no distinction between instructions that
847 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
848 * particular spot (i.e., many MMX operations). In general we're
849 * conservative, but in the specific case where OpSize is present but not
850 * in the right place we check if there's a 16-bit operation.
851 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000852
Benjamin Kramerde0a4fb2010-10-23 09:10:44 +0000853 const struct InstructionSpecifier *spec;
Sean Callanan04cc3072009-12-19 02:59:52 +0000854 uint16_t instructionIDWithOpsize;
Benjamin Kramer915e3d92012-02-11 16:01:02 +0000855 const char *specName, *specWithOpSizeName;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000856
Sean Callanan04cc3072009-12-19 02:59:52 +0000857 spec = specifierForUID(instructionID);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000858
Sean Callanan04cc3072009-12-19 02:59:52 +0000859 if (getIDWithAttrMask(&instructionIDWithOpsize,
860 insn,
861 attrMask | ATTR_OPSIZE)) {
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000862 /*
Sean Callanan04cc3072009-12-19 02:59:52 +0000863 * ModRM required with OpSize but not present; give up and return version
864 * without OpSize set
865 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000866
Sean Callanan04cc3072009-12-19 02:59:52 +0000867 insn->instructionID = instructionID;
868 insn->spec = spec;
869 return 0;
870 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000871
Benjamin Kramer915e3d92012-02-11 16:01:02 +0000872 specName = x86DisassemblerGetInstrName(instructionID, miiArg);
873 specWithOpSizeName =
Benjamin Kramer478e8de2012-02-11 14:50:54 +0000874 x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
875
Joerg Sonnenberger2b86e482012-10-29 17:56:15 +0000876 if (is16BitEquivalent(specName, specWithOpSizeName)) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000877 insn->instructionID = instructionIDWithOpsize;
Benjamin Kramer915e3d92012-02-11 16:01:02 +0000878 insn->spec = specifierForUID(instructionIDWithOpsize);
Sean Callanan04cc3072009-12-19 02:59:52 +0000879 } else {
880 insn->instructionID = instructionID;
881 insn->spec = spec;
882 }
883 return 0;
884 }
Craig Topper21c33652011-10-02 16:56:09 +0000885
886 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
887 insn->rexPrefix & 0x01) {
888 /*
889 * NOOP shouldn't decode as NOOP if REX.b is set. Instead
890 * it should decode as XCHG %r8, %eax.
891 */
892
893 const struct InstructionSpecifier *spec;
894 uint16_t instructionIDWithNewOpcode;
895 const struct InstructionSpecifier *specWithNewOpcode;
896
897 spec = specifierForUID(instructionID);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000898
Craig Topperb58a9662011-10-05 03:29:32 +0000899 /* Borrow opcode from one of the other XCHGar opcodes */
Craig Topper21c33652011-10-02 16:56:09 +0000900 insn->opcode = 0x91;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000901
Craig Topper21c33652011-10-02 16:56:09 +0000902 if (getIDWithAttrMask(&instructionIDWithNewOpcode,
903 insn,
904 attrMask)) {
905 insn->opcode = 0x90;
906
907 insn->instructionID = instructionID;
908 insn->spec = spec;
909 return 0;
910 }
911
912 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
913
Craig Topperb58a9662011-10-05 03:29:32 +0000914 /* Change back */
Craig Topper21c33652011-10-02 16:56:09 +0000915 insn->opcode = 0x90;
916
917 insn->instructionID = instructionIDWithNewOpcode;
918 insn->spec = specWithNewOpcode;
919
920 return 0;
921 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000922
Sean Callanan04cc3072009-12-19 02:59:52 +0000923 insn->instructionID = instructionID;
924 insn->spec = specifierForUID(insn->instructionID);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000925
Sean Callanan04cc3072009-12-19 02:59:52 +0000926 return 0;
927}
928
929/*
930 * readSIB - Consumes the SIB byte to determine addressing information for an
931 * instruction.
932 *
933 * @param insn - The instruction whose SIB byte is to be read.
934 * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
935 */
936static int readSIB(struct InternalInstruction* insn) {
Daniel Dunbar8b532de2009-12-22 01:41:37 +0000937 SIBIndex sibIndexBase = 0;
938 SIBBase sibBaseBase = 0;
Sean Callanan04cc3072009-12-19 02:59:52 +0000939 uint8_t index, base;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000940
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000941 dbgprintf(insn, "readSIB()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000942
Sean Callanan04cc3072009-12-19 02:59:52 +0000943 if (insn->consumedSIB)
944 return 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000945
Sean Callanan04cc3072009-12-19 02:59:52 +0000946 insn->consumedSIB = TRUE;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000947
Sean Callanan04cc3072009-12-19 02:59:52 +0000948 switch (insn->addressSize) {
949 case 2:
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000950 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
Sean Callanan04cc3072009-12-19 02:59:52 +0000951 return -1;
952 break;
953 case 4:
954 sibIndexBase = SIB_INDEX_EAX;
955 sibBaseBase = SIB_BASE_EAX;
956 break;
957 case 8:
958 sibIndexBase = SIB_INDEX_RAX;
959 sibBaseBase = SIB_BASE_RAX;
960 break;
961 }
962
963 if (consumeByte(insn, &insn->sib))
964 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000965
Sean Callanan04cc3072009-12-19 02:59:52 +0000966 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000967
Sean Callanan04cc3072009-12-19 02:59:52 +0000968 switch (index) {
969 case 0x4:
970 insn->sibIndex = SIB_INDEX_NONE;
971 break;
972 default:
Benjamin Kramer25bddae2011-02-27 18:13:53 +0000973 insn->sibIndex = (SIBIndex)(sibIndexBase + index);
Sean Callanan04cc3072009-12-19 02:59:52 +0000974 if (insn->sibIndex == SIB_INDEX_sib ||
975 insn->sibIndex == SIB_INDEX_sib64)
976 insn->sibIndex = SIB_INDEX_NONE;
977 break;
978 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000979
Sean Callanan04cc3072009-12-19 02:59:52 +0000980 switch (scaleFromSIB(insn->sib)) {
981 case 0:
982 insn->sibScale = 1;
983 break;
984 case 1:
985 insn->sibScale = 2;
986 break;
987 case 2:
988 insn->sibScale = 4;
989 break;
990 case 3:
991 insn->sibScale = 8;
992 break;
993 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000994
Sean Callanan04cc3072009-12-19 02:59:52 +0000995 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000996
Sean Callanan04cc3072009-12-19 02:59:52 +0000997 switch (base) {
998 case 0x5:
999 switch (modFromModRM(insn->modRM)) {
1000 case 0x0:
1001 insn->eaDisplacement = EA_DISP_32;
1002 insn->sibBase = SIB_BASE_NONE;
1003 break;
1004 case 0x1:
1005 insn->eaDisplacement = EA_DISP_8;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001006 insn->sibBase = (insn->addressSize == 4 ?
Sean Callanan04cc3072009-12-19 02:59:52 +00001007 SIB_BASE_EBP : SIB_BASE_RBP);
1008 break;
1009 case 0x2:
1010 insn->eaDisplacement = EA_DISP_32;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001011 insn->sibBase = (insn->addressSize == 4 ?
Sean Callanan04cc3072009-12-19 02:59:52 +00001012 SIB_BASE_EBP : SIB_BASE_RBP);
1013 break;
1014 case 0x3:
Sean Callanan010b3732010-04-02 21:23:51 +00001015 debug("Cannot have Mod = 0b11 and a SIB byte");
1016 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001017 }
1018 break;
1019 default:
Benjamin Kramer25bddae2011-02-27 18:13:53 +00001020 insn->sibBase = (SIBBase)(sibBaseBase + base);
Sean Callanan04cc3072009-12-19 02:59:52 +00001021 break;
1022 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001023
Sean Callanan04cc3072009-12-19 02:59:52 +00001024 return 0;
1025}
1026
1027/*
1028 * readDisplacement - Consumes the displacement of an instruction.
1029 *
1030 * @param insn - The instruction whose displacement is to be read.
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001031 * @return - 0 if the displacement byte was successfully read; nonzero
Sean Callanan04cc3072009-12-19 02:59:52 +00001032 * otherwise.
1033 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001034static int readDisplacement(struct InternalInstruction* insn) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001035 int8_t d8;
1036 int16_t d16;
1037 int32_t d32;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001038
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001039 dbgprintf(insn, "readDisplacement()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001040
Sean Callanan04cc3072009-12-19 02:59:52 +00001041 if (insn->consumedDisplacement)
1042 return 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001043
Sean Callanan04cc3072009-12-19 02:59:52 +00001044 insn->consumedDisplacement = TRUE;
Kevin Enderby6fbcd8d2012-02-23 18:18:17 +00001045 insn->displacementOffset = insn->readerCursor - insn->startLocation;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001046
Sean Callanan04cc3072009-12-19 02:59:52 +00001047 switch (insn->eaDisplacement) {
1048 case EA_DISP_NONE:
1049 insn->consumedDisplacement = FALSE;
1050 break;
1051 case EA_DISP_8:
1052 if (consumeInt8(insn, &d8))
1053 return -1;
1054 insn->displacement = d8;
1055 break;
1056 case EA_DISP_16:
1057 if (consumeInt16(insn, &d16))
1058 return -1;
1059 insn->displacement = d16;
1060 break;
1061 case EA_DISP_32:
1062 if (consumeInt32(insn, &d32))
1063 return -1;
1064 insn->displacement = d32;
1065 break;
1066 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001067
Sean Callanan04cc3072009-12-19 02:59:52 +00001068 insn->consumedDisplacement = TRUE;
1069 return 0;
1070}
1071
1072/*
1073 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
1074 * displacement) for an instruction and interprets it.
1075 *
1076 * @param insn - The instruction whose addressing information is to be read.
1077 * @return - 0 if the information was successfully read; nonzero otherwise.
1078 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001079static int readModRM(struct InternalInstruction* insn) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001080 uint8_t mod, rm, reg;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001081
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001082 dbgprintf(insn, "readModRM()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001083
Sean Callanan04cc3072009-12-19 02:59:52 +00001084 if (insn->consumedModRM)
1085 return 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001086
Rafael Espindola9f9a1062011-01-06 16:48:42 +00001087 if (consumeByte(insn, &insn->modRM))
1088 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001089 insn->consumedModRM = TRUE;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001090
Sean Callanan04cc3072009-12-19 02:59:52 +00001091 mod = modFromModRM(insn->modRM);
1092 rm = rmFromModRM(insn->modRM);
1093 reg = regFromModRM(insn->modRM);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001094
Sean Callanan04cc3072009-12-19 02:59:52 +00001095 /*
1096 * This goes by insn->registerSize to pick the correct register, which messes
1097 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
1098 * fixupReg().
1099 */
1100 switch (insn->registerSize) {
1101 case 2:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001102 insn->regBase = MODRM_REG_AX;
Sean Callanan04cc3072009-12-19 02:59:52 +00001103 insn->eaRegBase = EA_REG_AX;
1104 break;
1105 case 4:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001106 insn->regBase = MODRM_REG_EAX;
Sean Callanan04cc3072009-12-19 02:59:52 +00001107 insn->eaRegBase = EA_REG_EAX;
1108 break;
1109 case 8:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001110 insn->regBase = MODRM_REG_RAX;
Sean Callanan04cc3072009-12-19 02:59:52 +00001111 insn->eaRegBase = EA_REG_RAX;
1112 break;
1113 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001114
Sean Callanan04cc3072009-12-19 02:59:52 +00001115 reg |= rFromREX(insn->rexPrefix) << 3;
1116 rm |= bFromREX(insn->rexPrefix) << 3;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001117
Sean Callanan04cc3072009-12-19 02:59:52 +00001118 insn->reg = (Reg)(insn->regBase + reg);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001119
Sean Callanan04cc3072009-12-19 02:59:52 +00001120 switch (insn->addressSize) {
1121 case 2:
1122 insn->eaBaseBase = EA_BASE_BX_SI;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001123
Sean Callanan04cc3072009-12-19 02:59:52 +00001124 switch (mod) {
1125 case 0x0:
1126 if (rm == 0x6) {
1127 insn->eaBase = EA_BASE_NONE;
1128 insn->eaDisplacement = EA_DISP_16;
Sean Callanan010b3732010-04-02 21:23:51 +00001129 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001130 return -1;
1131 } else {
1132 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1133 insn->eaDisplacement = EA_DISP_NONE;
1134 }
1135 break;
1136 case 0x1:
1137 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1138 insn->eaDisplacement = EA_DISP_8;
Sean Callanan010b3732010-04-02 21:23:51 +00001139 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001140 return -1;
1141 break;
1142 case 0x2:
1143 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1144 insn->eaDisplacement = EA_DISP_16;
Sean Callanan010b3732010-04-02 21:23:51 +00001145 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001146 return -1;
1147 break;
1148 case 0x3:
1149 insn->eaBase = (EABase)(insn->eaRegBase + rm);
Sean Callanan010b3732010-04-02 21:23:51 +00001150 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001151 return -1;
1152 break;
1153 }
1154 break;
1155 case 4:
1156 case 8:
1157 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001158
Sean Callanan04cc3072009-12-19 02:59:52 +00001159 switch (mod) {
1160 case 0x0:
1161 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1162 switch (rm) {
1163 case 0x4:
1164 case 0xc: /* in case REXW.b is set */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001165 insn->eaBase = (insn->addressSize == 4 ?
Sean Callanan04cc3072009-12-19 02:59:52 +00001166 EA_BASE_sib : EA_BASE_sib64);
1167 readSIB(insn);
Sean Callanan010b3732010-04-02 21:23:51 +00001168 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001169 return -1;
1170 break;
1171 case 0x5:
1172 insn->eaBase = EA_BASE_NONE;
1173 insn->eaDisplacement = EA_DISP_32;
Sean Callanan010b3732010-04-02 21:23:51 +00001174 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001175 return -1;
1176 break;
1177 default:
1178 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1179 break;
1180 }
1181 break;
1182 case 0x1:
1183 case 0x2:
1184 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1185 switch (rm) {
1186 case 0x4:
1187 case 0xc: /* in case REXW.b is set */
1188 insn->eaBase = EA_BASE_sib;
1189 readSIB(insn);
Sean Callanan010b3732010-04-02 21:23:51 +00001190 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001191 return -1;
1192 break;
1193 default:
1194 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
Sean Callanan010b3732010-04-02 21:23:51 +00001195 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001196 return -1;
1197 break;
1198 }
1199 break;
1200 case 0x3:
1201 insn->eaDisplacement = EA_DISP_NONE;
1202 insn->eaBase = (EABase)(insn->eaRegBase + rm);
1203 break;
1204 }
1205 break;
1206 } /* switch (insn->addressSize) */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001207
Sean Callanan04cc3072009-12-19 02:59:52 +00001208 return 0;
1209}
1210
1211#define GENERIC_FIXUP_FUNC(name, base, prefix) \
1212 static uint8_t name(struct InternalInstruction *insn, \
1213 OperandType type, \
1214 uint8_t index, \
1215 uint8_t *valid) { \
1216 *valid = 1; \
1217 switch (type) { \
1218 default: \
Sean Callanan010b3732010-04-02 21:23:51 +00001219 debug("Unhandled register type"); \
1220 *valid = 0; \
1221 return 0; \
Sean Callanan04cc3072009-12-19 02:59:52 +00001222 case TYPE_Rv: \
1223 return base + index; \
1224 case TYPE_R8: \
Sean Callanan010b3732010-04-02 21:23:51 +00001225 if (insn->rexPrefix && \
Sean Callanan04cc3072009-12-19 02:59:52 +00001226 index >= 4 && index <= 7) { \
1227 return prefix##_SPL + (index - 4); \
1228 } else { \
1229 return prefix##_AL + index; \
1230 } \
1231 case TYPE_R16: \
1232 return prefix##_AX + index; \
1233 case TYPE_R32: \
1234 return prefix##_EAX + index; \
1235 case TYPE_R64: \
1236 return prefix##_RAX + index; \
Sean Callananc3fd5232011-03-15 01:23:15 +00001237 case TYPE_XMM256: \
1238 return prefix##_YMM0 + index; \
Sean Callanan04cc3072009-12-19 02:59:52 +00001239 case TYPE_XMM128: \
1240 case TYPE_XMM64: \
1241 case TYPE_XMM32: \
1242 case TYPE_XMM: \
1243 return prefix##_XMM0 + index; \
1244 case TYPE_MM64: \
1245 case TYPE_MM32: \
1246 case TYPE_MM: \
Sean Callanan010b3732010-04-02 21:23:51 +00001247 if (index > 7) \
Sean Callanan04cc3072009-12-19 02:59:52 +00001248 *valid = 0; \
1249 return prefix##_MM0 + index; \
1250 case TYPE_SEGMENTREG: \
Sean Callanan010b3732010-04-02 21:23:51 +00001251 if (index > 5) \
Sean Callanan04cc3072009-12-19 02:59:52 +00001252 *valid = 0; \
1253 return prefix##_ES + index; \
1254 case TYPE_DEBUGREG: \
Sean Callanan010b3732010-04-02 21:23:51 +00001255 if (index > 7) \
Sean Callanan04cc3072009-12-19 02:59:52 +00001256 *valid = 0; \
1257 return prefix##_DR0 + index; \
Sean Callanane7e1cf92010-05-06 20:59:00 +00001258 case TYPE_CONTROLREG: \
Sean Callanan010b3732010-04-02 21:23:51 +00001259 if (index > 8) \
Sean Callanan04cc3072009-12-19 02:59:52 +00001260 *valid = 0; \
Sean Callanane7e1cf92010-05-06 20:59:00 +00001261 return prefix##_CR0 + index; \
Sean Callanan04cc3072009-12-19 02:59:52 +00001262 } \
1263 }
1264
1265/*
1266 * fixup*Value - Consults an operand type to determine the meaning of the
1267 * reg or R/M field. If the operand is an XMM operand, for example, an
1268 * operand would be XMM0 instead of AX, which readModRM() would otherwise
1269 * misinterpret it as.
1270 *
1271 * @param insn - The instruction containing the operand.
1272 * @param type - The operand type.
1273 * @param index - The existing value of the field as reported by readModRM().
1274 * @param valid - The address of a uint8_t. The target is set to 1 if the
1275 * field is valid for the register class; 0 if not.
Sean Callanan010b3732010-04-02 21:23:51 +00001276 * @return - The proper value.
Sean Callanan04cc3072009-12-19 02:59:52 +00001277 */
Sean Callanan2f9443f2009-12-22 02:07:42 +00001278GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
Sean Callanan04cc3072009-12-19 02:59:52 +00001279GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
1280
1281/*
1282 * fixupReg - Consults an operand specifier to determine which of the
1283 * fixup*Value functions to use in correcting readModRM()'ss interpretation.
1284 *
1285 * @param insn - See fixup*Value().
1286 * @param op - The operand specifier.
1287 * @return - 0 if fixup was successful; -1 if the register returned was
1288 * invalid for its class.
1289 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001290static int fixupReg(struct InternalInstruction *insn,
Benjamin Kramerde0a4fb2010-10-23 09:10:44 +00001291 const struct OperandSpecifier *op) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001292 uint8_t valid;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001293
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001294 dbgprintf(insn, "fixupReg()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001295
Sean Callanan04cc3072009-12-19 02:59:52 +00001296 switch ((OperandEncoding)op->encoding) {
1297 default:
Sean Callanan010b3732010-04-02 21:23:51 +00001298 debug("Expected a REG or R/M encoding in fixupReg");
1299 return -1;
Sean Callananc3fd5232011-03-15 01:23:15 +00001300 case ENCODING_VVVV:
1301 insn->vvvv = (Reg)fixupRegValue(insn,
1302 (OperandType)op->type,
1303 insn->vvvv,
1304 &valid);
1305 if (!valid)
1306 return -1;
1307 break;
Sean Callanan04cc3072009-12-19 02:59:52 +00001308 case ENCODING_REG:
1309 insn->reg = (Reg)fixupRegValue(insn,
1310 (OperandType)op->type,
1311 insn->reg - insn->regBase,
1312 &valid);
1313 if (!valid)
1314 return -1;
1315 break;
1316 case ENCODING_RM:
1317 if (insn->eaBase >= insn->eaRegBase) {
1318 insn->eaBase = (EABase)fixupRMValue(insn,
1319 (OperandType)op->type,
1320 insn->eaBase - insn->eaRegBase,
1321 &valid);
1322 if (!valid)
1323 return -1;
1324 }
1325 break;
1326 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001327
Sean Callanan04cc3072009-12-19 02:59:52 +00001328 return 0;
1329}
1330
1331/*
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001332 * readOpcodeModifier - Reads an operand from the opcode field of an
Sean Callanan04cc3072009-12-19 02:59:52 +00001333 * instruction. Handles AddRegFrm instructions.
1334 *
1335 * @param insn - The instruction whose opcode field is to be read.
1336 * @param inModRM - Indicates that the opcode field is to be read from the
1337 * ModR/M extension; useful for escape opcodes
Sean Callanan010b3732010-04-02 21:23:51 +00001338 * @return - 0 on success; nonzero otherwise.
Sean Callanan04cc3072009-12-19 02:59:52 +00001339 */
Sean Callanan010b3732010-04-02 21:23:51 +00001340static int readOpcodeModifier(struct InternalInstruction* insn) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001341 dbgprintf(insn, "readOpcodeModifier()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001342
Sean Callanan04cc3072009-12-19 02:59:52 +00001343 if (insn->consumedOpcodeModifier)
Sean Callanan010b3732010-04-02 21:23:51 +00001344 return 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001345
Sean Callanan04cc3072009-12-19 02:59:52 +00001346 insn->consumedOpcodeModifier = TRUE;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001347
Sean Callanan010b3732010-04-02 21:23:51 +00001348 switch (insn->spec->modifierType) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001349 default:
Sean Callanan010b3732010-04-02 21:23:51 +00001350 debug("Unknown modifier type.");
1351 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001352 case MODIFIER_NONE:
Sean Callanan010b3732010-04-02 21:23:51 +00001353 debug("No modifier but an operand expects one.");
1354 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001355 case MODIFIER_OPCODE:
1356 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
Sean Callanan010b3732010-04-02 21:23:51 +00001357 return 0;
Sean Callanan04cc3072009-12-19 02:59:52 +00001358 case MODIFIER_MODRM:
1359 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
Sean Callanan010b3732010-04-02 21:23:51 +00001360 return 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001361 }
Sean Callanan04cc3072009-12-19 02:59:52 +00001362}
1363
1364/*
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001365 * readOpcodeRegister - Reads an operand from the opcode field of an
Sean Callanan04cc3072009-12-19 02:59:52 +00001366 * instruction and interprets it appropriately given the operand width.
1367 * Handles AddRegFrm instructions.
1368 *
1369 * @param insn - See readOpcodeModifier().
1370 * @param size - The width (in bytes) of the register being specified.
1371 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1372 * RAX.
Sean Callanan010b3732010-04-02 21:23:51 +00001373 * @return - 0 on success; nonzero otherwise.
Sean Callanan04cc3072009-12-19 02:59:52 +00001374 */
Sean Callanan010b3732010-04-02 21:23:51 +00001375static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001376 dbgprintf(insn, "readOpcodeRegister()");
Sean Callanan04cc3072009-12-19 02:59:52 +00001377
Sean Callanan010b3732010-04-02 21:23:51 +00001378 if (readOpcodeModifier(insn))
1379 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001380
Sean Callanan04cc3072009-12-19 02:59:52 +00001381 if (size == 0)
1382 size = insn->registerSize;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001383
Sean Callanan04cc3072009-12-19 02:59:52 +00001384 switch (size) {
1385 case 1:
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001386 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
Sean Callanan2f9443f2009-12-22 02:07:42 +00001387 | insn->opcodeModifier));
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001388 if (insn->rexPrefix &&
Sean Callanan010b3732010-04-02 21:23:51 +00001389 insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1390 insn->opcodeRegister < MODRM_REG_AL + 0x8) {
Sean Callanan2f9443f2009-12-22 02:07:42 +00001391 insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1392 + (insn->opcodeRegister - MODRM_REG_AL - 4));
Sean Callanan04cc3072009-12-19 02:59:52 +00001393 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001394
Sean Callanan04cc3072009-12-19 02:59:52 +00001395 break;
1396 case 2:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001397 insn->opcodeRegister = (Reg)(MODRM_REG_AX
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001398 + ((bFromREX(insn->rexPrefix) << 3)
Sean Callanan2f9443f2009-12-22 02:07:42 +00001399 | insn->opcodeModifier));
Sean Callanan04cc3072009-12-19 02:59:52 +00001400 break;
1401 case 4:
Sean Callanan010b3732010-04-02 21:23:51 +00001402 insn->opcodeRegister = (Reg)(MODRM_REG_EAX
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001403 + ((bFromREX(insn->rexPrefix) << 3)
Sean Callanan2f9443f2009-12-22 02:07:42 +00001404 | insn->opcodeModifier));
Sean Callanan04cc3072009-12-19 02:59:52 +00001405 break;
1406 case 8:
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001407 insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1408 + ((bFromREX(insn->rexPrefix) << 3)
Sean Callanan2f9443f2009-12-22 02:07:42 +00001409 | insn->opcodeModifier));
Sean Callanan04cc3072009-12-19 02:59:52 +00001410 break;
1411 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001412
Sean Callanan010b3732010-04-02 21:23:51 +00001413 return 0;
Sean Callanan04cc3072009-12-19 02:59:52 +00001414}
1415
1416/*
1417 * readImmediate - Consumes an immediate operand from an instruction, given the
1418 * desired operand size.
1419 *
1420 * @param insn - The instruction whose operand is to be read.
1421 * @param size - The width (in bytes) of the operand.
1422 * @return - 0 if the immediate was successfully consumed; nonzero
1423 * otherwise.
1424 */
1425static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1426 uint8_t imm8;
1427 uint16_t imm16;
1428 uint32_t imm32;
1429 uint64_t imm64;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001430
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001431 dbgprintf(insn, "readImmediate()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001432
Sean Callanan010b3732010-04-02 21:23:51 +00001433 if (insn->numImmediatesConsumed == 2) {
1434 debug("Already consumed two immediates");
1435 return -1;
1436 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001437
Sean Callanan04cc3072009-12-19 02:59:52 +00001438 if (size == 0)
1439 size = insn->immediateSize;
1440 else
1441 insn->immediateSize = size;
Kevin Enderby6fbcd8d2012-02-23 18:18:17 +00001442 insn->immediateOffset = insn->readerCursor - insn->startLocation;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001443
Sean Callanan04cc3072009-12-19 02:59:52 +00001444 switch (size) {
1445 case 1:
1446 if (consumeByte(insn, &imm8))
1447 return -1;
1448 insn->immediates[insn->numImmediatesConsumed] = imm8;
1449 break;
1450 case 2:
1451 if (consumeUInt16(insn, &imm16))
1452 return -1;
1453 insn->immediates[insn->numImmediatesConsumed] = imm16;
1454 break;
1455 case 4:
1456 if (consumeUInt32(insn, &imm32))
1457 return -1;
1458 insn->immediates[insn->numImmediatesConsumed] = imm32;
1459 break;
1460 case 8:
1461 if (consumeUInt64(insn, &imm64))
1462 return -1;
1463 insn->immediates[insn->numImmediatesConsumed] = imm64;
1464 break;
1465 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001466
Sean Callanan04cc3072009-12-19 02:59:52 +00001467 insn->numImmediatesConsumed++;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001468
Sean Callanan04cc3072009-12-19 02:59:52 +00001469 return 0;
1470}
1471
1472/*
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001473 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
Sean Callananc3fd5232011-03-15 01:23:15 +00001474 *
1475 * @param insn - The instruction whose operand is to be read.
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001476 * @return - 0 if the vvvv was successfully consumed; nonzero
Sean Callananc3fd5232011-03-15 01:23:15 +00001477 * otherwise.
1478 */
1479static int readVVVV(struct InternalInstruction* insn) {
1480 dbgprintf(insn, "readVVVV()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001481
Sean Callananc3fd5232011-03-15 01:23:15 +00001482 if (insn->vexSize == 3)
1483 insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
1484 else if (insn->vexSize == 2)
1485 insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
1486 else
1487 return -1;
1488
Craig Topper0d0be472011-10-03 08:14:29 +00001489 if (insn->mode != MODE_64BIT)
1490 insn->vvvv &= 0x7;
1491
Sean Callananc3fd5232011-03-15 01:23:15 +00001492 return 0;
1493}
1494
1495/*
Sean Callanan04cc3072009-12-19 02:59:52 +00001496 * readOperands - Consults the specifier for an instruction and consumes all
1497 * operands for that instruction, interpreting them as it goes.
1498 *
1499 * @param insn - The instruction whose operands are to be read and interpreted.
1500 * @return - 0 if all operands could be read; nonzero otherwise.
1501 */
1502static int readOperands(struct InternalInstruction* insn) {
1503 int index;
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001504 int hasVVVV, needVVVV;
Craig Topper2ba766a2011-12-30 06:23:39 +00001505 int sawRegImm = 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001506
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001507 dbgprintf(insn, "readOperands()");
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001508
1509 /* If non-zero vvvv specified, need to make sure one of the operands
1510 uses it. */
1511 hasVVVV = !readVVVV(insn);
1512 needVVVV = hasVVVV && (insn->vvvv != 0);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001513
Sean Callanan04cc3072009-12-19 02:59:52 +00001514 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
Craig Topperb8aec082012-08-01 07:39:18 +00001515 switch (x86OperandSets[insn->spec->operands][index].encoding) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001516 case ENCODING_NONE:
1517 break;
1518 case ENCODING_REG:
1519 case ENCODING_RM:
1520 if (readModRM(insn))
1521 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001522 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
Sean Callanan04cc3072009-12-19 02:59:52 +00001523 return -1;
1524 break;
1525 case ENCODING_CB:
1526 case ENCODING_CW:
1527 case ENCODING_CD:
1528 case ENCODING_CP:
1529 case ENCODING_CO:
1530 case ENCODING_CT:
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001531 dbgprintf(insn, "We currently don't hande code-offset encodings");
Sean Callanan04cc3072009-12-19 02:59:52 +00001532 return -1;
1533 case ENCODING_IB:
Craig Topper2ba766a2011-12-30 06:23:39 +00001534 if (sawRegImm) {
Benjamin Kramer9c48f262012-01-04 22:06:45 +00001535 /* Saw a register immediate so don't read again and instead split the
1536 previous immediate. FIXME: This is a hack. */
Benjamin Kramer47aecca2012-01-01 17:55:36 +00001537 insn->immediates[insn->numImmediatesConsumed] =
1538 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1539 ++insn->numImmediatesConsumed;
Craig Topper2ba766a2011-12-30 06:23:39 +00001540 break;
1541 }
Sean Callanan04cc3072009-12-19 02:59:52 +00001542 if (readImmediate(insn, 1))
1543 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001544 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
Sean Callanan1efe6612010-04-07 21:42:19 +00001545 insn->immediates[insn->numImmediatesConsumed - 1] > 7)
1546 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001547 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
Craig Topper7629d632012-04-03 05:20:24 +00001548 insn->immediates[insn->numImmediatesConsumed - 1] > 31)
1549 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001550 if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
1551 x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
Craig Topper2ba766a2011-12-30 06:23:39 +00001552 sawRegImm = 1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001553 break;
1554 case ENCODING_IW:
1555 if (readImmediate(insn, 2))
1556 return -1;
1557 break;
1558 case ENCODING_ID:
1559 if (readImmediate(insn, 4))
1560 return -1;
1561 break;
1562 case ENCODING_IO:
1563 if (readImmediate(insn, 8))
1564 return -1;
1565 break;
1566 case ENCODING_Iv:
Sean Callanan010b3732010-04-02 21:23:51 +00001567 if (readImmediate(insn, insn->immediateSize))
1568 return -1;
Chris Lattnerd4758fc2010-04-16 21:15:15 +00001569 break;
Sean Callanan04cc3072009-12-19 02:59:52 +00001570 case ENCODING_Ia:
Sean Callanan010b3732010-04-02 21:23:51 +00001571 if (readImmediate(insn, insn->addressSize))
1572 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001573 break;
1574 case ENCODING_RB:
Sean Callanan010b3732010-04-02 21:23:51 +00001575 if (readOpcodeRegister(insn, 1))
1576 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001577 break;
1578 case ENCODING_RW:
Sean Callanan010b3732010-04-02 21:23:51 +00001579 if (readOpcodeRegister(insn, 2))
1580 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001581 break;
1582 case ENCODING_RD:
Sean Callanan010b3732010-04-02 21:23:51 +00001583 if (readOpcodeRegister(insn, 4))
1584 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001585 break;
1586 case ENCODING_RO:
Sean Callanan010b3732010-04-02 21:23:51 +00001587 if (readOpcodeRegister(insn, 8))
1588 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001589 break;
1590 case ENCODING_Rv:
Sean Callanan010b3732010-04-02 21:23:51 +00001591 if (readOpcodeRegister(insn, 0))
1592 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001593 break;
1594 case ENCODING_I:
Sean Callanan010b3732010-04-02 21:23:51 +00001595 if (readOpcodeModifier(insn))
1596 return -1;
Sean Callananc3fd5232011-03-15 01:23:15 +00001597 break;
1598 case ENCODING_VVVV:
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001599 needVVVV = 0; /* Mark that we have found a VVVV operand. */
1600 if (!hasVVVV)
Sean Callananc3fd5232011-03-15 01:23:15 +00001601 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001602 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
Sean Callananc3fd5232011-03-15 01:23:15 +00001603 return -1;
1604 break;
Sean Callanan04cc3072009-12-19 02:59:52 +00001605 case ENCODING_DUP:
1606 break;
1607 default:
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001608 dbgprintf(insn, "Encountered an operand with an unknown encoding.");
Sean Callanan04cc3072009-12-19 02:59:52 +00001609 return -1;
1610 }
1611 }
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001612
1613 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
1614 if (needVVVV) return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001615
Sean Callanan04cc3072009-12-19 02:59:52 +00001616 return 0;
1617}
1618
1619/*
1620 * decodeInstruction - Reads and interprets a full instruction provided by the
1621 * user.
1622 *
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001623 * @param insn - A pointer to the instruction to be populated. Must be
Sean Callanan04cc3072009-12-19 02:59:52 +00001624 * pre-allocated.
1625 * @param reader - The function to be used to read the instruction's bytes.
1626 * @param readerArg - A generic argument to be passed to the reader to store
1627 * any internal state.
1628 * @param logger - If non-NULL, the function to be used to write log messages
1629 * and warnings.
1630 * @param loggerArg - A generic argument to be passed to the logger to store
1631 * any internal state.
1632 * @param startLoc - The address (in the reader's address space) of the first
1633 * byte in the instruction.
1634 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1635 * decode the instruction in.
1636 * @return - 0 if the instruction's memory could be read; nonzero if
1637 * not.
1638 */
1639int decodeInstruction(struct InternalInstruction* insn,
1640 byteReader_t reader,
Roman Divacky67923802012-09-05 21:17:34 +00001641 const void* readerArg,
Sean Callanan04cc3072009-12-19 02:59:52 +00001642 dlog_t logger,
1643 void* loggerArg,
Roman Divacky67923802012-09-05 21:17:34 +00001644 const void* miiArg,
Sean Callanan04cc3072009-12-19 02:59:52 +00001645 uint64_t startLoc,
1646 DisassemblerMode mode) {
Daniel Dunbarc745a622009-12-19 03:31:50 +00001647 memset(insn, 0, sizeof(struct InternalInstruction));
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001648
Sean Callanan04cc3072009-12-19 02:59:52 +00001649 insn->reader = reader;
1650 insn->readerArg = readerArg;
1651 insn->dlog = logger;
1652 insn->dlogArg = loggerArg;
1653 insn->startLocation = startLoc;
1654 insn->readerCursor = startLoc;
1655 insn->mode = mode;
1656 insn->numImmediatesConsumed = 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001657
Sean Callanan04cc3072009-12-19 02:59:52 +00001658 if (readPrefixes(insn) ||
1659 readOpcode(insn) ||
Benjamin Kramer478e8de2012-02-11 14:50:54 +00001660 getID(insn, miiArg) ||
Sean Callanan04cc3072009-12-19 02:59:52 +00001661 insn->instructionID == 0 ||
1662 readOperands(insn))
1663 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001664
1665 insn->operands = &x86OperandSets[insn->spec->operands][0];
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001666
Sean Callanan04cc3072009-12-19 02:59:52 +00001667 insn->length = insn->readerCursor - insn->startLocation;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001668
Benjamin Kramer4f672272010-03-18 12:18:36 +00001669 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
1670 startLoc, insn->readerCursor, insn->length);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001671
Sean Callanan04cc3072009-12-19 02:59:52 +00001672 if (insn->length > 15)
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001673 dbgprintf(insn, "Instruction exceeds 15-byte limit");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001674
Sean Callanan04cc3072009-12-19 02:59:52 +00001675 return 0;
1676}