blob: 602087756b23b9aa1bdb9af638f49ef5e02ed40c [file] [log] [blame]
/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===*
 *
 * This file is part of the X86 Disassembler.
 * It contains the implementation of the instruction decoder.
 * Documentation for the disassembler can be found in X86Disassembler.h.
 *
 *===----------------------------------------------------------------------===*/
15
Sean Callanan04cc3072009-12-19 02:59:52 +000016#include <stdarg.h> /* for va_*() */
17#include <stdio.h> /* for vsnprintf() */
18#include <stdlib.h> /* for exit() */
Daniel Dunbarc745a622009-12-19 03:31:50 +000019#include <string.h> /* for memset() */
Sean Callanan04cc3072009-12-19 02:59:52 +000020
21#include "X86DisassemblerDecoder.h"
22
23#include "X86GenDisassemblerTables.inc"
24
/* Truth values used by the decoder's boolean-returning helpers. */
#define TRUE  1
#define FALSE 0

/* One-byte boolean type local to this translation unit. */
typedef int8_t bool;

/*
 * debug - Forwards a diagnostic string, tagged with the current file and line,
 *   to x86DisassemblerDebug().  Expands to an empty statement when NDEBUG is
 *   defined.
 *
 * @param s - The message to report.
 */
#ifdef NDEBUG
#define debug(s) do { } while (0)
#else
#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#endif
35
Sean Callanan04cc3072009-12-19 02:59:52 +000036
37/*
38 * contextForAttrs - Client for the instruction context table. Takes a set of
39 * attributes and returns the appropriate decode context.
40 *
41 * @param attrMask - Attributes, from the enumeration attributeBits.
42 * @return - The InstructionContext to use when looking up an
43 * an instruction with these attributes.
44 */
Sean Callanan588785c2009-12-22 22:51:40 +000045static InstructionContext contextForAttrs(uint8_t attrMask) {
Sean Callanan04cc3072009-12-19 02:59:52 +000046 return CONTEXTS_SYM[attrMask];
47}
48
49/*
50 * modRMRequired - Reads the appropriate instruction table to determine whether
51 * the ModR/M byte is required to decode a particular instruction.
52 *
53 * @param type - The opcode type (i.e., how many bytes it has).
54 * @param insnContext - The context for the instruction, as returned by
55 * contextForAttrs.
56 * @param opcode - The last byte of the instruction's opcode, not counting
57 * ModR/M extensions and escapes.
58 * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
59 */
Sean Callanan588785c2009-12-22 22:51:40 +000060static int modRMRequired(OpcodeType type,
Craig Topper21c33652011-10-02 16:56:09 +000061 InstructionContext insnContext,
62 uint8_t opcode) {
Daniel Dunbar8b532de2009-12-22 01:41:37 +000063 const struct ContextDecision* decision = 0;
Sean Callanan04cc3072009-12-19 02:59:52 +000064
65 switch (type) {
66 case ONEBYTE:
67 decision = &ONEBYTE_SYM;
68 break;
69 case TWOBYTE:
70 decision = &TWOBYTE_SYM;
71 break;
72 case THREEBYTE_38:
73 decision = &THREEBYTE38_SYM;
74 break;
75 case THREEBYTE_3A:
76 decision = &THREEBYTE3A_SYM;
77 break;
Joerg Sonnenbergerfc4789d2011-04-04 16:58:13 +000078 case THREEBYTE_A6:
79 decision = &THREEBYTEA6_SYM;
80 break;
81 case THREEBYTE_A7:
82 decision = &THREEBYTEA7_SYM;
83 break;
Sean Callanan04cc3072009-12-19 02:59:52 +000084 }
Ahmed Charles636a3d62012-02-19 11:37:01 +000085
Sean Callanan04cc3072009-12-19 02:59:52 +000086 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
87 modrm_type != MODRM_ONEENTRY;
Sean Callanan04cc3072009-12-19 02:59:52 +000088}
89
90/*
91 * decode - Reads the appropriate instruction table to obtain the unique ID of
92 * an instruction.
93 *
94 * @param type - See modRMRequired().
95 * @param insnContext - See modRMRequired().
96 * @param opcode - See modRMRequired().
97 * @param modRM - The ModR/M byte if required, or any value if not.
Sean Callanan010b3732010-04-02 21:23:51 +000098 * @return - The UID of the instruction, or 0 on failure.
Sean Callanan04cc3072009-12-19 02:59:52 +000099 */
Sean Callanan588785c2009-12-22 22:51:40 +0000100static InstrUID decode(OpcodeType type,
Sean Callanan010b3732010-04-02 21:23:51 +0000101 InstructionContext insnContext,
102 uint8_t opcode,
103 uint8_t modRM) {
Duncan Sandsae22c602012-02-05 14:20:11 +0000104 const struct ModRMDecision* dec = 0;
Sean Callanan04cc3072009-12-19 02:59:52 +0000105
106 switch (type) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000107 case ONEBYTE:
108 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
109 break;
110 case TWOBYTE:
111 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
112 break;
113 case THREEBYTE_38:
114 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
115 break;
116 case THREEBYTE_3A:
117 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
118 break;
Joerg Sonnenbergerfc4789d2011-04-04 16:58:13 +0000119 case THREEBYTE_A6:
120 dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
121 break;
122 case THREEBYTE_A7:
123 dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
124 break;
Sean Callanan04cc3072009-12-19 02:59:52 +0000125 }
126
127 switch (dec->modrm_type) {
128 default:
Sean Callanan010b3732010-04-02 21:23:51 +0000129 debug("Corrupt table! Unknown modrm_type");
130 return 0;
Sean Callanan04cc3072009-12-19 02:59:52 +0000131 case MODRM_ONEENTRY:
Craig Topper487e7442012-02-09 07:45:30 +0000132 return modRMTable[dec->instructionIDs];
Sean Callanan04cc3072009-12-19 02:59:52 +0000133 case MODRM_SPLITRM:
134 if (modFromModRM(modRM) == 0x3)
Craig Topper487e7442012-02-09 07:45:30 +0000135 return modRMTable[dec->instructionIDs+1];
136 return modRMTable[dec->instructionIDs];
Craig Toppera0cd9702012-02-09 08:58:07 +0000137 case MODRM_SPLITREG:
138 if (modFromModRM(modRM) == 0x3)
139 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
140 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
Sean Callanan04cc3072009-12-19 02:59:52 +0000141 case MODRM_FULL:
Craig Topper487e7442012-02-09 07:45:30 +0000142 return modRMTable[dec->instructionIDs+modRM];
Sean Callanan04cc3072009-12-19 02:59:52 +0000143 }
Sean Callanan04cc3072009-12-19 02:59:52 +0000144}
145
146/*
147 * specifierForUID - Given a UID, returns the name and operand specification for
148 * that instruction.
149 *
150 * @param uid - The unique ID for the instruction. This should be returned by
151 * decode(); specifierForUID will not check bounds.
152 * @return - A pointer to the specification for that instruction.
153 */
Benjamin Kramerde0a4fb2010-10-23 09:10:44 +0000154static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000155 return &INSTRUCTIONS_SYM[uid];
156}
157
158/*
159 * consumeByte - Uses the reader function provided by the user to consume one
160 * byte from the instruction's memory and advance the cursor.
161 *
162 * @param insn - The instruction with the reader function to use. The cursor
163 * for this instruction is advanced.
164 * @param byte - A pointer to a pre-allocated memory buffer to be populated
165 * with the data read.
166 * @return - 0 if the read was successful; nonzero otherwise.
167 */
Sean Callanan588785c2009-12-22 22:51:40 +0000168static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000169 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
170
171 if (!ret)
172 ++(insn->readerCursor);
173
174 return ret;
175}
176
177/*
178 * lookAtByte - Like consumeByte, but does not advance the cursor.
179 *
180 * @param insn - See consumeByte().
181 * @param byte - See consumeByte().
182 * @return - See consumeByte().
183 */
Sean Callanan588785c2009-12-22 22:51:40 +0000184static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000185 return insn->reader(insn->readerArg, byte, insn->readerCursor);
186}
187
Sean Callanan588785c2009-12-22 22:51:40 +0000188static void unconsumeByte(struct InternalInstruction* insn) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000189 insn->readerCursor--;
190}
191
Sean Callanan588785c2009-12-22 22:51:40 +0000192#define CONSUME_FUNC(name, type) \
193 static int name(struct InternalInstruction* insn, type* ptr) { \
194 type combined = 0; \
195 unsigned offset; \
196 for (offset = 0; offset < sizeof(type); ++offset) { \
197 uint8_t byte; \
198 int ret = insn->reader(insn->readerArg, \
199 &byte, \
200 insn->readerCursor + offset); \
201 if (ret) \
202 return ret; \
203 combined = combined | ((type)byte << ((type)offset * 8)); \
204 } \
205 *ptr = combined; \
206 insn->readerCursor += sizeof(type); \
207 return 0; \
Sean Callanan04cc3072009-12-19 02:59:52 +0000208 }
209
210/*
211 * consume* - Use the reader function provided by the user to consume data
212 * values of various sizes from the instruction's memory and advance the
213 * cursor appropriately. These readers perform endian conversion.
214 *
215 * @param insn - See consumeByte().
216 * @param ptr - A pointer to a pre-allocated memory of appropriate size to
217 * be populated with the data read.
218 * @return - See consumeByte().
219 */
220CONSUME_FUNC(consumeInt8, int8_t)
221CONSUME_FUNC(consumeInt16, int16_t)
222CONSUME_FUNC(consumeInt32, int32_t)
223CONSUME_FUNC(consumeUInt16, uint16_t)
224CONSUME_FUNC(consumeUInt32, uint32_t)
225CONSUME_FUNC(consumeUInt64, uint64_t)
226
227/*
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000228 * dbgprintf - Uses the logging function provided by the user to log a single
Sean Callanan04cc3072009-12-19 02:59:52 +0000229 * message, typically without a carriage-return.
230 *
231 * @param insn - The instruction containing the logging function.
232 * @param format - See printf().
233 * @param ... - See printf().
234 */
Sean Callanan588785c2009-12-22 22:51:40 +0000235static void dbgprintf(struct InternalInstruction* insn,
236 const char* format,
237 ...) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000238 char buffer[256];
239 va_list ap;
240
241 if (!insn->dlog)
242 return;
243
244 va_start(ap, format);
245 (void)vsnprintf(buffer, sizeof(buffer), format, ap);
246 va_end(ap);
247
248 insn->dlog(insn->dlogArg, buffer);
249
250 return;
251}
252
253/*
254 * setPrefixPresent - Marks that a particular prefix is present at a particular
255 * location.
256 *
257 * @param insn - The instruction to be marked as having the prefix.
258 * @param prefix - The prefix that is present.
259 * @param location - The location where the prefix is located (in the address
260 * space of the instruction's reader).
261 */
Sean Callanan588785c2009-12-22 22:51:40 +0000262static void setPrefixPresent(struct InternalInstruction* insn,
Sean Callanan04cc3072009-12-19 02:59:52 +0000263 uint8_t prefix,
264 uint64_t location)
265{
266 insn->prefixPresent[prefix] = 1;
267 insn->prefixLocations[prefix] = location;
268}
269
270/*
271 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
272 * present at a given location.
273 *
274 * @param insn - The instruction to be queried.
275 * @param prefix - The prefix.
276 * @param location - The location to query.
277 * @return - Whether the prefix is at that location.
278 */
Sean Callanan588785c2009-12-22 22:51:40 +0000279static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
280 uint8_t prefix,
281 uint64_t location)
Sean Callanan04cc3072009-12-19 02:59:52 +0000282{
283 if (insn->prefixPresent[prefix] == 1 &&
284 insn->prefixLocations[prefix] == location)
285 return TRUE;
286 else
287 return FALSE;
288}
289
290/*
291 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
292 * instruction as having them. Also sets the instruction's default operand,
293 * address, and other relevant data sizes to report operands correctly.
294 *
295 * @param insn - The instruction whose prefixes are to be read.
296 * @return - 0 if the instruction could be read until the end of the prefix
297 * bytes, and no prefixes conflicted; nonzero otherwise.
298 */
299static int readPrefixes(struct InternalInstruction* insn) {
300 BOOL isPrefix = TRUE;
301 BOOL prefixGroups[4] = { FALSE };
302 uint64_t prefixLocation;
Ted Kremenek3c4408c2011-01-23 17:05:06 +0000303 uint8_t byte = 0;
Sean Callanan04cc3072009-12-19 02:59:52 +0000304
305 BOOL hasAdSize = FALSE;
306 BOOL hasOpSize = FALSE;
307
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000308 dbgprintf(insn, "readPrefixes()");
Sean Callanan04cc3072009-12-19 02:59:52 +0000309
310 while (isPrefix) {
311 prefixLocation = insn->readerCursor;
312
313 if (consumeByte(insn, &byte))
314 return -1;
Kevin Enderby014e1cd2012-03-09 17:52:49 +0000315
Benjamin Krameradfc73d2012-03-10 15:10:06 +0000316 /*
317 * If the first byte is a LOCK prefix break and let it be disassembled
318 * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>.
319 * FIXME there is currently no way to get the disassembler to print the
320 * lock prefix if it is not the first byte.
321 */
Kevin Enderby014e1cd2012-03-09 17:52:49 +0000322 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
323 break;
Sean Callanan04cc3072009-12-19 02:59:52 +0000324
325 switch (byte) {
326 case 0xf0: /* LOCK */
327 case 0xf2: /* REPNE/REPNZ */
328 case 0xf3: /* REP or REPE/REPZ */
329 if (prefixGroups[0])
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000330 dbgprintf(insn, "Redundant Group 1 prefix");
Sean Callanan04cc3072009-12-19 02:59:52 +0000331 prefixGroups[0] = TRUE;
332 setPrefixPresent(insn, byte, prefixLocation);
333 break;
334 case 0x2e: /* CS segment override -OR- Branch not taken */
335 case 0x36: /* SS segment override -OR- Branch taken */
336 case 0x3e: /* DS segment override */
337 case 0x26: /* ES segment override */
338 case 0x64: /* FS segment override */
339 case 0x65: /* GS segment override */
340 switch (byte) {
341 case 0x2e:
342 insn->segmentOverride = SEG_OVERRIDE_CS;
343 break;
344 case 0x36:
345 insn->segmentOverride = SEG_OVERRIDE_SS;
346 break;
347 case 0x3e:
348 insn->segmentOverride = SEG_OVERRIDE_DS;
349 break;
350 case 0x26:
351 insn->segmentOverride = SEG_OVERRIDE_ES;
352 break;
353 case 0x64:
354 insn->segmentOverride = SEG_OVERRIDE_FS;
355 break;
356 case 0x65:
357 insn->segmentOverride = SEG_OVERRIDE_GS;
358 break;
359 default:
Sean Callanan010b3732010-04-02 21:23:51 +0000360 debug("Unhandled override");
361 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +0000362 }
363 if (prefixGroups[1])
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000364 dbgprintf(insn, "Redundant Group 2 prefix");
Sean Callanan04cc3072009-12-19 02:59:52 +0000365 prefixGroups[1] = TRUE;
366 setPrefixPresent(insn, byte, prefixLocation);
367 break;
368 case 0x66: /* Operand-size override */
369 if (prefixGroups[2])
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000370 dbgprintf(insn, "Redundant Group 3 prefix");
Sean Callanan04cc3072009-12-19 02:59:52 +0000371 prefixGroups[2] = TRUE;
372 hasOpSize = TRUE;
373 setPrefixPresent(insn, byte, prefixLocation);
374 break;
375 case 0x67: /* Address-size override */
376 if (prefixGroups[3])
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000377 dbgprintf(insn, "Redundant Group 4 prefix");
Sean Callanan04cc3072009-12-19 02:59:52 +0000378 prefixGroups[3] = TRUE;
379 hasAdSize = TRUE;
380 setPrefixPresent(insn, byte, prefixLocation);
381 break;
382 default: /* Not a prefix byte */
383 isPrefix = FALSE;
384 break;
385 }
386
387 if (isPrefix)
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000388 dbgprintf(insn, "Found prefix 0x%hhx", byte);
Sean Callanan04cc3072009-12-19 02:59:52 +0000389 }
Sean Callananc3fd5232011-03-15 01:23:15 +0000390
391 insn->vexSize = 0;
Sean Callanan04cc3072009-12-19 02:59:52 +0000392
Sean Callananc3fd5232011-03-15 01:23:15 +0000393 if (byte == 0xc4) {
394 uint8_t byte1;
Sean Callanan04cc3072009-12-19 02:59:52 +0000395
Sean Callananc3fd5232011-03-15 01:23:15 +0000396 if (lookAtByte(insn, &byte1)) {
397 dbgprintf(insn, "Couldn't read second byte of VEX");
398 return -1;
399 }
400
Craig Topper45faba92011-09-26 05:12:43 +0000401 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
Sean Callananc3fd5232011-03-15 01:23:15 +0000402 insn->vexSize = 3;
403 insn->necessaryPrefixLocation = insn->readerCursor - 1;
404 }
405 else {
Sean Callanan04cc3072009-12-19 02:59:52 +0000406 unconsumeByte(insn);
407 insn->necessaryPrefixLocation = insn->readerCursor - 1;
408 }
Sean Callananc3fd5232011-03-15 01:23:15 +0000409
410 if (insn->vexSize == 3) {
411 insn->vexPrefix[0] = byte;
412 consumeByte(insn, &insn->vexPrefix[1]);
413 consumeByte(insn, &insn->vexPrefix[2]);
414
415 /* We simulate the REX prefix for simplicity's sake */
Craig Topper31854ba2011-10-03 07:51:09 +0000416
417 if (insn->mode == MODE_64BIT) {
418 insn->rexPrefix = 0x40
419 | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
420 | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
421 | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
422 | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
423 }
Sean Callananc3fd5232011-03-15 01:23:15 +0000424
425 switch (ppFromVEX3of3(insn->vexPrefix[2]))
426 {
427 default:
428 break;
429 case VEX_PREFIX_66:
430 hasOpSize = TRUE;
431 break;
432 }
433
434 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
435 }
Sean Callanan04cc3072009-12-19 02:59:52 +0000436 }
Sean Callananc3fd5232011-03-15 01:23:15 +0000437 else if (byte == 0xc5) {
438 uint8_t byte1;
439
440 if (lookAtByte(insn, &byte1)) {
441 dbgprintf(insn, "Couldn't read second byte of VEX");
442 return -1;
443 }
444
Craig Topper45faba92011-09-26 05:12:43 +0000445 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
Sean Callananc3fd5232011-03-15 01:23:15 +0000446 insn->vexSize = 2;
447 }
448 else {
449 unconsumeByte(insn);
450 }
451
452 if (insn->vexSize == 2) {
453 insn->vexPrefix[0] = byte;
454 consumeByte(insn, &insn->vexPrefix[1]);
455
Craig Topper31854ba2011-10-03 07:51:09 +0000456 if (insn->mode == MODE_64BIT) {
457 insn->rexPrefix = 0x40
458 | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
459 }
Sean Callananc3fd5232011-03-15 01:23:15 +0000460
461 switch (ppFromVEX2of2(insn->vexPrefix[1]))
462 {
463 default:
464 break;
465 case VEX_PREFIX_66:
466 hasOpSize = TRUE;
467 break;
468 }
469
470 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
471 }
472 }
473 else {
474 if (insn->mode == MODE_64BIT) {
475 if ((byte & 0xf0) == 0x40) {
476 uint8_t opcodeByte;
477
478 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
479 dbgprintf(insn, "Redundant REX prefix");
480 return -1;
481 }
482
483 insn->rexPrefix = byte;
484 insn->necessaryPrefixLocation = insn->readerCursor - 2;
485
486 dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
487 } else {
488 unconsumeByte(insn);
489 insn->necessaryPrefixLocation = insn->readerCursor - 1;
490 }
491 } else {
492 unconsumeByte(insn);
493 insn->necessaryPrefixLocation = insn->readerCursor - 1;
494 }
495 }
496
Sean Callanan04cc3072009-12-19 02:59:52 +0000497 if (insn->mode == MODE_16BIT) {
498 insn->registerSize = (hasOpSize ? 4 : 2);
499 insn->addressSize = (hasAdSize ? 4 : 2);
500 insn->displacementSize = (hasAdSize ? 4 : 2);
501 insn->immediateSize = (hasOpSize ? 4 : 2);
502 } else if (insn->mode == MODE_32BIT) {
503 insn->registerSize = (hasOpSize ? 2 : 4);
504 insn->addressSize = (hasAdSize ? 2 : 4);
505 insn->displacementSize = (hasAdSize ? 2 : 4);
Sean Callanan9f6c6222010-10-22 01:24:11 +0000506 insn->immediateSize = (hasOpSize ? 2 : 4);
Sean Callanan04cc3072009-12-19 02:59:52 +0000507 } else if (insn->mode == MODE_64BIT) {
508 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
509 insn->registerSize = 8;
510 insn->addressSize = (hasAdSize ? 4 : 8);
511 insn->displacementSize = 4;
512 insn->immediateSize = 4;
513 } else if (insn->rexPrefix) {
514 insn->registerSize = (hasOpSize ? 2 : 4);
515 insn->addressSize = (hasAdSize ? 4 : 8);
516 insn->displacementSize = (hasOpSize ? 2 : 4);
517 insn->immediateSize = (hasOpSize ? 2 : 4);
518 } else {
519 insn->registerSize = (hasOpSize ? 2 : 4);
520 insn->addressSize = (hasAdSize ? 4 : 8);
521 insn->displacementSize = (hasOpSize ? 2 : 4);
522 insn->immediateSize = (hasOpSize ? 2 : 4);
523 }
524 }
525
526 return 0;
527}
528
529/*
530 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
531 * extended or escape opcodes).
532 *
533 * @param insn - The instruction whose opcode is to be read.
534 * @return - 0 if the opcode could be read successfully; nonzero otherwise.
535 */
536static int readOpcode(struct InternalInstruction* insn) {
537 /* Determine the length of the primary opcode */
538
539 uint8_t current;
540
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000541 dbgprintf(insn, "readOpcode()");
Sean Callanan04cc3072009-12-19 02:59:52 +0000542
543 insn->opcodeType = ONEBYTE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000544
545 if (insn->vexSize == 3)
546 {
547 switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
548 {
549 default:
550 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
551 return -1;
552 case 0:
553 break;
554 case VEX_LOB_0F:
555 insn->twoByteEscape = 0x0f;
556 insn->opcodeType = TWOBYTE;
557 return consumeByte(insn, &insn->opcode);
558 case VEX_LOB_0F38:
559 insn->twoByteEscape = 0x0f;
560 insn->threeByteEscape = 0x38;
561 insn->opcodeType = THREEBYTE_38;
562 return consumeByte(insn, &insn->opcode);
563 case VEX_LOB_0F3A:
564 insn->twoByteEscape = 0x0f;
565 insn->threeByteEscape = 0x3a;
566 insn->opcodeType = THREEBYTE_3A;
567 return consumeByte(insn, &insn->opcode);
568 }
569 }
570 else if (insn->vexSize == 2)
571 {
572 insn->twoByteEscape = 0x0f;
573 insn->opcodeType = TWOBYTE;
574 return consumeByte(insn, &insn->opcode);
575 }
576
Sean Callanan04cc3072009-12-19 02:59:52 +0000577 if (consumeByte(insn, &current))
578 return -1;
579
580 if (current == 0x0f) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000581 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
Sean Callanan04cc3072009-12-19 02:59:52 +0000582
583 insn->twoByteEscape = current;
584
585 if (consumeByte(insn, &current))
586 return -1;
587
588 if (current == 0x38) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000589 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
Sean Callanan04cc3072009-12-19 02:59:52 +0000590
591 insn->threeByteEscape = current;
592
593 if (consumeByte(insn, &current))
594 return -1;
595
596 insn->opcodeType = THREEBYTE_38;
597 } else if (current == 0x3a) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000598 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
Sean Callanan04cc3072009-12-19 02:59:52 +0000599
600 insn->threeByteEscape = current;
601
602 if (consumeByte(insn, &current))
603 return -1;
604
605 insn->opcodeType = THREEBYTE_3A;
Joerg Sonnenbergerfc4789d2011-04-04 16:58:13 +0000606 } else if (current == 0xa6) {
607 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
608
609 insn->threeByteEscape = current;
610
611 if (consumeByte(insn, &current))
612 return -1;
613
614 insn->opcodeType = THREEBYTE_A6;
615 } else if (current == 0xa7) {
616 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
617
618 insn->threeByteEscape = current;
619
620 if (consumeByte(insn, &current))
621 return -1;
622
623 insn->opcodeType = THREEBYTE_A7;
Sean Callanan04cc3072009-12-19 02:59:52 +0000624 } else {
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000625 dbgprintf(insn, "Didn't find a three-byte escape prefix");
Sean Callanan04cc3072009-12-19 02:59:52 +0000626
627 insn->opcodeType = TWOBYTE;
628 }
629 }
630
631 /*
632 * At this point we have consumed the full opcode.
633 * Anything we consume from here on must be unconsumed.
634 */
635
636 insn->opcode = current;
637
638 return 0;
639}
640
641static int readModRM(struct InternalInstruction* insn);
642
643/*
644 * getIDWithAttrMask - Determines the ID of an instruction, consuming
645 * the ModR/M byte as appropriate for extended and escape opcodes,
646 * and using a supplied attribute mask.
647 *
648 * @param instructionID - A pointer whose target is filled in with the ID of the
649 * instruction.
650 * @param insn - The instruction whose ID is to be determined.
651 * @param attrMask - The attribute mask to search.
652 * @return - 0 if the ModR/M could be read when needed or was not
653 * needed; nonzero otherwise.
654 */
655static int getIDWithAttrMask(uint16_t* instructionID,
656 struct InternalInstruction* insn,
657 uint8_t attrMask) {
658 BOOL hasModRMExtension;
659
660 uint8_t instructionClass;
661
662 instructionClass = contextForAttrs(attrMask);
663
664 hasModRMExtension = modRMRequired(insn->opcodeType,
665 instructionClass,
666 insn->opcode);
667
668 if (hasModRMExtension) {
Rafael Espindola9f9a1062011-01-06 16:48:42 +0000669 if (readModRM(insn))
670 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +0000671
672 *instructionID = decode(insn->opcodeType,
673 instructionClass,
674 insn->opcode,
675 insn->modRM);
676 } else {
677 *instructionID = decode(insn->opcodeType,
678 instructionClass,
679 insn->opcode,
680 0);
681 }
682
683 return 0;
684}
685
686/*
687 * is16BitEquivalent - Determines whether two instruction names refer to
688 * equivalent instructions but one is 16-bit whereas the other is not.
689 *
690 * @param orig - The instruction that is not 16-bit
691 * @param equiv - The instruction that is 16-bit
692 */
693static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
694 off_t i;
695
Sean Callanan010b3732010-04-02 21:23:51 +0000696 for (i = 0;; i++) {
697 if (orig[i] == '\0' && equiv[i] == '\0')
Sean Callanan04cc3072009-12-19 02:59:52 +0000698 return TRUE;
Sean Callanan010b3732010-04-02 21:23:51 +0000699 if (orig[i] == '\0' || equiv[i] == '\0')
Sean Callanan04cc3072009-12-19 02:59:52 +0000700 return FALSE;
Sean Callanan010b3732010-04-02 21:23:51 +0000701 if (orig[i] != equiv[i]) {
702 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
Sean Callanan04cc3072009-12-19 02:59:52 +0000703 continue;
Sean Callanan010b3732010-04-02 21:23:51 +0000704 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
Sean Callanan04cc3072009-12-19 02:59:52 +0000705 continue;
Sean Callanan010b3732010-04-02 21:23:51 +0000706 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
Sean Callanan04cc3072009-12-19 02:59:52 +0000707 continue;
708 return FALSE;
709 }
710 }
711}
712
713/*
Sean Callanan04cc3072009-12-19 02:59:52 +0000714 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
715 * appropriate for extended and escape opcodes. Determines the attributes and
716 * context for the instruction before doing so.
717 *
718 * @param insn - The instruction whose ID is to be determined.
719 * @return - 0 if the ModR/M could be read when needed or was not needed;
720 * nonzero otherwise.
721 */
Benjamin Kramer478e8de2012-02-11 14:50:54 +0000722static int getID(struct InternalInstruction* insn, void *miiArg) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000723 uint8_t attrMask;
724 uint16_t instructionID;
725
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000726 dbgprintf(insn, "getID()");
Sean Callanan04cc3072009-12-19 02:59:52 +0000727
728 attrMask = ATTR_NONE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000729
Sean Callanan04cc3072009-12-19 02:59:52 +0000730 if (insn->mode == MODE_64BIT)
731 attrMask |= ATTR_64BIT;
Sean Callananc3fd5232011-03-15 01:23:15 +0000732
733 if (insn->vexSize) {
734 attrMask |= ATTR_VEX;
735
736 if (insn->vexSize == 3) {
737 switch (ppFromVEX3of3(insn->vexPrefix[2])) {
738 case VEX_PREFIX_66:
739 attrMask |= ATTR_OPSIZE;
740 break;
741 case VEX_PREFIX_F3:
742 attrMask |= ATTR_XS;
743 break;
744 case VEX_PREFIX_F2:
745 attrMask |= ATTR_XD;
746 break;
747 }
748
Sean Callananc3fd5232011-03-15 01:23:15 +0000749 if (lFromVEX3of3(insn->vexPrefix[2]))
750 attrMask |= ATTR_VEXL;
751 }
752 else if (insn->vexSize == 2) {
753 switch (ppFromVEX2of2(insn->vexPrefix[1])) {
754 case VEX_PREFIX_66:
755 attrMask |= ATTR_OPSIZE;
756 break;
757 case VEX_PREFIX_F3:
758 attrMask |= ATTR_XS;
759 break;
760 case VEX_PREFIX_F2:
761 attrMask |= ATTR_XD;
762 break;
763 }
764
765 if (lFromVEX2of2(insn->vexPrefix[1]))
766 attrMask |= ATTR_VEXL;
767 }
768 else {
769 return -1;
770 }
771 }
772 else {
Sean Callananc3fd5232011-03-15 01:23:15 +0000773 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
774 attrMask |= ATTR_OPSIZE;
Craig Topper6491c802012-02-27 01:54:29 +0000775 else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
776 attrMask |= ATTR_ADSIZE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000777 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
778 attrMask |= ATTR_XS;
779 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
780 attrMask |= ATTR_XD;
Sean Callananc3fd5232011-03-15 01:23:15 +0000781 }
782
Craig Topperf18c8962011-10-04 06:30:42 +0000783 if (insn->rexPrefix & 0x08)
784 attrMask |= ATTR_REXW;
Craig Topperf01f1b52011-11-06 23:04:08 +0000785
Sean Callanan010b3732010-04-02 21:23:51 +0000786 if (getIDWithAttrMask(&instructionID, insn, attrMask))
Sean Callanan04cc3072009-12-19 02:59:52 +0000787 return -1;
Craig Topperf01f1b52011-11-06 23:04:08 +0000788
Sean Callanan04cc3072009-12-19 02:59:52 +0000789 /* The following clauses compensate for limitations of the tables. */
Craig Topperf01f1b52011-11-06 23:04:08 +0000790
791 if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) &&
792 !(attrMask & ATTR_OPSIZE)) {
Craig Topperf18c8962011-10-04 06:30:42 +0000793 /*
794 * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
795 * has precedence since there are no L-bit with W-bit entries in the tables.
796 * So if the L-bit isn't significant we should use the W-bit instead.
Craig Topperf01f1b52011-11-06 23:04:08 +0000797 * We only need to do this if the instruction doesn't specify OpSize since
798 * there is a VEX_L_W_OPSIZE table.
Craig Topperf18c8962011-10-04 06:30:42 +0000799 */
800
801 const struct InstructionSpecifier *spec;
802 uint16_t instructionIDWithWBit;
803 const struct InstructionSpecifier *specWithWBit;
804
805 spec = specifierForUID(instructionID);
806
807 if (getIDWithAttrMask(&instructionIDWithWBit,
808 insn,
809 (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) {
810 insn->instructionID = instructionID;
811 insn->spec = spec;
812 return 0;
813 }
814
815 specWithWBit = specifierForUID(instructionIDWithWBit);
816
817 if (instructionID != instructionIDWithWBit) {
818 insn->instructionID = instructionIDWithWBit;
819 insn->spec = specWithWBit;
820 } else {
821 insn->instructionID = instructionID;
822 insn->spec = spec;
823 }
824 return 0;
825 }
826
Sean Callanan04cc3072009-12-19 02:59:52 +0000827 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
828 /*
829 * The instruction tables make no distinction between instructions that
830 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
831 * particular spot (i.e., many MMX operations). In general we're
832 * conservative, but in the specific case where OpSize is present but not
833 * in the right place we check if there's a 16-bit operation.
834 */
835
Benjamin Kramerde0a4fb2010-10-23 09:10:44 +0000836 const struct InstructionSpecifier *spec;
Sean Callanan04cc3072009-12-19 02:59:52 +0000837 uint16_t instructionIDWithOpsize;
Benjamin Kramer915e3d92012-02-11 16:01:02 +0000838 const char *specName, *specWithOpSizeName;
Sean Callanan04cc3072009-12-19 02:59:52 +0000839
840 spec = specifierForUID(instructionID);
841
842 if (getIDWithAttrMask(&instructionIDWithOpsize,
843 insn,
844 attrMask | ATTR_OPSIZE)) {
845 /*
846 * ModRM required with OpSize but not present; give up and return version
847 * without OpSize set
848 */
849
850 insn->instructionID = instructionID;
851 insn->spec = spec;
852 return 0;
853 }
854
Benjamin Kramer915e3d92012-02-11 16:01:02 +0000855 specName = x86DisassemblerGetInstrName(instructionID, miiArg);
856 specWithOpSizeName =
Benjamin Kramer478e8de2012-02-11 14:50:54 +0000857 x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
858
Benjamin Kramer915e3d92012-02-11 16:01:02 +0000859 if (is16BitEquvalent(specName, specWithOpSizeName)) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000860 insn->instructionID = instructionIDWithOpsize;
Benjamin Kramer915e3d92012-02-11 16:01:02 +0000861 insn->spec = specifierForUID(instructionIDWithOpsize);
Sean Callanan04cc3072009-12-19 02:59:52 +0000862 } else {
863 insn->instructionID = instructionID;
864 insn->spec = spec;
865 }
866 return 0;
867 }
Craig Topper21c33652011-10-02 16:56:09 +0000868
869 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
870 insn->rexPrefix & 0x01) {
871 /*
872 * NOOP shouldn't decode as NOOP if REX.b is set. Instead
873 * it should decode as XCHG %r8, %eax.
874 */
875
876 const struct InstructionSpecifier *spec;
877 uint16_t instructionIDWithNewOpcode;
878 const struct InstructionSpecifier *specWithNewOpcode;
879
880 spec = specifierForUID(instructionID);
881
Craig Topperb58a9662011-10-05 03:29:32 +0000882 /* Borrow opcode from one of the other XCHGar opcodes */
Craig Topper21c33652011-10-02 16:56:09 +0000883 insn->opcode = 0x91;
884
885 if (getIDWithAttrMask(&instructionIDWithNewOpcode,
886 insn,
887 attrMask)) {
888 insn->opcode = 0x90;
889
890 insn->instructionID = instructionID;
891 insn->spec = spec;
892 return 0;
893 }
894
895 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
896
Craig Topperb58a9662011-10-05 03:29:32 +0000897 /* Change back */
Craig Topper21c33652011-10-02 16:56:09 +0000898 insn->opcode = 0x90;
899
900 insn->instructionID = instructionIDWithNewOpcode;
901 insn->spec = specWithNewOpcode;
902
903 return 0;
904 }
Sean Callanan04cc3072009-12-19 02:59:52 +0000905
906 insn->instructionID = instructionID;
907 insn->spec = specifierForUID(insn->instructionID);
908
909 return 0;
910}
911
912/*
913 * readSIB - Consumes the SIB byte to determine addressing information for an
914 * instruction.
915 *
916 * @param insn - The instruction whose SIB byte is to be read.
917 * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
918 */
919static int readSIB(struct InternalInstruction* insn) {
Daniel Dunbar8b532de2009-12-22 01:41:37 +0000920 SIBIndex sibIndexBase = 0;
921 SIBBase sibBaseBase = 0;
Sean Callanan04cc3072009-12-19 02:59:52 +0000922 uint8_t index, base;
923
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000924 dbgprintf(insn, "readSIB()");
Sean Callanan04cc3072009-12-19 02:59:52 +0000925
926 if (insn->consumedSIB)
927 return 0;
928
929 insn->consumedSIB = TRUE;
930
931 switch (insn->addressSize) {
932 case 2:
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000933 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
Sean Callanan04cc3072009-12-19 02:59:52 +0000934 return -1;
935 break;
936 case 4:
937 sibIndexBase = SIB_INDEX_EAX;
938 sibBaseBase = SIB_BASE_EAX;
939 break;
940 case 8:
941 sibIndexBase = SIB_INDEX_RAX;
942 sibBaseBase = SIB_BASE_RAX;
943 break;
944 }
945
946 if (consumeByte(insn, &insn->sib))
947 return -1;
948
949 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
950
951 switch (index) {
952 case 0x4:
953 insn->sibIndex = SIB_INDEX_NONE;
954 break;
955 default:
Benjamin Kramer25bddae2011-02-27 18:13:53 +0000956 insn->sibIndex = (SIBIndex)(sibIndexBase + index);
Sean Callanan04cc3072009-12-19 02:59:52 +0000957 if (insn->sibIndex == SIB_INDEX_sib ||
958 insn->sibIndex == SIB_INDEX_sib64)
959 insn->sibIndex = SIB_INDEX_NONE;
960 break;
961 }
962
963 switch (scaleFromSIB(insn->sib)) {
964 case 0:
965 insn->sibScale = 1;
966 break;
967 case 1:
968 insn->sibScale = 2;
969 break;
970 case 2:
971 insn->sibScale = 4;
972 break;
973 case 3:
974 insn->sibScale = 8;
975 break;
976 }
977
978 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
979
980 switch (base) {
981 case 0x5:
982 switch (modFromModRM(insn->modRM)) {
983 case 0x0:
984 insn->eaDisplacement = EA_DISP_32;
985 insn->sibBase = SIB_BASE_NONE;
986 break;
987 case 0x1:
988 insn->eaDisplacement = EA_DISP_8;
989 insn->sibBase = (insn->addressSize == 4 ?
990 SIB_BASE_EBP : SIB_BASE_RBP);
991 break;
992 case 0x2:
993 insn->eaDisplacement = EA_DISP_32;
994 insn->sibBase = (insn->addressSize == 4 ?
995 SIB_BASE_EBP : SIB_BASE_RBP);
996 break;
997 case 0x3:
Sean Callanan010b3732010-04-02 21:23:51 +0000998 debug("Cannot have Mod = 0b11 and a SIB byte");
999 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001000 }
1001 break;
1002 default:
Benjamin Kramer25bddae2011-02-27 18:13:53 +00001003 insn->sibBase = (SIBBase)(sibBaseBase + base);
Sean Callanan04cc3072009-12-19 02:59:52 +00001004 break;
1005 }
1006
1007 return 0;
1008}
1009
1010/*
1011 * readDisplacement - Consumes the displacement of an instruction.
1012 *
1013 * @param insn - The instruction whose displacement is to be read.
1014 * @return - 0 if the displacement byte was successfully read; nonzero
1015 * otherwise.
1016 */
1017static int readDisplacement(struct InternalInstruction* insn) {
1018 int8_t d8;
1019 int16_t d16;
1020 int32_t d32;
1021
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001022 dbgprintf(insn, "readDisplacement()");
Sean Callanan04cc3072009-12-19 02:59:52 +00001023
1024 if (insn->consumedDisplacement)
1025 return 0;
1026
1027 insn->consumedDisplacement = TRUE;
Kevin Enderby6fbcd8d2012-02-23 18:18:17 +00001028 insn->displacementOffset = insn->readerCursor - insn->startLocation;
Sean Callanan04cc3072009-12-19 02:59:52 +00001029
1030 switch (insn->eaDisplacement) {
1031 case EA_DISP_NONE:
1032 insn->consumedDisplacement = FALSE;
1033 break;
1034 case EA_DISP_8:
1035 if (consumeInt8(insn, &d8))
1036 return -1;
1037 insn->displacement = d8;
1038 break;
1039 case EA_DISP_16:
1040 if (consumeInt16(insn, &d16))
1041 return -1;
1042 insn->displacement = d16;
1043 break;
1044 case EA_DISP_32:
1045 if (consumeInt32(insn, &d32))
1046 return -1;
1047 insn->displacement = d32;
1048 break;
1049 }
1050
1051 insn->consumedDisplacement = TRUE;
1052 return 0;
1053}
1054
1055/*
1056 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
1057 * displacement) for an instruction and interprets it.
1058 *
1059 * @param insn - The instruction whose addressing information is to be read.
1060 * @return - 0 if the information was successfully read; nonzero otherwise.
1061 */
1062static int readModRM(struct InternalInstruction* insn) {
1063 uint8_t mod, rm, reg;
1064
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001065 dbgprintf(insn, "readModRM()");
Sean Callanan04cc3072009-12-19 02:59:52 +00001066
1067 if (insn->consumedModRM)
1068 return 0;
1069
Rafael Espindola9f9a1062011-01-06 16:48:42 +00001070 if (consumeByte(insn, &insn->modRM))
1071 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001072 insn->consumedModRM = TRUE;
1073
1074 mod = modFromModRM(insn->modRM);
1075 rm = rmFromModRM(insn->modRM);
1076 reg = regFromModRM(insn->modRM);
1077
1078 /*
1079 * This goes by insn->registerSize to pick the correct register, which messes
1080 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
1081 * fixupReg().
1082 */
1083 switch (insn->registerSize) {
1084 case 2:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001085 insn->regBase = MODRM_REG_AX;
Sean Callanan04cc3072009-12-19 02:59:52 +00001086 insn->eaRegBase = EA_REG_AX;
1087 break;
1088 case 4:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001089 insn->regBase = MODRM_REG_EAX;
Sean Callanan04cc3072009-12-19 02:59:52 +00001090 insn->eaRegBase = EA_REG_EAX;
1091 break;
1092 case 8:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001093 insn->regBase = MODRM_REG_RAX;
Sean Callanan04cc3072009-12-19 02:59:52 +00001094 insn->eaRegBase = EA_REG_RAX;
1095 break;
1096 }
1097
1098 reg |= rFromREX(insn->rexPrefix) << 3;
1099 rm |= bFromREX(insn->rexPrefix) << 3;
1100
1101 insn->reg = (Reg)(insn->regBase + reg);
1102
1103 switch (insn->addressSize) {
1104 case 2:
1105 insn->eaBaseBase = EA_BASE_BX_SI;
1106
1107 switch (mod) {
1108 case 0x0:
1109 if (rm == 0x6) {
1110 insn->eaBase = EA_BASE_NONE;
1111 insn->eaDisplacement = EA_DISP_16;
Sean Callanan010b3732010-04-02 21:23:51 +00001112 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001113 return -1;
1114 } else {
1115 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1116 insn->eaDisplacement = EA_DISP_NONE;
1117 }
1118 break;
1119 case 0x1:
1120 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1121 insn->eaDisplacement = EA_DISP_8;
Sean Callanan010b3732010-04-02 21:23:51 +00001122 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001123 return -1;
1124 break;
1125 case 0x2:
1126 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1127 insn->eaDisplacement = EA_DISP_16;
Sean Callanan010b3732010-04-02 21:23:51 +00001128 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001129 return -1;
1130 break;
1131 case 0x3:
1132 insn->eaBase = (EABase)(insn->eaRegBase + rm);
Sean Callanan010b3732010-04-02 21:23:51 +00001133 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001134 return -1;
1135 break;
1136 }
1137 break;
1138 case 4:
1139 case 8:
1140 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
1141
1142 switch (mod) {
1143 case 0x0:
1144 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1145 switch (rm) {
1146 case 0x4:
1147 case 0xc: /* in case REXW.b is set */
1148 insn->eaBase = (insn->addressSize == 4 ?
1149 EA_BASE_sib : EA_BASE_sib64);
1150 readSIB(insn);
Sean Callanan010b3732010-04-02 21:23:51 +00001151 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001152 return -1;
1153 break;
1154 case 0x5:
1155 insn->eaBase = EA_BASE_NONE;
1156 insn->eaDisplacement = EA_DISP_32;
Sean Callanan010b3732010-04-02 21:23:51 +00001157 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001158 return -1;
1159 break;
1160 default:
1161 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1162 break;
1163 }
1164 break;
1165 case 0x1:
1166 case 0x2:
1167 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1168 switch (rm) {
1169 case 0x4:
1170 case 0xc: /* in case REXW.b is set */
1171 insn->eaBase = EA_BASE_sib;
1172 readSIB(insn);
Sean Callanan010b3732010-04-02 21:23:51 +00001173 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001174 return -1;
1175 break;
1176 default:
1177 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
Sean Callanan010b3732010-04-02 21:23:51 +00001178 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001179 return -1;
1180 break;
1181 }
1182 break;
1183 case 0x3:
1184 insn->eaDisplacement = EA_DISP_NONE;
1185 insn->eaBase = (EABase)(insn->eaRegBase + rm);
1186 break;
1187 }
1188 break;
1189 } /* switch (insn->addressSize) */
1190
1191 return 0;
1192}
1193
1194#define GENERIC_FIXUP_FUNC(name, base, prefix) \
1195 static uint8_t name(struct InternalInstruction *insn, \
1196 OperandType type, \
1197 uint8_t index, \
1198 uint8_t *valid) { \
1199 *valid = 1; \
1200 switch (type) { \
1201 default: \
Sean Callanan010b3732010-04-02 21:23:51 +00001202 debug("Unhandled register type"); \
1203 *valid = 0; \
1204 return 0; \
Sean Callanan04cc3072009-12-19 02:59:52 +00001205 case TYPE_Rv: \
1206 return base + index; \
1207 case TYPE_R8: \
Sean Callanan010b3732010-04-02 21:23:51 +00001208 if (insn->rexPrefix && \
Sean Callanan04cc3072009-12-19 02:59:52 +00001209 index >= 4 && index <= 7) { \
1210 return prefix##_SPL + (index - 4); \
1211 } else { \
1212 return prefix##_AL + index; \
1213 } \
1214 case TYPE_R16: \
1215 return prefix##_AX + index; \
1216 case TYPE_R32: \
1217 return prefix##_EAX + index; \
1218 case TYPE_R64: \
1219 return prefix##_RAX + index; \
Sean Callananc3fd5232011-03-15 01:23:15 +00001220 case TYPE_XMM256: \
1221 return prefix##_YMM0 + index; \
Sean Callanan04cc3072009-12-19 02:59:52 +00001222 case TYPE_XMM128: \
1223 case TYPE_XMM64: \
1224 case TYPE_XMM32: \
1225 case TYPE_XMM: \
1226 return prefix##_XMM0 + index; \
1227 case TYPE_MM64: \
1228 case TYPE_MM32: \
1229 case TYPE_MM: \
Sean Callanan010b3732010-04-02 21:23:51 +00001230 if (index > 7) \
Sean Callanan04cc3072009-12-19 02:59:52 +00001231 *valid = 0; \
1232 return prefix##_MM0 + index; \
1233 case TYPE_SEGMENTREG: \
Sean Callanan010b3732010-04-02 21:23:51 +00001234 if (index > 5) \
Sean Callanan04cc3072009-12-19 02:59:52 +00001235 *valid = 0; \
1236 return prefix##_ES + index; \
1237 case TYPE_DEBUGREG: \
Sean Callanan010b3732010-04-02 21:23:51 +00001238 if (index > 7) \
Sean Callanan04cc3072009-12-19 02:59:52 +00001239 *valid = 0; \
1240 return prefix##_DR0 + index; \
Sean Callanane7e1cf92010-05-06 20:59:00 +00001241 case TYPE_CONTROLREG: \
Sean Callanan010b3732010-04-02 21:23:51 +00001242 if (index > 8) \
Sean Callanan04cc3072009-12-19 02:59:52 +00001243 *valid = 0; \
Sean Callanane7e1cf92010-05-06 20:59:00 +00001244 return prefix##_CR0 + index; \
Sean Callanan04cc3072009-12-19 02:59:52 +00001245 } \
1246 }
1247
1248/*
1249 * fixup*Value - Consults an operand type to determine the meaning of the
1250 * reg or R/M field. If the operand is an XMM operand, for example, an
1251 * operand would be XMM0 instead of AX, which readModRM() would otherwise
1252 * misinterpret it as.
1253 *
1254 * @param insn - The instruction containing the operand.
1255 * @param type - The operand type.
1256 * @param index - The existing value of the field as reported by readModRM().
1257 * @param valid - The address of a uint8_t. The target is set to 1 if the
1258 * field is valid for the register class; 0 if not.
Sean Callanan010b3732010-04-02 21:23:51 +00001259 * @return - The proper value.
Sean Callanan04cc3072009-12-19 02:59:52 +00001260 */
Sean Callanan2f9443f2009-12-22 02:07:42 +00001261GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
Sean Callanan04cc3072009-12-19 02:59:52 +00001262GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
1263
1264/*
1265 * fixupReg - Consults an operand specifier to determine which of the
1266 * fixup*Value functions to use in correcting readModRM()'ss interpretation.
1267 *
1268 * @param insn - See fixup*Value().
1269 * @param op - The operand specifier.
1270 * @return - 0 if fixup was successful; -1 if the register returned was
1271 * invalid for its class.
1272 */
1273static int fixupReg(struct InternalInstruction *insn,
Benjamin Kramerde0a4fb2010-10-23 09:10:44 +00001274 const struct OperandSpecifier *op) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001275 uint8_t valid;
1276
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001277 dbgprintf(insn, "fixupReg()");
Sean Callanan04cc3072009-12-19 02:59:52 +00001278
1279 switch ((OperandEncoding)op->encoding) {
1280 default:
Sean Callanan010b3732010-04-02 21:23:51 +00001281 debug("Expected a REG or R/M encoding in fixupReg");
1282 return -1;
Sean Callananc3fd5232011-03-15 01:23:15 +00001283 case ENCODING_VVVV:
1284 insn->vvvv = (Reg)fixupRegValue(insn,
1285 (OperandType)op->type,
1286 insn->vvvv,
1287 &valid);
1288 if (!valid)
1289 return -1;
1290 break;
Sean Callanan04cc3072009-12-19 02:59:52 +00001291 case ENCODING_REG:
1292 insn->reg = (Reg)fixupRegValue(insn,
1293 (OperandType)op->type,
1294 insn->reg - insn->regBase,
1295 &valid);
1296 if (!valid)
1297 return -1;
1298 break;
1299 case ENCODING_RM:
1300 if (insn->eaBase >= insn->eaRegBase) {
1301 insn->eaBase = (EABase)fixupRMValue(insn,
1302 (OperandType)op->type,
1303 insn->eaBase - insn->eaRegBase,
1304 &valid);
1305 if (!valid)
1306 return -1;
1307 }
1308 break;
1309 }
1310
1311 return 0;
1312}
1313
1314/*
1315 * readOpcodeModifier - Reads an operand from the opcode field of an
1316 * instruction. Handles AddRegFrm instructions.
1317 *
1318 * @param insn - The instruction whose opcode field is to be read.
1319 * @param inModRM - Indicates that the opcode field is to be read from the
1320 * ModR/M extension; useful for escape opcodes
Sean Callanan010b3732010-04-02 21:23:51 +00001321 * @return - 0 on success; nonzero otherwise.
Sean Callanan04cc3072009-12-19 02:59:52 +00001322 */
Sean Callanan010b3732010-04-02 21:23:51 +00001323static int readOpcodeModifier(struct InternalInstruction* insn) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001324 dbgprintf(insn, "readOpcodeModifier()");
Sean Callanan04cc3072009-12-19 02:59:52 +00001325
1326 if (insn->consumedOpcodeModifier)
Sean Callanan010b3732010-04-02 21:23:51 +00001327 return 0;
Sean Callanan04cc3072009-12-19 02:59:52 +00001328
1329 insn->consumedOpcodeModifier = TRUE;
1330
Sean Callanan010b3732010-04-02 21:23:51 +00001331 switch (insn->spec->modifierType) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001332 default:
Sean Callanan010b3732010-04-02 21:23:51 +00001333 debug("Unknown modifier type.");
1334 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001335 case MODIFIER_NONE:
Sean Callanan010b3732010-04-02 21:23:51 +00001336 debug("No modifier but an operand expects one.");
1337 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001338 case MODIFIER_OPCODE:
1339 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
Sean Callanan010b3732010-04-02 21:23:51 +00001340 return 0;
Sean Callanan04cc3072009-12-19 02:59:52 +00001341 case MODIFIER_MODRM:
1342 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
Sean Callanan010b3732010-04-02 21:23:51 +00001343 return 0;
Sean Callanan04cc3072009-12-19 02:59:52 +00001344 }
1345}
1346
1347/*
1348 * readOpcodeRegister - Reads an operand from the opcode field of an
1349 * instruction and interprets it appropriately given the operand width.
1350 * Handles AddRegFrm instructions.
1351 *
1352 * @param insn - See readOpcodeModifier().
1353 * @param size - The width (in bytes) of the register being specified.
1354 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1355 * RAX.
Sean Callanan010b3732010-04-02 21:23:51 +00001356 * @return - 0 on success; nonzero otherwise.
Sean Callanan04cc3072009-12-19 02:59:52 +00001357 */
Sean Callanan010b3732010-04-02 21:23:51 +00001358static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001359 dbgprintf(insn, "readOpcodeRegister()");
Sean Callanan04cc3072009-12-19 02:59:52 +00001360
Sean Callanan010b3732010-04-02 21:23:51 +00001361 if (readOpcodeModifier(insn))
1362 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001363
1364 if (size == 0)
1365 size = insn->registerSize;
1366
1367 switch (size) {
1368 case 1:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001369 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1370 | insn->opcodeModifier));
Sean Callanan010b3732010-04-02 21:23:51 +00001371 if (insn->rexPrefix &&
1372 insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1373 insn->opcodeRegister < MODRM_REG_AL + 0x8) {
Sean Callanan2f9443f2009-12-22 02:07:42 +00001374 insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1375 + (insn->opcodeRegister - MODRM_REG_AL - 4));
Sean Callanan04cc3072009-12-19 02:59:52 +00001376 }
1377
1378 break;
1379 case 2:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001380 insn->opcodeRegister = (Reg)(MODRM_REG_AX
1381 + ((bFromREX(insn->rexPrefix) << 3)
1382 | insn->opcodeModifier));
Sean Callanan04cc3072009-12-19 02:59:52 +00001383 break;
1384 case 4:
Sean Callanan010b3732010-04-02 21:23:51 +00001385 insn->opcodeRegister = (Reg)(MODRM_REG_EAX
Sean Callanan2f9443f2009-12-22 02:07:42 +00001386 + ((bFromREX(insn->rexPrefix) << 3)
1387 | insn->opcodeModifier));
Sean Callanan04cc3072009-12-19 02:59:52 +00001388 break;
1389 case 8:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001390 insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1391 + ((bFromREX(insn->rexPrefix) << 3)
1392 | insn->opcodeModifier));
Sean Callanan04cc3072009-12-19 02:59:52 +00001393 break;
1394 }
Sean Callanan010b3732010-04-02 21:23:51 +00001395
1396 return 0;
Sean Callanan04cc3072009-12-19 02:59:52 +00001397}
1398
1399/*
1400 * readImmediate - Consumes an immediate operand from an instruction, given the
1401 * desired operand size.
1402 *
1403 * @param insn - The instruction whose operand is to be read.
1404 * @param size - The width (in bytes) of the operand.
1405 * @return - 0 if the immediate was successfully consumed; nonzero
1406 * otherwise.
1407 */
1408static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1409 uint8_t imm8;
1410 uint16_t imm16;
1411 uint32_t imm32;
1412 uint64_t imm64;
1413
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001414 dbgprintf(insn, "readImmediate()");
Sean Callanan04cc3072009-12-19 02:59:52 +00001415
Sean Callanan010b3732010-04-02 21:23:51 +00001416 if (insn->numImmediatesConsumed == 2) {
1417 debug("Already consumed two immediates");
1418 return -1;
1419 }
Sean Callanan04cc3072009-12-19 02:59:52 +00001420
1421 if (size == 0)
1422 size = insn->immediateSize;
1423 else
1424 insn->immediateSize = size;
Kevin Enderby6fbcd8d2012-02-23 18:18:17 +00001425 insn->immediateOffset = insn->readerCursor - insn->startLocation;
Sean Callanan04cc3072009-12-19 02:59:52 +00001426
1427 switch (size) {
1428 case 1:
1429 if (consumeByte(insn, &imm8))
1430 return -1;
1431 insn->immediates[insn->numImmediatesConsumed] = imm8;
1432 break;
1433 case 2:
1434 if (consumeUInt16(insn, &imm16))
1435 return -1;
1436 insn->immediates[insn->numImmediatesConsumed] = imm16;
1437 break;
1438 case 4:
1439 if (consumeUInt32(insn, &imm32))
1440 return -1;
1441 insn->immediates[insn->numImmediatesConsumed] = imm32;
1442 break;
1443 case 8:
1444 if (consumeUInt64(insn, &imm64))
1445 return -1;
1446 insn->immediates[insn->numImmediatesConsumed] = imm64;
1447 break;
1448 }
1449
1450 insn->numImmediatesConsumed++;
1451
1452 return 0;
1453}
1454
1455/*
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001456 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
Sean Callananc3fd5232011-03-15 01:23:15 +00001457 *
1458 * @param insn - The instruction whose operand is to be read.
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001459 * @return - 0 if the vvvv was successfully consumed; nonzero
Sean Callananc3fd5232011-03-15 01:23:15 +00001460 * otherwise.
1461 */
1462static int readVVVV(struct InternalInstruction* insn) {
1463 dbgprintf(insn, "readVVVV()");
1464
1465 if (insn->vexSize == 3)
1466 insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
1467 else if (insn->vexSize == 2)
1468 insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
1469 else
1470 return -1;
1471
Craig Topper0d0be472011-10-03 08:14:29 +00001472 if (insn->mode != MODE_64BIT)
1473 insn->vvvv &= 0x7;
1474
Sean Callananc3fd5232011-03-15 01:23:15 +00001475 return 0;
1476}
1477
1478/*
Sean Callanan04cc3072009-12-19 02:59:52 +00001479 * readOperands - Consults the specifier for an instruction and consumes all
1480 * operands for that instruction, interpreting them as it goes.
1481 *
1482 * @param insn - The instruction whose operands are to be read and interpreted.
1483 * @return - 0 if all operands could be read; nonzero otherwise.
1484 */
1485static int readOperands(struct InternalInstruction* insn) {
1486 int index;
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001487 int hasVVVV, needVVVV;
Craig Topper2ba766a2011-12-30 06:23:39 +00001488 int sawRegImm = 0;
Sean Callanan04cc3072009-12-19 02:59:52 +00001489
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001490 dbgprintf(insn, "readOperands()");
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001491
1492 /* If non-zero vvvv specified, need to make sure one of the operands
1493 uses it. */
1494 hasVVVV = !readVVVV(insn);
1495 needVVVV = hasVVVV && (insn->vvvv != 0);
Sean Callanan04cc3072009-12-19 02:59:52 +00001496
1497 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
1498 switch (insn->spec->operands[index].encoding) {
1499 case ENCODING_NONE:
1500 break;
1501 case ENCODING_REG:
1502 case ENCODING_RM:
1503 if (readModRM(insn))
1504 return -1;
1505 if (fixupReg(insn, &insn->spec->operands[index]))
1506 return -1;
1507 break;
1508 case ENCODING_CB:
1509 case ENCODING_CW:
1510 case ENCODING_CD:
1511 case ENCODING_CP:
1512 case ENCODING_CO:
1513 case ENCODING_CT:
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001514 dbgprintf(insn, "We currently don't hande code-offset encodings");
Sean Callanan04cc3072009-12-19 02:59:52 +00001515 return -1;
1516 case ENCODING_IB:
Craig Topper2ba766a2011-12-30 06:23:39 +00001517 if (sawRegImm) {
Benjamin Kramer9c48f262012-01-04 22:06:45 +00001518 /* Saw a register immediate so don't read again and instead split the
1519 previous immediate. FIXME: This is a hack. */
Benjamin Kramer47aecca2012-01-01 17:55:36 +00001520 insn->immediates[insn->numImmediatesConsumed] =
1521 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1522 ++insn->numImmediatesConsumed;
Craig Topper2ba766a2011-12-30 06:23:39 +00001523 break;
1524 }
Sean Callanan04cc3072009-12-19 02:59:52 +00001525 if (readImmediate(insn, 1))
1526 return -1;
Sean Callanan1efe6612010-04-07 21:42:19 +00001527 if (insn->spec->operands[index].type == TYPE_IMM3 &&
1528 insn->immediates[insn->numImmediatesConsumed - 1] > 7)
1529 return -1;
Craig Topper7629d632012-04-03 05:20:24 +00001530 if (insn->spec->operands[index].type == TYPE_IMM5 &&
1531 insn->immediates[insn->numImmediatesConsumed - 1] > 31)
1532 return -1;
Craig Topper2ba766a2011-12-30 06:23:39 +00001533 if (insn->spec->operands[index].type == TYPE_XMM128 ||
1534 insn->spec->operands[index].type == TYPE_XMM256)
1535 sawRegImm = 1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001536 break;
1537 case ENCODING_IW:
1538 if (readImmediate(insn, 2))
1539 return -1;
1540 break;
1541 case ENCODING_ID:
1542 if (readImmediate(insn, 4))
1543 return -1;
1544 break;
1545 case ENCODING_IO:
1546 if (readImmediate(insn, 8))
1547 return -1;
1548 break;
1549 case ENCODING_Iv:
Sean Callanan010b3732010-04-02 21:23:51 +00001550 if (readImmediate(insn, insn->immediateSize))
1551 return -1;
Chris Lattnerd4758fc2010-04-16 21:15:15 +00001552 break;
Sean Callanan04cc3072009-12-19 02:59:52 +00001553 case ENCODING_Ia:
Sean Callanan010b3732010-04-02 21:23:51 +00001554 if (readImmediate(insn, insn->addressSize))
1555 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001556 break;
1557 case ENCODING_RB:
Sean Callanan010b3732010-04-02 21:23:51 +00001558 if (readOpcodeRegister(insn, 1))
1559 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001560 break;
1561 case ENCODING_RW:
Sean Callanan010b3732010-04-02 21:23:51 +00001562 if (readOpcodeRegister(insn, 2))
1563 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001564 break;
1565 case ENCODING_RD:
Sean Callanan010b3732010-04-02 21:23:51 +00001566 if (readOpcodeRegister(insn, 4))
1567 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001568 break;
1569 case ENCODING_RO:
Sean Callanan010b3732010-04-02 21:23:51 +00001570 if (readOpcodeRegister(insn, 8))
1571 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001572 break;
1573 case ENCODING_Rv:
Sean Callanan010b3732010-04-02 21:23:51 +00001574 if (readOpcodeRegister(insn, 0))
1575 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001576 break;
1577 case ENCODING_I:
Sean Callanan010b3732010-04-02 21:23:51 +00001578 if (readOpcodeModifier(insn))
1579 return -1;
Sean Callananc3fd5232011-03-15 01:23:15 +00001580 break;
1581 case ENCODING_VVVV:
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001582 needVVVV = 0; /* Mark that we have found a VVVV operand. */
1583 if (!hasVVVV)
Sean Callananc3fd5232011-03-15 01:23:15 +00001584 return -1;
1585 if (fixupReg(insn, &insn->spec->operands[index]))
1586 return -1;
1587 break;
Sean Callanan04cc3072009-12-19 02:59:52 +00001588 case ENCODING_DUP:
1589 break;
1590 default:
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001591 dbgprintf(insn, "Encountered an operand with an unknown encoding.");
Sean Callanan04cc3072009-12-19 02:59:52 +00001592 return -1;
1593 }
1594 }
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001595
1596 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
1597 if (needVVVV) return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001598
1599 return 0;
1600}
1601
1602/*
1603 * decodeInstruction - Reads and interprets a full instruction provided by the
1604 * user.
1605 *
1606 * @param insn - A pointer to the instruction to be populated. Must be
1607 * pre-allocated.
1608 * @param reader - The function to be used to read the instruction's bytes.
1609 * @param readerArg - A generic argument to be passed to the reader to store
1610 * any internal state.
1611 * @param logger - If non-NULL, the function to be used to write log messages
1612 * and warnings.
1613 * @param loggerArg - A generic argument to be passed to the logger to store
1614 * any internal state.
1615 * @param startLoc - The address (in the reader's address space) of the first
1616 * byte in the instruction.
1617 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1618 * decode the instruction in.
1619 * @return - 0 if the instruction's memory could be read; nonzero if
1620 * not.
1621 */
1622int decodeInstruction(struct InternalInstruction* insn,
1623 byteReader_t reader,
1624 void* readerArg,
1625 dlog_t logger,
1626 void* loggerArg,
Benjamin Kramer478e8de2012-02-11 14:50:54 +00001627 void* miiArg,
Sean Callanan04cc3072009-12-19 02:59:52 +00001628 uint64_t startLoc,
1629 DisassemblerMode mode) {
Daniel Dunbarc745a622009-12-19 03:31:50 +00001630 memset(insn, 0, sizeof(struct InternalInstruction));
Sean Callanan04cc3072009-12-19 02:59:52 +00001631
1632 insn->reader = reader;
1633 insn->readerArg = readerArg;
1634 insn->dlog = logger;
1635 insn->dlogArg = loggerArg;
1636 insn->startLocation = startLoc;
1637 insn->readerCursor = startLoc;
1638 insn->mode = mode;
1639 insn->numImmediatesConsumed = 0;
1640
1641 if (readPrefixes(insn) ||
1642 readOpcode(insn) ||
Benjamin Kramer478e8de2012-02-11 14:50:54 +00001643 getID(insn, miiArg) ||
Sean Callanan04cc3072009-12-19 02:59:52 +00001644 insn->instructionID == 0 ||
1645 readOperands(insn))
1646 return -1;
1647
1648 insn->length = insn->readerCursor - insn->startLocation;
1649
Benjamin Kramer4f672272010-03-18 12:18:36 +00001650 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
1651 startLoc, insn->readerCursor, insn->length);
Sean Callanan04cc3072009-12-19 02:59:52 +00001652
1653 if (insn->length > 15)
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001654 dbgprintf(insn, "Instruction exceeds 15-byte limit");
Sean Callanan04cc3072009-12-19 02:59:52 +00001655
1656 return 0;
1657}