blob: b80e3a90a61e62ba9f6b697eb3acd37e1f9dcf12 [file] [log] [blame]
Richard Smith89ee75d2014-04-20 21:07:34 +00001//===-- X86DisassemblerDecoder.c - Disassembler decoder -------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is part of the X86 Disassembler.
11// It contains the implementation of the instruction decoder.
12// Documentation for the disassembler can be found in X86Disassembler.h.
13//
14//===----------------------------------------------------------------------===//
Sean Callanan04cc3072009-12-19 02:59:52 +000015
Sean Callanan04cc3072009-12-19 02:59:52 +000016#include <stdarg.h> /* for va_*() */
17#include <stdio.h> /* for vsnprintf() */
18#include <stdlib.h> /* for exit() */
Daniel Dunbarc745a622009-12-19 03:31:50 +000019#include <string.h> /* for memset() */
Sean Callanan04cc3072009-12-19 02:59:52 +000020
21#include "X86DisassemblerDecoder.h"
22
Richard Smith89ee75d2014-04-20 21:07:34 +000023using namespace llvm::X86Disassembler;
24
Sean Callanan04cc3072009-12-19 02:59:52 +000025#include "X86GenDisassemblerTables.inc"
26
/* Integer truth values used throughout the decoder. */
#define FALSE 0
#define TRUE 1

/*
 * debug - Reports an internal decoder problem by calling Debug() with the
 *   current file name, line number and the message s.  When NDEBUG is defined
 *   the macro expands to an empty statement instead.
 */
#ifdef NDEBUG
#define debug(s) do { } while (0)
#else
#define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0)
#endif
35
Sean Callanan04cc3072009-12-19 02:59:52 +000036
37/*
38 * contextForAttrs - Client for the instruction context table. Takes a set of
39 * attributes and returns the appropriate decode context.
40 *
41 * @param attrMask - Attributes, from the enumeration attributeBits.
42 * @return - The InstructionContext to use when looking up an
43 * an instruction with these attributes.
44 */
Elena Demikhovsky371e3632013-12-25 11:40:51 +000045static InstructionContext contextForAttrs(uint16_t attrMask) {
Richard Smith89ee75d2014-04-20 21:07:34 +000046 return static_cast<InstructionContext>(CONTEXTS_SYM[attrMask]);
Sean Callanan04cc3072009-12-19 02:59:52 +000047}
48
49/*
50 * modRMRequired - Reads the appropriate instruction table to determine whether
51 * the ModR/M byte is required to decode a particular instruction.
52 *
53 * @param type - The opcode type (i.e., how many bytes it has).
54 * @param insnContext - The context for the instruction, as returned by
55 * contextForAttrs.
56 * @param opcode - The last byte of the instruction's opcode, not counting
57 * ModR/M extensions and escapes.
58 * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
59 */
Sean Callanan588785c2009-12-22 22:51:40 +000060static int modRMRequired(OpcodeType type,
Craig Topper21c33652011-10-02 16:56:09 +000061 InstructionContext insnContext,
Elena Demikhovsky371e3632013-12-25 11:40:51 +000062 uint16_t opcode) {
Daniel Dunbar8b532de2009-12-22 01:41:37 +000063 const struct ContextDecision* decision = 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +000064
Sean Callanan04cc3072009-12-19 02:59:52 +000065 switch (type) {
66 case ONEBYTE:
67 decision = &ONEBYTE_SYM;
68 break;
69 case TWOBYTE:
70 decision = &TWOBYTE_SYM;
71 break;
72 case THREEBYTE_38:
73 decision = &THREEBYTE38_SYM;
74 break;
75 case THREEBYTE_3A:
76 decision = &THREEBYTE3A_SYM;
77 break;
Craig Topper9e3e38a2013-10-03 05:17:48 +000078 case XOP8_MAP:
79 decision = &XOP8_MAP_SYM;
80 break;
81 case XOP9_MAP:
82 decision = &XOP9_MAP_SYM;
83 break;
84 case XOPA_MAP:
85 decision = &XOPA_MAP_SYM;
86 break;
Sean Callanan04cc3072009-12-19 02:59:52 +000087 }
Ahmed Charles636a3d62012-02-19 11:37:01 +000088
Sean Callanan04cc3072009-12-19 02:59:52 +000089 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
90 modrm_type != MODRM_ONEENTRY;
Sean Callanan04cc3072009-12-19 02:59:52 +000091}
92
93/*
94 * decode - Reads the appropriate instruction table to obtain the unique ID of
95 * an instruction.
96 *
97 * @param type - See modRMRequired().
98 * @param insnContext - See modRMRequired().
99 * @param opcode - See modRMRequired().
100 * @param modRM - The ModR/M byte if required, or any value if not.
Sean Callanan010b3732010-04-02 21:23:51 +0000101 * @return - The UID of the instruction, or 0 on failure.
Sean Callanan04cc3072009-12-19 02:59:52 +0000102 */
Sean Callanan588785c2009-12-22 22:51:40 +0000103static InstrUID decode(OpcodeType type,
Sean Callanan010b3732010-04-02 21:23:51 +0000104 InstructionContext insnContext,
105 uint8_t opcode,
106 uint8_t modRM) {
Duncan Sandsae22c602012-02-05 14:20:11 +0000107 const struct ModRMDecision* dec = 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000108
Sean Callanan04cc3072009-12-19 02:59:52 +0000109 switch (type) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000110 case ONEBYTE:
111 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
112 break;
113 case TWOBYTE:
114 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
115 break;
116 case THREEBYTE_38:
117 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
118 break;
119 case THREEBYTE_3A:
120 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
121 break;
Craig Topper9e3e38a2013-10-03 05:17:48 +0000122 case XOP8_MAP:
123 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
124 break;
125 case XOP9_MAP:
126 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
127 break;
128 case XOPA_MAP:
129 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
130 break;
Sean Callanan04cc3072009-12-19 02:59:52 +0000131 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000132
Sean Callanan04cc3072009-12-19 02:59:52 +0000133 switch (dec->modrm_type) {
134 default:
Sean Callanan010b3732010-04-02 21:23:51 +0000135 debug("Corrupt table! Unknown modrm_type");
136 return 0;
Sean Callanan04cc3072009-12-19 02:59:52 +0000137 case MODRM_ONEENTRY:
Craig Topper487e7442012-02-09 07:45:30 +0000138 return modRMTable[dec->instructionIDs];
Sean Callanan04cc3072009-12-19 02:59:52 +0000139 case MODRM_SPLITRM:
140 if (modFromModRM(modRM) == 0x3)
Craig Topper487e7442012-02-09 07:45:30 +0000141 return modRMTable[dec->instructionIDs+1];
142 return modRMTable[dec->instructionIDs];
Craig Toppera0cd9702012-02-09 08:58:07 +0000143 case MODRM_SPLITREG:
144 if (modFromModRM(modRM) == 0x3)
145 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
146 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
Craig Topper963305b2012-09-13 05:45:42 +0000147 case MODRM_SPLITMISC:
148 if (modFromModRM(modRM) == 0x3)
149 return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
150 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
Sean Callanan04cc3072009-12-19 02:59:52 +0000151 case MODRM_FULL:
Craig Topper487e7442012-02-09 07:45:30 +0000152 return modRMTable[dec->instructionIDs+modRM];
Sean Callanan04cc3072009-12-19 02:59:52 +0000153 }
Sean Callanan04cc3072009-12-19 02:59:52 +0000154}
155
156/*
157 * specifierForUID - Given a UID, returns the name and operand specification for
158 * that instruction.
159 *
160 * @param uid - The unique ID for the instruction. This should be returned by
161 * decode(); specifierForUID will not check bounds.
162 * @return - A pointer to the specification for that instruction.
163 */
Benjamin Kramerde0a4fb2010-10-23 09:10:44 +0000164static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000165 return &INSTRUCTIONS_SYM[uid];
166}
167
168/*
169 * consumeByte - Uses the reader function provided by the user to consume one
170 * byte from the instruction's memory and advance the cursor.
171 *
172 * @param insn - The instruction with the reader function to use. The cursor
173 * for this instruction is advanced.
174 * @param byte - A pointer to a pre-allocated memory buffer to be populated
175 * with the data read.
176 * @return - 0 if the read was successful; nonzero otherwise.
177 */
Sean Callanan588785c2009-12-22 22:51:40 +0000178static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000179 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000180
Sean Callanan04cc3072009-12-19 02:59:52 +0000181 if (!ret)
182 ++(insn->readerCursor);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000183
Sean Callanan04cc3072009-12-19 02:59:52 +0000184 return ret;
185}
186
187/*
188 * lookAtByte - Like consumeByte, but does not advance the cursor.
189 *
190 * @param insn - See consumeByte().
191 * @param byte - See consumeByte().
192 * @return - See consumeByte().
193 */
Sean Callanan588785c2009-12-22 22:51:40 +0000194static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000195 return insn->reader(insn->readerArg, byte, insn->readerCursor);
196}
197
Sean Callanan588785c2009-12-22 22:51:40 +0000198static void unconsumeByte(struct InternalInstruction* insn) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000199 insn->readerCursor--;
200}
201
Sean Callanan588785c2009-12-22 22:51:40 +0000202#define CONSUME_FUNC(name, type) \
203 static int name(struct InternalInstruction* insn, type* ptr) { \
204 type combined = 0; \
205 unsigned offset; \
206 for (offset = 0; offset < sizeof(type); ++offset) { \
207 uint8_t byte; \
208 int ret = insn->reader(insn->readerArg, \
209 &byte, \
210 insn->readerCursor + offset); \
211 if (ret) \
212 return ret; \
Richard Smith228e6d42012-08-24 23:29:28 +0000213 combined = combined | ((uint64_t)byte << (offset * 8)); \
Sean Callanan588785c2009-12-22 22:51:40 +0000214 } \
215 *ptr = combined; \
216 insn->readerCursor += sizeof(type); \
217 return 0; \
Sean Callanan04cc3072009-12-19 02:59:52 +0000218 }
219
220/*
221 * consume* - Use the reader function provided by the user to consume data
222 * values of various sizes from the instruction's memory and advance the
223 * cursor appropriately. These readers perform endian conversion.
224 *
225 * @param insn - See consumeByte().
226 * @param ptr - A pointer to a pre-allocated memory of appropriate size to
227 * be populated with the data read.
228 * @return - See consumeByte().
229 */
230CONSUME_FUNC(consumeInt8, int8_t)
231CONSUME_FUNC(consumeInt16, int16_t)
232CONSUME_FUNC(consumeInt32, int32_t)
233CONSUME_FUNC(consumeUInt16, uint16_t)
234CONSUME_FUNC(consumeUInt32, uint32_t)
235CONSUME_FUNC(consumeUInt64, uint64_t)
236
237/*
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000238 * dbgprintf - Uses the logging function provided by the user to log a single
Sean Callanan04cc3072009-12-19 02:59:52 +0000239 * message, typically without a carriage-return.
240 *
241 * @param insn - The instruction containing the logging function.
242 * @param format - See printf().
243 * @param ... - See printf().
244 */
Sean Callanan588785c2009-12-22 22:51:40 +0000245static void dbgprintf(struct InternalInstruction* insn,
246 const char* format,
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000247 ...) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000248 char buffer[256];
249 va_list ap;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000250
Sean Callanan04cc3072009-12-19 02:59:52 +0000251 if (!insn->dlog)
252 return;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000253
Sean Callanan04cc3072009-12-19 02:59:52 +0000254 va_start(ap, format);
255 (void)vsnprintf(buffer, sizeof(buffer), format, ap);
256 va_end(ap);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000257
Sean Callanan04cc3072009-12-19 02:59:52 +0000258 insn->dlog(insn->dlogArg, buffer);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000259
Sean Callanan04cc3072009-12-19 02:59:52 +0000260 return;
261}
262
263/*
264 * setPrefixPresent - Marks that a particular prefix is present at a particular
265 * location.
266 *
267 * @param insn - The instruction to be marked as having the prefix.
268 * @param prefix - The prefix that is present.
269 * @param location - The location where the prefix is located (in the address
270 * space of the instruction's reader).
271 */
Sean Callanan588785c2009-12-22 22:51:40 +0000272static void setPrefixPresent(struct InternalInstruction* insn,
Sean Callanan04cc3072009-12-19 02:59:52 +0000273 uint8_t prefix,
274 uint64_t location)
275{
276 insn->prefixPresent[prefix] = 1;
277 insn->prefixLocations[prefix] = location;
278}
279
280/*
281 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
282 * present at a given location.
283 *
284 * @param insn - The instruction to be queried.
285 * @param prefix - The prefix.
286 * @param location - The location to query.
287 * @return - Whether the prefix is at that location.
288 */
Sean Callanan588785c2009-12-22 22:51:40 +0000289static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
290 uint8_t prefix,
291 uint64_t location)
Sean Callanan04cc3072009-12-19 02:59:52 +0000292{
293 if (insn->prefixPresent[prefix] == 1 &&
294 insn->prefixLocations[prefix] == location)
295 return TRUE;
296 else
297 return FALSE;
298}
299
300/*
301 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
302 * instruction as having them. Also sets the instruction's default operand,
303 * address, and other relevant data sizes to report operands correctly.
304 *
305 * @param insn - The instruction whose prefixes are to be read.
306 * @return - 0 if the instruction could be read until the end of the prefix
307 * bytes, and no prefixes conflicted; nonzero otherwise.
308 */
309static int readPrefixes(struct InternalInstruction* insn) {
310 BOOL isPrefix = TRUE;
311 BOOL prefixGroups[4] = { FALSE };
312 uint64_t prefixLocation;
Ted Kremenek3c4408c2011-01-23 17:05:06 +0000313 uint8_t byte = 0;
Richard Mitton79917a92013-08-30 21:32:42 +0000314 uint8_t nextByte;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000315
Sean Callanan04cc3072009-12-19 02:59:52 +0000316 BOOL hasAdSize = FALSE;
317 BOOL hasOpSize = FALSE;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000318
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000319 dbgprintf(insn, "readPrefixes()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000320
Sean Callanan04cc3072009-12-19 02:59:52 +0000321 while (isPrefix) {
322 prefixLocation = insn->readerCursor;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000323
Richard Mitton576ee002013-08-30 21:19:48 +0000324 /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
Sean Callanan04cc3072009-12-19 02:59:52 +0000325 if (consumeByte(insn, &byte))
Richard Mitton576ee002013-08-30 21:19:48 +0000326 break;
Kevin Enderby014e1cd2012-03-09 17:52:49 +0000327
Benjamin Krameradfc73d2012-03-10 15:10:06 +0000328 /*
Dave Zarzycki07fabee2013-03-25 18:59:38 +0000329 * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
330 * break and let it be disassembled as a normal "instruction".
Benjamin Krameradfc73d2012-03-10 15:10:06 +0000331 */
Richard Mitton576ee002013-08-30 21:19:48 +0000332 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
333 break;
334
Dave Zarzycki07fabee2013-03-25 18:59:38 +0000335 if (insn->readerCursor - 1 == insn->startLocation
Richard Mitton576ee002013-08-30 21:19:48 +0000336 && (byte == 0xf2 || byte == 0xf3)
337 && !lookAtByte(insn, &nextByte))
338 {
Kevin Enderby35fd7922013-06-20 22:32:18 +0000339 /*
340 * If the byte is 0xf2 or 0xf3, and any of the following conditions are
341 * met:
342 * - it is followed by a LOCK (0xf0) prefix
343 * - it is followed by an xchg instruction
344 * then it should be disassembled as a xacquire/xrelease not repne/rep.
345 */
346 if ((byte == 0xf2 || byte == 0xf3) &&
347 ((nextByte == 0xf0) |
348 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
349 insn->xAcquireRelease = TRUE;
350 /*
351 * Also if the byte is 0xf3, and the following condition is met:
352 * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
353 * "mov mem, imm" (opcode 0xc6/0xc7) instructions.
354 * then it should be disassembled as an xrelease not rep.
355 */
356 if (byte == 0xf3 &&
357 (nextByte == 0x88 || nextByte == 0x89 ||
358 nextByte == 0xc6 || nextByte == 0xc7))
359 insn->xAcquireRelease = TRUE;
Dave Zarzycki07fabee2013-03-25 18:59:38 +0000360 if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
361 if (consumeByte(insn, &nextByte))
362 return -1;
363 if (lookAtByte(insn, &nextByte))
364 return -1;
365 unconsumeByte(insn);
366 }
367 if (nextByte != 0x0f && nextByte != 0x90)
368 break;
369 }
370
Sean Callanan04cc3072009-12-19 02:59:52 +0000371 switch (byte) {
372 case 0xf0: /* LOCK */
373 case 0xf2: /* REPNE/REPNZ */
374 case 0xf3: /* REP or REPE/REPZ */
375 if (prefixGroups[0])
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000376 dbgprintf(insn, "Redundant Group 1 prefix");
Sean Callanan04cc3072009-12-19 02:59:52 +0000377 prefixGroups[0] = TRUE;
378 setPrefixPresent(insn, byte, prefixLocation);
379 break;
380 case 0x2e: /* CS segment override -OR- Branch not taken */
381 case 0x36: /* SS segment override -OR- Branch taken */
382 case 0x3e: /* DS segment override */
383 case 0x26: /* ES segment override */
384 case 0x64: /* FS segment override */
385 case 0x65: /* GS segment override */
386 switch (byte) {
387 case 0x2e:
388 insn->segmentOverride = SEG_OVERRIDE_CS;
389 break;
390 case 0x36:
391 insn->segmentOverride = SEG_OVERRIDE_SS;
392 break;
393 case 0x3e:
394 insn->segmentOverride = SEG_OVERRIDE_DS;
395 break;
396 case 0x26:
397 insn->segmentOverride = SEG_OVERRIDE_ES;
398 break;
399 case 0x64:
400 insn->segmentOverride = SEG_OVERRIDE_FS;
401 break;
402 case 0x65:
403 insn->segmentOverride = SEG_OVERRIDE_GS;
404 break;
405 default:
Sean Callanan010b3732010-04-02 21:23:51 +0000406 debug("Unhandled override");
407 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +0000408 }
409 if (prefixGroups[1])
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000410 dbgprintf(insn, "Redundant Group 2 prefix");
Sean Callanan04cc3072009-12-19 02:59:52 +0000411 prefixGroups[1] = TRUE;
412 setPrefixPresent(insn, byte, prefixLocation);
413 break;
414 case 0x66: /* Operand-size override */
415 if (prefixGroups[2])
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000416 dbgprintf(insn, "Redundant Group 3 prefix");
Sean Callanan04cc3072009-12-19 02:59:52 +0000417 prefixGroups[2] = TRUE;
418 hasOpSize = TRUE;
419 setPrefixPresent(insn, byte, prefixLocation);
420 break;
421 case 0x67: /* Address-size override */
422 if (prefixGroups[3])
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000423 dbgprintf(insn, "Redundant Group 4 prefix");
Sean Callanan04cc3072009-12-19 02:59:52 +0000424 prefixGroups[3] = TRUE;
425 hasAdSize = TRUE;
426 setPrefixPresent(insn, byte, prefixLocation);
427 break;
428 default: /* Not a prefix byte */
429 isPrefix = FALSE;
430 break;
431 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000432
Sean Callanan04cc3072009-12-19 02:59:52 +0000433 if (isPrefix)
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000434 dbgprintf(insn, "Found prefix 0x%hhx", byte);
Sean Callanan04cc3072009-12-19 02:59:52 +0000435 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000436
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000437 insn->vectorExtensionType = TYPE_NO_VEX_XOP;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000438
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000439 if (byte == 0x62) {
440 uint8_t byte1, byte2;
441
442 if (consumeByte(insn, &byte1)) {
443 dbgprintf(insn, "Couldn't read second byte of EVEX prefix");
444 return -1;
445 }
446
447 if (lookAtByte(insn, &byte2)) {
448 dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
449 return -1;
450 }
451
452 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
453 ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
454 insn->vectorExtensionType = TYPE_EVEX;
455 }
456 else {
457 unconsumeByte(insn); /* unconsume byte1 */
458 unconsumeByte(insn); /* unconsume byte */
459 insn->necessaryPrefixLocation = insn->readerCursor - 2;
460 }
461
462 if (insn->vectorExtensionType == TYPE_EVEX) {
463 insn->vectorExtensionPrefix[0] = byte;
464 insn->vectorExtensionPrefix[1] = byte1;
465 if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {
466 dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
467 return -1;
468 }
469 if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {
470 dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");
471 return -1;
472 }
473
474 /* We simulate the REX prefix for simplicity's sake */
475 if (insn->mode == MODE_64BIT) {
476 insn->rexPrefix = 0x40
477 | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3)
478 | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2)
479 | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1)
480 | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
481 }
482
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000483 dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
484 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
485 insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
486 }
487 }
488 else if (byte == 0xc4) {
Sean Callananc3fd5232011-03-15 01:23:15 +0000489 uint8_t byte1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000490
Sean Callananc3fd5232011-03-15 01:23:15 +0000491 if (lookAtByte(insn, &byte1)) {
492 dbgprintf(insn, "Couldn't read second byte of VEX");
493 return -1;
494 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000495
Craig Topper45faba92011-09-26 05:12:43 +0000496 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000497 insn->vectorExtensionType = TYPE_VEX_3B;
Sean Callananc3fd5232011-03-15 01:23:15 +0000498 insn->necessaryPrefixLocation = insn->readerCursor - 1;
499 }
500 else {
Sean Callanan04cc3072009-12-19 02:59:52 +0000501 unconsumeByte(insn);
502 insn->necessaryPrefixLocation = insn->readerCursor - 1;
503 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000504
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000505 if (insn->vectorExtensionType == TYPE_VEX_3B) {
506 insn->vectorExtensionPrefix[0] = byte;
507 consumeByte(insn, &insn->vectorExtensionPrefix[1]);
508 consumeByte(insn, &insn->vectorExtensionPrefix[2]);
Sean Callananc3fd5232011-03-15 01:23:15 +0000509
510 /* We simulate the REX prefix for simplicity's sake */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000511
Craig Topper31854ba2011-10-03 07:51:09 +0000512 if (insn->mode == MODE_64BIT) {
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000513 insn->rexPrefix = 0x40
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000514 | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3)
515 | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2)
516 | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1)
517 | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
Craig Topper31854ba2011-10-03 07:51:09 +0000518 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000519
Craig Topper9e3e38a2013-10-03 05:17:48 +0000520 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000521 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
522 insn->vectorExtensionPrefix[2]);
Sean Callananc3fd5232011-03-15 01:23:15 +0000523 }
Sean Callanan04cc3072009-12-19 02:59:52 +0000524 }
Sean Callananc3fd5232011-03-15 01:23:15 +0000525 else if (byte == 0xc5) {
526 uint8_t byte1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000527
Sean Callananc3fd5232011-03-15 01:23:15 +0000528 if (lookAtByte(insn, &byte1)) {
529 dbgprintf(insn, "Couldn't read second byte of VEX");
530 return -1;
531 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000532
Craig Topper45faba92011-09-26 05:12:43 +0000533 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000534 insn->vectorExtensionType = TYPE_VEX_2B;
Sean Callananc3fd5232011-03-15 01:23:15 +0000535 }
536 else {
537 unconsumeByte(insn);
538 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000539
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000540 if (insn->vectorExtensionType == TYPE_VEX_2B) {
541 insn->vectorExtensionPrefix[0] = byte;
542 consumeByte(insn, &insn->vectorExtensionPrefix[1]);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000543
Craig Topper31854ba2011-10-03 07:51:09 +0000544 if (insn->mode == MODE_64BIT) {
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000545 insn->rexPrefix = 0x40
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000546 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
Craig Topper31854ba2011-10-03 07:51:09 +0000547 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000548
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000549 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1]))
Sean Callananc3fd5232011-03-15 01:23:15 +0000550 {
551 default:
552 break;
553 case VEX_PREFIX_66:
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000554 hasOpSize = TRUE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000555 break;
556 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000557
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000558 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx",
559 insn->vectorExtensionPrefix[0],
560 insn->vectorExtensionPrefix[1]);
Craig Topper9e3e38a2013-10-03 05:17:48 +0000561 }
562 }
563 else if (byte == 0x8f) {
564 uint8_t byte1;
565
566 if (lookAtByte(insn, &byte1)) {
567 dbgprintf(insn, "Couldn't read second byte of XOP");
568 return -1;
569 }
570
Craig Topper9eb88372013-10-03 06:29:59 +0000571 if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000572 insn->vectorExtensionType = TYPE_XOP;
Craig Topper9e3e38a2013-10-03 05:17:48 +0000573 insn->necessaryPrefixLocation = insn->readerCursor - 1;
574 }
575 else {
576 unconsumeByte(insn);
577 insn->necessaryPrefixLocation = insn->readerCursor - 1;
578 }
579
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000580 if (insn->vectorExtensionType == TYPE_XOP) {
581 insn->vectorExtensionPrefix[0] = byte;
582 consumeByte(insn, &insn->vectorExtensionPrefix[1]);
583 consumeByte(insn, &insn->vectorExtensionPrefix[2]);
Craig Topper9e3e38a2013-10-03 05:17:48 +0000584
585 /* We simulate the REX prefix for simplicity's sake */
586
587 if (insn->mode == MODE_64BIT) {
588 insn->rexPrefix = 0x40
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000589 | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3)
590 | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2)
591 | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1)
592 | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
Craig Topper9e3e38a2013-10-03 05:17:48 +0000593 }
594
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000595 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2]))
Craig Topper9e3e38a2013-10-03 05:17:48 +0000596 {
597 default:
598 break;
599 case VEX_PREFIX_66:
600 hasOpSize = TRUE;
601 break;
602 }
603
604 dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000605 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
606 insn->vectorExtensionPrefix[2]);
Sean Callananc3fd5232011-03-15 01:23:15 +0000607 }
608 }
609 else {
610 if (insn->mode == MODE_64BIT) {
611 if ((byte & 0xf0) == 0x40) {
612 uint8_t opcodeByte;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000613
Sean Callananc3fd5232011-03-15 01:23:15 +0000614 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
615 dbgprintf(insn, "Redundant REX prefix");
616 return -1;
617 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000618
Sean Callananc3fd5232011-03-15 01:23:15 +0000619 insn->rexPrefix = byte;
620 insn->necessaryPrefixLocation = insn->readerCursor - 2;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000621
Sean Callananc3fd5232011-03-15 01:23:15 +0000622 dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000623 } else {
Sean Callananc3fd5232011-03-15 01:23:15 +0000624 unconsumeByte(insn);
625 insn->necessaryPrefixLocation = insn->readerCursor - 1;
626 }
627 } else {
628 unconsumeByte(insn);
629 insn->necessaryPrefixLocation = insn->readerCursor - 1;
630 }
631 }
632
Sean Callanan04cc3072009-12-19 02:59:52 +0000633 if (insn->mode == MODE_16BIT) {
634 insn->registerSize = (hasOpSize ? 4 : 2);
635 insn->addressSize = (hasAdSize ? 4 : 2);
636 insn->displacementSize = (hasAdSize ? 4 : 2);
637 insn->immediateSize = (hasOpSize ? 4 : 2);
638 } else if (insn->mode == MODE_32BIT) {
639 insn->registerSize = (hasOpSize ? 2 : 4);
640 insn->addressSize = (hasAdSize ? 2 : 4);
641 insn->displacementSize = (hasAdSize ? 2 : 4);
Sean Callanan9f6c6222010-10-22 01:24:11 +0000642 insn->immediateSize = (hasOpSize ? 2 : 4);
Sean Callanan04cc3072009-12-19 02:59:52 +0000643 } else if (insn->mode == MODE_64BIT) {
644 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
645 insn->registerSize = 8;
646 insn->addressSize = (hasAdSize ? 4 : 8);
647 insn->displacementSize = 4;
648 insn->immediateSize = 4;
649 } else if (insn->rexPrefix) {
650 insn->registerSize = (hasOpSize ? 2 : 4);
651 insn->addressSize = (hasAdSize ? 4 : 8);
652 insn->displacementSize = (hasOpSize ? 2 : 4);
653 insn->immediateSize = (hasOpSize ? 2 : 4);
654 } else {
655 insn->registerSize = (hasOpSize ? 2 : 4);
656 insn->addressSize = (hasAdSize ? 4 : 8);
657 insn->displacementSize = (hasOpSize ? 2 : 4);
658 insn->immediateSize = (hasOpSize ? 2 : 4);
659 }
660 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000661
Sean Callanan04cc3072009-12-19 02:59:52 +0000662 return 0;
663}
664
665/*
666 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
667 * extended or escape opcodes).
668 *
669 * @param insn - The instruction whose opcode is to be read.
670 * @return - 0 if the opcode could be read successfully; nonzero otherwise.
671 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000672static int readOpcode(struct InternalInstruction* insn) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000673 /* Determine the length of the primary opcode */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000674
Sean Callanan04cc3072009-12-19 02:59:52 +0000675 uint8_t current;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000676
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000677 dbgprintf(insn, "readOpcode()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000678
Sean Callanan04cc3072009-12-19 02:59:52 +0000679 insn->opcodeType = ONEBYTE;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000680
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000681 if (insn->vectorExtensionType == TYPE_EVEX)
Sean Callananc3fd5232011-03-15 01:23:15 +0000682 {
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000683 switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
Sean Callananc3fd5232011-03-15 01:23:15 +0000684 default:
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000685 dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
686 mmFromEVEX2of4(insn->vectorExtensionPrefix[1]));
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000687 return -1;
Sean Callananc3fd5232011-03-15 01:23:15 +0000688 case VEX_LOB_0F:
Sean Callananc3fd5232011-03-15 01:23:15 +0000689 insn->opcodeType = TWOBYTE;
690 return consumeByte(insn, &insn->opcode);
691 case VEX_LOB_0F38:
Sean Callananc3fd5232011-03-15 01:23:15 +0000692 insn->opcodeType = THREEBYTE_38;
693 return consumeByte(insn, &insn->opcode);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000694 case VEX_LOB_0F3A:
Sean Callananc3fd5232011-03-15 01:23:15 +0000695 insn->opcodeType = THREEBYTE_3A;
696 return consumeByte(insn, &insn->opcode);
697 }
698 }
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000699 else if (insn->vectorExtensionType == TYPE_VEX_3B) {
700 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
701 default:
702 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
703 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
704 return -1;
705 case VEX_LOB_0F:
706 insn->opcodeType = TWOBYTE;
707 return consumeByte(insn, &insn->opcode);
708 case VEX_LOB_0F38:
709 insn->opcodeType = THREEBYTE_38;
710 return consumeByte(insn, &insn->opcode);
711 case VEX_LOB_0F3A:
712 insn->opcodeType = THREEBYTE_3A;
713 return consumeByte(insn, &insn->opcode);
714 }
715 }
716 else if (insn->vectorExtensionType == TYPE_VEX_2B) {
Sean Callananc3fd5232011-03-15 01:23:15 +0000717 insn->opcodeType = TWOBYTE;
718 return consumeByte(insn, &insn->opcode);
719 }
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000720 else if (insn->vectorExtensionType == TYPE_XOP) {
721 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
Craig Topper9e3e38a2013-10-03 05:17:48 +0000722 default:
723 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000724 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
Craig Topper9e3e38a2013-10-03 05:17:48 +0000725 return -1;
726 case XOP_MAP_SELECT_8:
727 insn->opcodeType = XOP8_MAP;
728 return consumeByte(insn, &insn->opcode);
729 case XOP_MAP_SELECT_9:
730 insn->opcodeType = XOP9_MAP;
731 return consumeByte(insn, &insn->opcode);
732 case XOP_MAP_SELECT_A:
733 insn->opcodeType = XOPA_MAP;
734 return consumeByte(insn, &insn->opcode);
735 }
736 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000737
Sean Callanan04cc3072009-12-19 02:59:52 +0000738 if (consumeByte(insn, &current))
739 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000740
Sean Callanan04cc3072009-12-19 02:59:52 +0000741 if (current == 0x0f) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000742 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000743
Sean Callanan04cc3072009-12-19 02:59:52 +0000744 if (consumeByte(insn, &current))
745 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000746
Sean Callanan04cc3072009-12-19 02:59:52 +0000747 if (current == 0x38) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000748 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000749
Sean Callanan04cc3072009-12-19 02:59:52 +0000750 if (consumeByte(insn, &current))
751 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000752
Sean Callanan04cc3072009-12-19 02:59:52 +0000753 insn->opcodeType = THREEBYTE_38;
754 } else if (current == 0x3a) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000755 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000756
Sean Callanan04cc3072009-12-19 02:59:52 +0000757 if (consumeByte(insn, &current))
758 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000759
Sean Callanan04cc3072009-12-19 02:59:52 +0000760 insn->opcodeType = THREEBYTE_3A;
761 } else {
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000762 dbgprintf(insn, "Didn't find a three-byte escape prefix");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000763
Sean Callanan04cc3072009-12-19 02:59:52 +0000764 insn->opcodeType = TWOBYTE;
765 }
766 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000767
Sean Callanan04cc3072009-12-19 02:59:52 +0000768 /*
769 * At this point we have consumed the full opcode.
770 * Anything we consume from here on must be unconsumed.
771 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000772
Sean Callanan04cc3072009-12-19 02:59:52 +0000773 insn->opcode = current;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000774
Sean Callanan04cc3072009-12-19 02:59:52 +0000775 return 0;
776}
777
778static int readModRM(struct InternalInstruction* insn);
779
780/*
781 * getIDWithAttrMask - Determines the ID of an instruction, consuming
782 * the ModR/M byte as appropriate for extended and escape opcodes,
783 * and using a supplied attribute mask.
784 *
785 * @param instructionID - A pointer whose target is filled in with the ID of the
786 * instruction.
787 * @param insn - The instruction whose ID is to be determined.
788 * @param attrMask - The attribute mask to search.
789 * @return - 0 if the ModR/M could be read when needed or was not
790 * needed; nonzero otherwise.
791 */
792static int getIDWithAttrMask(uint16_t* instructionID,
793 struct InternalInstruction* insn,
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000794 uint16_t attrMask) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000795 BOOL hasModRMExtension;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000796
Richard Smith89ee75d2014-04-20 21:07:34 +0000797 InstructionContext instructionClass = contextForAttrs(attrMask);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000798
Sean Callanan04cc3072009-12-19 02:59:52 +0000799 hasModRMExtension = modRMRequired(insn->opcodeType,
800 instructionClass,
801 insn->opcode);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000802
Sean Callanan04cc3072009-12-19 02:59:52 +0000803 if (hasModRMExtension) {
Rafael Espindola9f9a1062011-01-06 16:48:42 +0000804 if (readModRM(insn))
805 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000806
Sean Callanan04cc3072009-12-19 02:59:52 +0000807 *instructionID = decode(insn->opcodeType,
808 instructionClass,
809 insn->opcode,
810 insn->modRM);
811 } else {
812 *instructionID = decode(insn->opcodeType,
813 instructionClass,
814 insn->opcode,
815 0);
816 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000817
Sean Callanan04cc3072009-12-19 02:59:52 +0000818 return 0;
819}
820
821/*
822 * is16BitEquivalent - Determines whether two instruction names refer to
823 * equivalent instructions but one is 16-bit whereas the other is not.
824 *
825 * @param orig - The instruction that is not 16-bit
826 * @param equiv - The instruction that is 16-bit
827 */
Joerg Sonnenberger2b86e482012-10-29 17:56:15 +0000828static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000829 off_t i;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000830
Sean Callanan010b3732010-04-02 21:23:51 +0000831 for (i = 0;; i++) {
832 if (orig[i] == '\0' && equiv[i] == '\0')
Sean Callanan04cc3072009-12-19 02:59:52 +0000833 return TRUE;
Sean Callanan010b3732010-04-02 21:23:51 +0000834 if (orig[i] == '\0' || equiv[i] == '\0')
Sean Callanan04cc3072009-12-19 02:59:52 +0000835 return FALSE;
Sean Callanan010b3732010-04-02 21:23:51 +0000836 if (orig[i] != equiv[i]) {
837 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
Sean Callanan04cc3072009-12-19 02:59:52 +0000838 continue;
Sean Callanan010b3732010-04-02 21:23:51 +0000839 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
Sean Callanan04cc3072009-12-19 02:59:52 +0000840 continue;
Sean Callanan010b3732010-04-02 21:23:51 +0000841 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
Sean Callanan04cc3072009-12-19 02:59:52 +0000842 continue;
843 return FALSE;
844 }
845 }
846}
847
848/*
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000849 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
850 * appropriate for extended and escape opcodes. Determines the attributes and
Sean Callanan04cc3072009-12-19 02:59:52 +0000851 * context for the instruction before doing so.
852 *
853 * @param insn - The instruction whose ID is to be determined.
854 * @return - 0 if the ModR/M could be read when needed or was not needed;
855 * nonzero otherwise.
856 */
Roman Divacky67923802012-09-05 21:17:34 +0000857static int getID(struct InternalInstruction* insn, const void *miiArg) {
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000858 uint16_t attrMask;
Sean Callanan04cc3072009-12-19 02:59:52 +0000859 uint16_t instructionID;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000860
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000861 dbgprintf(insn, "getID()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000862
Sean Callanan04cc3072009-12-19 02:59:52 +0000863 attrMask = ATTR_NONE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000864
Sean Callanan04cc3072009-12-19 02:59:52 +0000865 if (insn->mode == MODE_64BIT)
866 attrMask |= ATTR_64BIT;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000867
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000868 if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
869 attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
Sean Callananc3fd5232011-03-15 01:23:15 +0000870
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000871 if (insn->vectorExtensionType == TYPE_EVEX) {
872 switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
Sean Callananc3fd5232011-03-15 01:23:15 +0000873 case VEX_PREFIX_66:
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000874 attrMask |= ATTR_OPSIZE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000875 break;
876 case VEX_PREFIX_F3:
877 attrMask |= ATTR_XS;
878 break;
879 case VEX_PREFIX_F2:
880 attrMask |= ATTR_XD;
881 break;
882 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000883
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000884 if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
885 attrMask |= ATTR_EVEXKZ;
886 if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
887 attrMask |= ATTR_EVEXB;
888 if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
889 attrMask |= ATTR_EVEXK;
890 if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
891 attrMask |= ATTR_EVEXL;
892 if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
893 attrMask |= ATTR_EVEXL2;
894 }
895 else if (insn->vectorExtensionType == TYPE_VEX_3B) {
896 switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
897 case VEX_PREFIX_66:
898 attrMask |= ATTR_OPSIZE;
899 break;
900 case VEX_PREFIX_F3:
901 attrMask |= ATTR_XS;
902 break;
903 case VEX_PREFIX_F2:
904 attrMask |= ATTR_XD;
905 break;
906 }
907
908 if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
Sean Callananc3fd5232011-03-15 01:23:15 +0000909 attrMask |= ATTR_VEXL;
910 }
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000911 else if (insn->vectorExtensionType == TYPE_VEX_2B) {
912 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
Sean Callananc3fd5232011-03-15 01:23:15 +0000913 case VEX_PREFIX_66:
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000914 attrMask |= ATTR_OPSIZE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000915 break;
916 case VEX_PREFIX_F3:
917 attrMask |= ATTR_XS;
918 break;
919 case VEX_PREFIX_F2:
920 attrMask |= ATTR_XD;
921 break;
922 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000923
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000924 if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
Craig Topper9e3e38a2013-10-03 05:17:48 +0000925 attrMask |= ATTR_VEXL;
926 }
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000927 else if (insn->vectorExtensionType == TYPE_XOP) {
928 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
Craig Topper9e3e38a2013-10-03 05:17:48 +0000929 case VEX_PREFIX_66:
930 attrMask |= ATTR_OPSIZE;
931 break;
932 case VEX_PREFIX_F3:
933 attrMask |= ATTR_XS;
934 break;
935 case VEX_PREFIX_F2:
936 attrMask |= ATTR_XD;
937 break;
938 }
939
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000940 if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
Sean Callananc3fd5232011-03-15 01:23:15 +0000941 attrMask |= ATTR_VEXL;
942 }
943 else {
944 return -1;
945 }
946 }
947 else {
David Woodhouse5cf4c672014-01-20 12:02:35 +0000948 if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
Sean Callananc3fd5232011-03-15 01:23:15 +0000949 attrMask |= ATTR_OPSIZE;
Craig Topper6491c802012-02-27 01:54:29 +0000950 else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
951 attrMask |= ATTR_ADSIZE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000952 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
953 attrMask |= ATTR_XS;
954 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
955 attrMask |= ATTR_XD;
Sean Callananc3fd5232011-03-15 01:23:15 +0000956 }
957
Craig Topperf18c8962011-10-04 06:30:42 +0000958 if (insn->rexPrefix & 0x08)
959 attrMask |= ATTR_REXW;
Craig Topperf01f1b52011-11-06 23:04:08 +0000960
Sean Callanan010b3732010-04-02 21:23:51 +0000961 if (getIDWithAttrMask(&instructionID, insn, attrMask))
Sean Callanan04cc3072009-12-19 02:59:52 +0000962 return -1;
Craig Topperf01f1b52011-11-06 23:04:08 +0000963
David Woodhouse9c74fdb2014-01-20 12:02:48 +0000964 /*
965 * JCXZ/JECXZ need special handling for 16-bit mode because the meaning
966 * of the AdSize prefix is inverted w.r.t. 32-bit mode.
967 */
968 if (insn->mode == MODE_16BIT && insn->opcode == 0xE3) {
969 const struct InstructionSpecifier *spec;
970 spec = specifierForUID(instructionID);
971
972 /*
973 * Check for Ii8PCRel instructions. We could alternatively do a
974 * string-compare on the names, but this is probably cheaper.
975 */
976 if (x86OperandSets[spec->operands][0].type == TYPE_REL8) {
977 attrMask ^= ATTR_ADSIZE;
978 if (getIDWithAttrMask(&instructionID, insn, attrMask))
979 return -1;
980 }
981 }
982
Sean Callanan04cc3072009-12-19 02:59:52 +0000983 /* The following clauses compensate for limitations of the tables. */
Craig Topperf01f1b52011-11-06 23:04:08 +0000984
David Woodhouse5cf4c672014-01-20 12:02:35 +0000985 if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) &&
986 !(attrMask & ATTR_OPSIZE)) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000987 /*
988 * The instruction tables make no distinction between instructions that
989 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
990 * particular spot (i.e., many MMX operations). In general we're
991 * conservative, but in the specific case where OpSize is present but not
992 * in the right place we check if there's a 16-bit operation.
993 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000994
Benjamin Kramerde0a4fb2010-10-23 09:10:44 +0000995 const struct InstructionSpecifier *spec;
Sean Callanan04cc3072009-12-19 02:59:52 +0000996 uint16_t instructionIDWithOpsize;
Benjamin Kramer915e3d92012-02-11 16:01:02 +0000997 const char *specName, *specWithOpSizeName;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000998
Sean Callanan04cc3072009-12-19 02:59:52 +0000999 spec = specifierForUID(instructionID);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001000
Sean Callanan04cc3072009-12-19 02:59:52 +00001001 if (getIDWithAttrMask(&instructionIDWithOpsize,
1002 insn,
1003 attrMask | ATTR_OPSIZE)) {
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001004 /*
Sean Callanan04cc3072009-12-19 02:59:52 +00001005 * ModRM required with OpSize but not present; give up and return version
1006 * without OpSize set
1007 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001008
Sean Callanan04cc3072009-12-19 02:59:52 +00001009 insn->instructionID = instructionID;
1010 insn->spec = spec;
1011 return 0;
1012 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001013
Richard Smith89ee75d2014-04-20 21:07:34 +00001014 specName = GetInstrName(instructionID, miiArg);
1015 specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg);
Benjamin Kramer478e8de2012-02-11 14:50:54 +00001016
David Woodhouse5cf4c672014-01-20 12:02:35 +00001017 if (is16BitEquivalent(specName, specWithOpSizeName) &&
1018 (insn->mode == MODE_16BIT) ^ insn->prefixPresent[0x66]) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001019 insn->instructionID = instructionIDWithOpsize;
Benjamin Kramer915e3d92012-02-11 16:01:02 +00001020 insn->spec = specifierForUID(instructionIDWithOpsize);
Sean Callanan04cc3072009-12-19 02:59:52 +00001021 } else {
1022 insn->instructionID = instructionID;
1023 insn->spec = spec;
1024 }
1025 return 0;
1026 }
Craig Topper21c33652011-10-02 16:56:09 +00001027
1028 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
1029 insn->rexPrefix & 0x01) {
1030 /*
1031 * NOOP shouldn't decode as NOOP if REX.b is set. Instead
1032 * it should decode as XCHG %r8, %eax.
1033 */
1034
1035 const struct InstructionSpecifier *spec;
1036 uint16_t instructionIDWithNewOpcode;
1037 const struct InstructionSpecifier *specWithNewOpcode;
1038
1039 spec = specifierForUID(instructionID);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001040
Craig Topperb58a9662011-10-05 03:29:32 +00001041 /* Borrow opcode from one of the other XCHGar opcodes */
Craig Topper21c33652011-10-02 16:56:09 +00001042 insn->opcode = 0x91;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001043
Craig Topper21c33652011-10-02 16:56:09 +00001044 if (getIDWithAttrMask(&instructionIDWithNewOpcode,
1045 insn,
1046 attrMask)) {
1047 insn->opcode = 0x90;
1048
1049 insn->instructionID = instructionID;
1050 insn->spec = spec;
1051 return 0;
1052 }
1053
1054 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
1055
Craig Topperb58a9662011-10-05 03:29:32 +00001056 /* Change back */
Craig Topper21c33652011-10-02 16:56:09 +00001057 insn->opcode = 0x90;
1058
1059 insn->instructionID = instructionIDWithNewOpcode;
1060 insn->spec = specWithNewOpcode;
1061
1062 return 0;
1063 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001064
Sean Callanan04cc3072009-12-19 02:59:52 +00001065 insn->instructionID = instructionID;
1066 insn->spec = specifierForUID(insn->instructionID);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001067
Sean Callanan04cc3072009-12-19 02:59:52 +00001068 return 0;
1069}
1070
1071/*
1072 * readSIB - Consumes the SIB byte to determine addressing information for an
1073 * instruction.
1074 *
1075 * @param insn - The instruction whose SIB byte is to be read.
1076 * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
1077 */
1078static int readSIB(struct InternalInstruction* insn) {
Richard Smith89ee75d2014-04-20 21:07:34 +00001079 SIBIndex sibIndexBase = SIB_INDEX_NONE;
1080 SIBBase sibBaseBase = SIB_BASE_NONE;
Sean Callanan04cc3072009-12-19 02:59:52 +00001081 uint8_t index, base;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001082
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001083 dbgprintf(insn, "readSIB()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001084
Sean Callanan04cc3072009-12-19 02:59:52 +00001085 if (insn->consumedSIB)
1086 return 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001087
Sean Callanan04cc3072009-12-19 02:59:52 +00001088 insn->consumedSIB = TRUE;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001089
Sean Callanan04cc3072009-12-19 02:59:52 +00001090 switch (insn->addressSize) {
1091 case 2:
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001092 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
Sean Callanan04cc3072009-12-19 02:59:52 +00001093 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001094 case 4:
1095 sibIndexBase = SIB_INDEX_EAX;
1096 sibBaseBase = SIB_BASE_EAX;
1097 break;
1098 case 8:
1099 sibIndexBase = SIB_INDEX_RAX;
1100 sibBaseBase = SIB_BASE_RAX;
1101 break;
1102 }
1103
1104 if (consumeByte(insn, &insn->sib))
1105 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001106
Sean Callanan04cc3072009-12-19 02:59:52 +00001107 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001108 if (insn->vectorExtensionType == TYPE_EVEX)
1109 index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001110
Sean Callanan04cc3072009-12-19 02:59:52 +00001111 switch (index) {
1112 case 0x4:
1113 insn->sibIndex = SIB_INDEX_NONE;
1114 break;
1115 default:
Benjamin Kramer25bddae2011-02-27 18:13:53 +00001116 insn->sibIndex = (SIBIndex)(sibIndexBase + index);
Sean Callanan04cc3072009-12-19 02:59:52 +00001117 if (insn->sibIndex == SIB_INDEX_sib ||
1118 insn->sibIndex == SIB_INDEX_sib64)
1119 insn->sibIndex = SIB_INDEX_NONE;
1120 break;
1121 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001122
Sean Callanan04cc3072009-12-19 02:59:52 +00001123 switch (scaleFromSIB(insn->sib)) {
1124 case 0:
1125 insn->sibScale = 1;
1126 break;
1127 case 1:
1128 insn->sibScale = 2;
1129 break;
1130 case 2:
1131 insn->sibScale = 4;
1132 break;
1133 case 3:
1134 insn->sibScale = 8;
1135 break;
1136 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001137
Sean Callanan04cc3072009-12-19 02:59:52 +00001138 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001139
Sean Callanan04cc3072009-12-19 02:59:52 +00001140 switch (base) {
1141 case 0x5:
Craig Topperfae5ac22014-02-17 10:03:43 +00001142 case 0xd:
Sean Callanan04cc3072009-12-19 02:59:52 +00001143 switch (modFromModRM(insn->modRM)) {
1144 case 0x0:
1145 insn->eaDisplacement = EA_DISP_32;
1146 insn->sibBase = SIB_BASE_NONE;
1147 break;
1148 case 0x1:
1149 insn->eaDisplacement = EA_DISP_8;
Craig Topperfae5ac22014-02-17 10:03:43 +00001150 insn->sibBase = (SIBBase)(sibBaseBase + base);
Sean Callanan04cc3072009-12-19 02:59:52 +00001151 break;
1152 case 0x2:
1153 insn->eaDisplacement = EA_DISP_32;
Craig Topperfae5ac22014-02-17 10:03:43 +00001154 insn->sibBase = (SIBBase)(sibBaseBase + base);
Sean Callanan04cc3072009-12-19 02:59:52 +00001155 break;
1156 case 0x3:
Sean Callanan010b3732010-04-02 21:23:51 +00001157 debug("Cannot have Mod = 0b11 and a SIB byte");
1158 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001159 }
1160 break;
1161 default:
Benjamin Kramer25bddae2011-02-27 18:13:53 +00001162 insn->sibBase = (SIBBase)(sibBaseBase + base);
Sean Callanan04cc3072009-12-19 02:59:52 +00001163 break;
1164 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001165
Sean Callanan04cc3072009-12-19 02:59:52 +00001166 return 0;
1167}
1168
1169/*
1170 * readDisplacement - Consumes the displacement of an instruction.
1171 *
1172 * @param insn - The instruction whose displacement is to be read.
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001173 * @return - 0 if the displacement byte was successfully read; nonzero
Sean Callanan04cc3072009-12-19 02:59:52 +00001174 * otherwise.
1175 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001176static int readDisplacement(struct InternalInstruction* insn) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001177 int8_t d8;
1178 int16_t d16;
1179 int32_t d32;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001180
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001181 dbgprintf(insn, "readDisplacement()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001182
Sean Callanan04cc3072009-12-19 02:59:52 +00001183 if (insn->consumedDisplacement)
1184 return 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001185
Sean Callanan04cc3072009-12-19 02:59:52 +00001186 insn->consumedDisplacement = TRUE;
Kevin Enderby6fbcd8d2012-02-23 18:18:17 +00001187 insn->displacementOffset = insn->readerCursor - insn->startLocation;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001188
Sean Callanan04cc3072009-12-19 02:59:52 +00001189 switch (insn->eaDisplacement) {
1190 case EA_DISP_NONE:
1191 insn->consumedDisplacement = FALSE;
1192 break;
1193 case EA_DISP_8:
1194 if (consumeInt8(insn, &d8))
1195 return -1;
1196 insn->displacement = d8;
1197 break;
1198 case EA_DISP_16:
1199 if (consumeInt16(insn, &d16))
1200 return -1;
1201 insn->displacement = d16;
1202 break;
1203 case EA_DISP_32:
1204 if (consumeInt32(insn, &d32))
1205 return -1;
1206 insn->displacement = d32;
1207 break;
1208 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001209
Sean Callanan04cc3072009-12-19 02:59:52 +00001210 insn->consumedDisplacement = TRUE;
1211 return 0;
1212}
1213
1214/*
1215 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
1216 * displacement) for an instruction and interprets it.
1217 *
1218 * @param insn - The instruction whose addressing information is to be read.
1219 * @return - 0 if the information was successfully read; nonzero otherwise.
1220 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001221static int readModRM(struct InternalInstruction* insn) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001222 uint8_t mod, rm, reg;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001223
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001224 dbgprintf(insn, "readModRM()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001225
Sean Callanan04cc3072009-12-19 02:59:52 +00001226 if (insn->consumedModRM)
1227 return 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001228
Rafael Espindola9f9a1062011-01-06 16:48:42 +00001229 if (consumeByte(insn, &insn->modRM))
1230 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001231 insn->consumedModRM = TRUE;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001232
Sean Callanan04cc3072009-12-19 02:59:52 +00001233 mod = modFromModRM(insn->modRM);
1234 rm = rmFromModRM(insn->modRM);
1235 reg = regFromModRM(insn->modRM);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001236
Sean Callanan04cc3072009-12-19 02:59:52 +00001237 /*
1238 * This goes by insn->registerSize to pick the correct register, which messes
1239 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
1240 * fixupReg().
1241 */
1242 switch (insn->registerSize) {
1243 case 2:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001244 insn->regBase = MODRM_REG_AX;
Sean Callanan04cc3072009-12-19 02:59:52 +00001245 insn->eaRegBase = EA_REG_AX;
1246 break;
1247 case 4:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001248 insn->regBase = MODRM_REG_EAX;
Sean Callanan04cc3072009-12-19 02:59:52 +00001249 insn->eaRegBase = EA_REG_EAX;
1250 break;
1251 case 8:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001252 insn->regBase = MODRM_REG_RAX;
Sean Callanan04cc3072009-12-19 02:59:52 +00001253 insn->eaRegBase = EA_REG_RAX;
1254 break;
1255 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001256
Sean Callanan04cc3072009-12-19 02:59:52 +00001257 reg |= rFromREX(insn->rexPrefix) << 3;
1258 rm |= bFromREX(insn->rexPrefix) << 3;
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001259 if (insn->vectorExtensionType == TYPE_EVEX) {
1260 reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1261 rm |= xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1262 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001263
Sean Callanan04cc3072009-12-19 02:59:52 +00001264 insn->reg = (Reg)(insn->regBase + reg);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001265
Sean Callanan04cc3072009-12-19 02:59:52 +00001266 switch (insn->addressSize) {
1267 case 2:
1268 insn->eaBaseBase = EA_BASE_BX_SI;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001269
Sean Callanan04cc3072009-12-19 02:59:52 +00001270 switch (mod) {
1271 case 0x0:
1272 if (rm == 0x6) {
1273 insn->eaBase = EA_BASE_NONE;
1274 insn->eaDisplacement = EA_DISP_16;
Sean Callanan010b3732010-04-02 21:23:51 +00001275 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001276 return -1;
1277 } else {
1278 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1279 insn->eaDisplacement = EA_DISP_NONE;
1280 }
1281 break;
1282 case 0x1:
1283 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1284 insn->eaDisplacement = EA_DISP_8;
Craig Topper399e39e2014-01-25 22:48:43 +00001285 insn->displacementSize = 1;
Sean Callanan010b3732010-04-02 21:23:51 +00001286 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001287 return -1;
1288 break;
1289 case 0x2:
1290 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1291 insn->eaDisplacement = EA_DISP_16;
Sean Callanan010b3732010-04-02 21:23:51 +00001292 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001293 return -1;
1294 break;
1295 case 0x3:
1296 insn->eaBase = (EABase)(insn->eaRegBase + rm);
Sean Callanan010b3732010-04-02 21:23:51 +00001297 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001298 return -1;
1299 break;
1300 }
1301 break;
1302 case 4:
1303 case 8:
1304 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001305
Sean Callanan04cc3072009-12-19 02:59:52 +00001306 switch (mod) {
1307 case 0x0:
1308 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1309 switch (rm) {
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001310 case 0x14:
Sean Callanan04cc3072009-12-19 02:59:52 +00001311 case 0x4:
1312 case 0xc: /* in case REXW.b is set */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001313 insn->eaBase = (insn->addressSize == 4 ?
Sean Callanan04cc3072009-12-19 02:59:52 +00001314 EA_BASE_sib : EA_BASE_sib64);
Craig Topper38afbfd2014-03-20 05:56:00 +00001315 if (readSIB(insn) || readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001316 return -1;
1317 break;
1318 case 0x5:
1319 insn->eaBase = EA_BASE_NONE;
1320 insn->eaDisplacement = EA_DISP_32;
Sean Callanan010b3732010-04-02 21:23:51 +00001321 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001322 return -1;
1323 break;
1324 default:
1325 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1326 break;
1327 }
1328 break;
1329 case 0x1:
Craig Topper399e39e2014-01-25 22:48:43 +00001330 insn->displacementSize = 1;
Alp Toker771f7652014-01-26 18:44:34 +00001331 /* FALLTHROUGH */
Sean Callanan04cc3072009-12-19 02:59:52 +00001332 case 0x2:
1333 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1334 switch (rm) {
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001335 case 0x14:
Sean Callanan04cc3072009-12-19 02:59:52 +00001336 case 0x4:
1337 case 0xc: /* in case REXW.b is set */
1338 insn->eaBase = EA_BASE_sib;
Craig Topper38afbfd2014-03-20 05:56:00 +00001339 if (readSIB(insn) || readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001340 return -1;
1341 break;
1342 default:
1343 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
Sean Callanan010b3732010-04-02 21:23:51 +00001344 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001345 return -1;
1346 break;
1347 }
1348 break;
1349 case 0x3:
1350 insn->eaDisplacement = EA_DISP_NONE;
1351 insn->eaBase = (EABase)(insn->eaRegBase + rm);
1352 break;
1353 }
1354 break;
1355 } /* switch (insn->addressSize) */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001356
Sean Callanan04cc3072009-12-19 02:59:52 +00001357 return 0;
1358}
1359
1360#define GENERIC_FIXUP_FUNC(name, base, prefix) \
1361 static uint8_t name(struct InternalInstruction *insn, \
1362 OperandType type, \
1363 uint8_t index, \
1364 uint8_t *valid) { \
1365 *valid = 1; \
1366 switch (type) { \
1367 default: \
Sean Callanan010b3732010-04-02 21:23:51 +00001368 debug("Unhandled register type"); \
1369 *valid = 0; \
1370 return 0; \
Sean Callanan04cc3072009-12-19 02:59:52 +00001371 case TYPE_Rv: \
1372 return base + index; \
1373 case TYPE_R8: \
Sean Callanan010b3732010-04-02 21:23:51 +00001374 if (insn->rexPrefix && \
Sean Callanan04cc3072009-12-19 02:59:52 +00001375 index >= 4 && index <= 7) { \
1376 return prefix##_SPL + (index - 4); \
1377 } else { \
1378 return prefix##_AL + index; \
1379 } \
1380 case TYPE_R16: \
1381 return prefix##_AX + index; \
1382 case TYPE_R32: \
1383 return prefix##_EAX + index; \
1384 case TYPE_R64: \
1385 return prefix##_RAX + index; \
Elena Demikhovsky003e7d72013-07-28 08:28:38 +00001386 case TYPE_XMM512: \
1387 return prefix##_ZMM0 + index; \
Sean Callananc3fd5232011-03-15 01:23:15 +00001388 case TYPE_XMM256: \
1389 return prefix##_YMM0 + index; \
Sean Callanan04cc3072009-12-19 02:59:52 +00001390 case TYPE_XMM128: \
1391 case TYPE_XMM64: \
1392 case TYPE_XMM32: \
1393 case TYPE_XMM: \
1394 return prefix##_XMM0 + index; \
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001395 case TYPE_VK1: \
1396 case TYPE_VK8: \
1397 case TYPE_VK16: \
1398 return prefix##_K0 + index; \
Sean Callanan04cc3072009-12-19 02:59:52 +00001399 case TYPE_MM64: \
1400 case TYPE_MM32: \
1401 case TYPE_MM: \
Sean Callanan010b3732010-04-02 21:23:51 +00001402 if (index > 7) \
Sean Callanan04cc3072009-12-19 02:59:52 +00001403 *valid = 0; \
1404 return prefix##_MM0 + index; \
1405 case TYPE_SEGMENTREG: \
Sean Callanan010b3732010-04-02 21:23:51 +00001406 if (index > 5) \
Sean Callanan04cc3072009-12-19 02:59:52 +00001407 *valid = 0; \
1408 return prefix##_ES + index; \
1409 case TYPE_DEBUGREG: \
Sean Callanan010b3732010-04-02 21:23:51 +00001410 if (index > 7) \
Sean Callanan04cc3072009-12-19 02:59:52 +00001411 *valid = 0; \
1412 return prefix##_DR0 + index; \
Sean Callanane7e1cf92010-05-06 20:59:00 +00001413 case TYPE_CONTROLREG: \
Sean Callanan010b3732010-04-02 21:23:51 +00001414 if (index > 8) \
Sean Callanan04cc3072009-12-19 02:59:52 +00001415 *valid = 0; \
Sean Callanane7e1cf92010-05-06 20:59:00 +00001416 return prefix##_CR0 + index; \
Sean Callanan04cc3072009-12-19 02:59:52 +00001417 } \
1418 }
1419
1420/*
1421 * fixup*Value - Consults an operand type to determine the meaning of the
1422 * reg or R/M field. If the operand is an XMM operand, for example, an
1423 * operand would be XMM0 instead of AX, which readModRM() would otherwise
1424 * misinterpret it as.
1425 *
1426 * @param insn - The instruction containing the operand.
1427 * @param type - The operand type.
1428 * @param index - The existing value of the field as reported by readModRM().
1429 * @param valid - The address of a uint8_t. The target is set to 1 if the
1430 * field is valid for the register class; 0 if not.
Sean Callanan010b3732010-04-02 21:23:51 +00001431 * @return - The proper value.
Sean Callanan04cc3072009-12-19 02:59:52 +00001432 */
Sean Callanan2f9443f2009-12-22 02:07:42 +00001433GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
Sean Callanan04cc3072009-12-19 02:59:52 +00001434GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
1435
1436/*
1437 * fixupReg - Consults an operand specifier to determine which of the
1438 * fixup*Value functions to use in correcting readModRM()'ss interpretation.
1439 *
1440 * @param insn - See fixup*Value().
1441 * @param op - The operand specifier.
1442 * @return - 0 if fixup was successful; -1 if the register returned was
1443 * invalid for its class.
1444 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001445static int fixupReg(struct InternalInstruction *insn,
Benjamin Kramerde0a4fb2010-10-23 09:10:44 +00001446 const struct OperandSpecifier *op) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001447 uint8_t valid;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001448
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001449 dbgprintf(insn, "fixupReg()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001450
Sean Callanan04cc3072009-12-19 02:59:52 +00001451 switch ((OperandEncoding)op->encoding) {
1452 default:
Sean Callanan010b3732010-04-02 21:23:51 +00001453 debug("Expected a REG or R/M encoding in fixupReg");
1454 return -1;
Sean Callananc3fd5232011-03-15 01:23:15 +00001455 case ENCODING_VVVV:
1456 insn->vvvv = (Reg)fixupRegValue(insn,
1457 (OperandType)op->type,
1458 insn->vvvv,
1459 &valid);
1460 if (!valid)
1461 return -1;
1462 break;
Sean Callanan04cc3072009-12-19 02:59:52 +00001463 case ENCODING_REG:
1464 insn->reg = (Reg)fixupRegValue(insn,
1465 (OperandType)op->type,
1466 insn->reg - insn->regBase,
1467 &valid);
1468 if (!valid)
1469 return -1;
1470 break;
1471 case ENCODING_RM:
1472 if (insn->eaBase >= insn->eaRegBase) {
1473 insn->eaBase = (EABase)fixupRMValue(insn,
1474 (OperandType)op->type,
1475 insn->eaBase - insn->eaRegBase,
1476 &valid);
1477 if (!valid)
1478 return -1;
1479 }
1480 break;
1481 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001482
Sean Callanan04cc3072009-12-19 02:59:52 +00001483 return 0;
1484}
1485
1486/*
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001487 * readOpcodeRegister - Reads an operand from the opcode field of an
Sean Callanan04cc3072009-12-19 02:59:52 +00001488 * instruction and interprets it appropriately given the operand width.
1489 * Handles AddRegFrm instructions.
1490 *
Craig Topper91551182014-01-01 15:29:32 +00001491 * @param insn - the instruction whose opcode field is to be read.
Sean Callanan04cc3072009-12-19 02:59:52 +00001492 * @param size - The width (in bytes) of the register being specified.
1493 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1494 * RAX.
Sean Callanan010b3732010-04-02 21:23:51 +00001495 * @return - 0 on success; nonzero otherwise.
Sean Callanan04cc3072009-12-19 02:59:52 +00001496 */
Sean Callanan010b3732010-04-02 21:23:51 +00001497static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001498 dbgprintf(insn, "readOpcodeRegister()");
Sean Callanan04cc3072009-12-19 02:59:52 +00001499
Sean Callanan04cc3072009-12-19 02:59:52 +00001500 if (size == 0)
1501 size = insn->registerSize;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001502
Sean Callanan04cc3072009-12-19 02:59:52 +00001503 switch (size) {
1504 case 1:
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001505 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
Craig Topper91551182014-01-01 15:29:32 +00001506 | (insn->opcode & 7)));
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001507 if (insn->rexPrefix &&
Sean Callanan010b3732010-04-02 21:23:51 +00001508 insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1509 insn->opcodeRegister < MODRM_REG_AL + 0x8) {
Sean Callanan2f9443f2009-12-22 02:07:42 +00001510 insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1511 + (insn->opcodeRegister - MODRM_REG_AL - 4));
Sean Callanan04cc3072009-12-19 02:59:52 +00001512 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001513
Sean Callanan04cc3072009-12-19 02:59:52 +00001514 break;
1515 case 2:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001516 insn->opcodeRegister = (Reg)(MODRM_REG_AX
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001517 + ((bFromREX(insn->rexPrefix) << 3)
Craig Topper91551182014-01-01 15:29:32 +00001518 | (insn->opcode & 7)));
Sean Callanan04cc3072009-12-19 02:59:52 +00001519 break;
1520 case 4:
Sean Callanan010b3732010-04-02 21:23:51 +00001521 insn->opcodeRegister = (Reg)(MODRM_REG_EAX
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001522 + ((bFromREX(insn->rexPrefix) << 3)
Craig Topper91551182014-01-01 15:29:32 +00001523 | (insn->opcode & 7)));
Sean Callanan04cc3072009-12-19 02:59:52 +00001524 break;
1525 case 8:
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001526 insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1527 + ((bFromREX(insn->rexPrefix) << 3)
Craig Topper91551182014-01-01 15:29:32 +00001528 | (insn->opcode & 7)));
Sean Callanan04cc3072009-12-19 02:59:52 +00001529 break;
1530 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001531
Sean Callanan010b3732010-04-02 21:23:51 +00001532 return 0;
Sean Callanan04cc3072009-12-19 02:59:52 +00001533}
1534
1535/*
1536 * readImmediate - Consumes an immediate operand from an instruction, given the
1537 * desired operand size.
1538 *
1539 * @param insn - The instruction whose operand is to be read.
1540 * @param size - The width (in bytes) of the operand.
1541 * @return - 0 if the immediate was successfully consumed; nonzero
1542 * otherwise.
1543 */
1544static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1545 uint8_t imm8;
1546 uint16_t imm16;
1547 uint32_t imm32;
1548 uint64_t imm64;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001549
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001550 dbgprintf(insn, "readImmediate()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001551
Sean Callanan010b3732010-04-02 21:23:51 +00001552 if (insn->numImmediatesConsumed == 2) {
1553 debug("Already consumed two immediates");
1554 return -1;
1555 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001556
Sean Callanan04cc3072009-12-19 02:59:52 +00001557 if (size == 0)
1558 size = insn->immediateSize;
1559 else
1560 insn->immediateSize = size;
Kevin Enderby6fbcd8d2012-02-23 18:18:17 +00001561 insn->immediateOffset = insn->readerCursor - insn->startLocation;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001562
Sean Callanan04cc3072009-12-19 02:59:52 +00001563 switch (size) {
1564 case 1:
1565 if (consumeByte(insn, &imm8))
1566 return -1;
1567 insn->immediates[insn->numImmediatesConsumed] = imm8;
1568 break;
1569 case 2:
1570 if (consumeUInt16(insn, &imm16))
1571 return -1;
1572 insn->immediates[insn->numImmediatesConsumed] = imm16;
1573 break;
1574 case 4:
1575 if (consumeUInt32(insn, &imm32))
1576 return -1;
1577 insn->immediates[insn->numImmediatesConsumed] = imm32;
1578 break;
1579 case 8:
1580 if (consumeUInt64(insn, &imm64))
1581 return -1;
1582 insn->immediates[insn->numImmediatesConsumed] = imm64;
1583 break;
1584 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001585
Sean Callanan04cc3072009-12-19 02:59:52 +00001586 insn->numImmediatesConsumed++;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001587
Sean Callanan04cc3072009-12-19 02:59:52 +00001588 return 0;
1589}
1590
1591/*
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001592 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
Sean Callananc3fd5232011-03-15 01:23:15 +00001593 *
1594 * @param insn - The instruction whose operand is to be read.
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001595 * @return - 0 if the vvvv was successfully consumed; nonzero
Sean Callananc3fd5232011-03-15 01:23:15 +00001596 * otherwise.
1597 */
1598static int readVVVV(struct InternalInstruction* insn) {
1599 dbgprintf(insn, "readVVVV()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001600
Richard Smith89ee75d2014-04-20 21:07:34 +00001601 int vvvv;
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001602 if (insn->vectorExtensionType == TYPE_EVEX)
Richard Smith89ee75d2014-04-20 21:07:34 +00001603 vvvv = vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]);
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001604 else if (insn->vectorExtensionType == TYPE_VEX_3B)
Richard Smith89ee75d2014-04-20 21:07:34 +00001605 vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001606 else if (insn->vectorExtensionType == TYPE_VEX_2B)
Richard Smith89ee75d2014-04-20 21:07:34 +00001607 vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001608 else if (insn->vectorExtensionType == TYPE_XOP)
Richard Smith89ee75d2014-04-20 21:07:34 +00001609 vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
Sean Callananc3fd5232011-03-15 01:23:15 +00001610 else
1611 return -1;
1612
Craig Topper0d0be472011-10-03 08:14:29 +00001613 if (insn->mode != MODE_64BIT)
Richard Smith89ee75d2014-04-20 21:07:34 +00001614 vvvv &= 0x7;
Craig Topper0d0be472011-10-03 08:14:29 +00001615
Richard Smith89ee75d2014-04-20 21:07:34 +00001616 insn->vvvv = static_cast<Reg>(vvvv);
Sean Callananc3fd5232011-03-15 01:23:15 +00001617 return 0;
1618}
1619
1620/*
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001621 * readMaskRegister - Reads an mask register from the opcode field of an
1622 * instruction.
1623 *
1624 * @param insn - The instruction whose opcode field is to be read.
1625 * @return - 0 on success; nonzero otherwise.
1626 */
1627static int readMaskRegister(struct InternalInstruction* insn) {
1628 dbgprintf(insn, "readMaskRegister()");
1629
1630 if (insn->vectorExtensionType != TYPE_EVEX)
1631 return -1;
1632
Richard Smith89ee75d2014-04-20 21:07:34 +00001633 insn->writemask =
1634 static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001635 return 0;
1636}
1637
1638/*
Sean Callanan04cc3072009-12-19 02:59:52 +00001639 * readOperands - Consults the specifier for an instruction and consumes all
1640 * operands for that instruction, interpreting them as it goes.
1641 *
1642 * @param insn - The instruction whose operands are to be read and interpreted.
1643 * @return - 0 if all operands could be read; nonzero otherwise.
1644 */
1645static int readOperands(struct InternalInstruction* insn) {
1646 int index;
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001647 int hasVVVV, needVVVV;
Craig Topper2ba766a2011-12-30 06:23:39 +00001648 int sawRegImm = 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001649
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001650 dbgprintf(insn, "readOperands()");
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001651
1652 /* If non-zero vvvv specified, need to make sure one of the operands
1653 uses it. */
1654 hasVVVV = !readVVVV(insn);
1655 needVVVV = hasVVVV && (insn->vvvv != 0);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001656
Sean Callanan04cc3072009-12-19 02:59:52 +00001657 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
Craig Topperb8aec082012-08-01 07:39:18 +00001658 switch (x86OperandSets[insn->spec->operands][index].encoding) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001659 case ENCODING_NONE:
David Woodhouse2ef8d9c2014-01-22 15:08:08 +00001660 case ENCODING_SI:
David Woodhouseb33c2ef2014-01-22 15:08:21 +00001661 case ENCODING_DI:
Sean Callanan04cc3072009-12-19 02:59:52 +00001662 break;
1663 case ENCODING_REG:
1664 case ENCODING_RM:
1665 if (readModRM(insn))
1666 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001667 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
Sean Callanan04cc3072009-12-19 02:59:52 +00001668 return -1;
1669 break;
1670 case ENCODING_CB:
1671 case ENCODING_CW:
1672 case ENCODING_CD:
1673 case ENCODING_CP:
1674 case ENCODING_CO:
1675 case ENCODING_CT:
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001676 dbgprintf(insn, "We currently don't hande code-offset encodings");
Sean Callanan04cc3072009-12-19 02:59:52 +00001677 return -1;
1678 case ENCODING_IB:
Craig Topper2ba766a2011-12-30 06:23:39 +00001679 if (sawRegImm) {
Benjamin Kramer9c48f262012-01-04 22:06:45 +00001680 /* Saw a register immediate so don't read again and instead split the
1681 previous immediate. FIXME: This is a hack. */
Benjamin Kramer47aecca2012-01-01 17:55:36 +00001682 insn->immediates[insn->numImmediatesConsumed] =
1683 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1684 ++insn->numImmediatesConsumed;
Craig Topper2ba766a2011-12-30 06:23:39 +00001685 break;
1686 }
Sean Callanan04cc3072009-12-19 02:59:52 +00001687 if (readImmediate(insn, 1))
1688 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001689 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
Sean Callanan1efe6612010-04-07 21:42:19 +00001690 insn->immediates[insn->numImmediatesConsumed - 1] > 7)
1691 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001692 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
Craig Topper7629d632012-04-03 05:20:24 +00001693 insn->immediates[insn->numImmediatesConsumed - 1] > 31)
1694 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001695 if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
1696 x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
Craig Topper2ba766a2011-12-30 06:23:39 +00001697 sawRegImm = 1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001698 break;
1699 case ENCODING_IW:
1700 if (readImmediate(insn, 2))
1701 return -1;
1702 break;
1703 case ENCODING_ID:
1704 if (readImmediate(insn, 4))
1705 return -1;
1706 break;
1707 case ENCODING_IO:
1708 if (readImmediate(insn, 8))
1709 return -1;
1710 break;
1711 case ENCODING_Iv:
Sean Callanan010b3732010-04-02 21:23:51 +00001712 if (readImmediate(insn, insn->immediateSize))
1713 return -1;
Chris Lattnerd4758fc2010-04-16 21:15:15 +00001714 break;
Sean Callanan04cc3072009-12-19 02:59:52 +00001715 case ENCODING_Ia:
Sean Callanan010b3732010-04-02 21:23:51 +00001716 if (readImmediate(insn, insn->addressSize))
1717 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001718 break;
1719 case ENCODING_RB:
Sean Callanan010b3732010-04-02 21:23:51 +00001720 if (readOpcodeRegister(insn, 1))
1721 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001722 break;
1723 case ENCODING_RW:
Sean Callanan010b3732010-04-02 21:23:51 +00001724 if (readOpcodeRegister(insn, 2))
1725 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001726 break;
1727 case ENCODING_RD:
Sean Callanan010b3732010-04-02 21:23:51 +00001728 if (readOpcodeRegister(insn, 4))
1729 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001730 break;
1731 case ENCODING_RO:
Sean Callanan010b3732010-04-02 21:23:51 +00001732 if (readOpcodeRegister(insn, 8))
1733 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001734 break;
1735 case ENCODING_Rv:
Sean Callanan010b3732010-04-02 21:23:51 +00001736 if (readOpcodeRegister(insn, 0))
1737 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001738 break;
Craig Topper623b0d62014-01-01 14:22:37 +00001739 case ENCODING_FP:
Sean Callananc3fd5232011-03-15 01:23:15 +00001740 break;
1741 case ENCODING_VVVV:
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001742 needVVVV = 0; /* Mark that we have found a VVVV operand. */
1743 if (!hasVVVV)
Sean Callananc3fd5232011-03-15 01:23:15 +00001744 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001745 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
Sean Callananc3fd5232011-03-15 01:23:15 +00001746 return -1;
1747 break;
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001748 case ENCODING_WRITEMASK:
1749 if (readMaskRegister(insn))
1750 return -1;
1751 break;
Sean Callanan04cc3072009-12-19 02:59:52 +00001752 case ENCODING_DUP:
1753 break;
1754 default:
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001755 dbgprintf(insn, "Encountered an operand with an unknown encoding.");
Sean Callanan04cc3072009-12-19 02:59:52 +00001756 return -1;
1757 }
1758 }
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001759
1760 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
1761 if (needVVVV) return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001762
Sean Callanan04cc3072009-12-19 02:59:52 +00001763 return 0;
1764}
1765
1766/*
1767 * decodeInstruction - Reads and interprets a full instruction provided by the
1768 * user.
1769 *
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001770 * @param insn - A pointer to the instruction to be populated. Must be
Sean Callanan04cc3072009-12-19 02:59:52 +00001771 * pre-allocated.
1772 * @param reader - The function to be used to read the instruction's bytes.
1773 * @param readerArg - A generic argument to be passed to the reader to store
1774 * any internal state.
1775 * @param logger - If non-NULL, the function to be used to write log messages
1776 * and warnings.
1777 * @param loggerArg - A generic argument to be passed to the logger to store
1778 * any internal state.
1779 * @param startLoc - The address (in the reader's address space) of the first
1780 * byte in the instruction.
1781 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1782 * decode the instruction in.
1783 * @return - 0 if the instruction's memory could be read; nonzero if
1784 * not.
1785 */
Richard Smith89ee75d2014-04-20 21:07:34 +00001786int llvm::X86Disassembler::decodeInstruction(
1787 struct InternalInstruction *insn, byteReader_t reader,
1788 const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg,
1789 uint64_t startLoc, DisassemblerMode mode) {
Daniel Dunbarc745a622009-12-19 03:31:50 +00001790 memset(insn, 0, sizeof(struct InternalInstruction));
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001791
Sean Callanan04cc3072009-12-19 02:59:52 +00001792 insn->reader = reader;
1793 insn->readerArg = readerArg;
1794 insn->dlog = logger;
1795 insn->dlogArg = loggerArg;
1796 insn->startLocation = startLoc;
1797 insn->readerCursor = startLoc;
1798 insn->mode = mode;
1799 insn->numImmediatesConsumed = 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001800
Sean Callanan04cc3072009-12-19 02:59:52 +00001801 if (readPrefixes(insn) ||
1802 readOpcode(insn) ||
Benjamin Kramer478e8de2012-02-11 14:50:54 +00001803 getID(insn, miiArg) ||
Sean Callanan04cc3072009-12-19 02:59:52 +00001804 insn->instructionID == 0 ||
1805 readOperands(insn))
1806 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001807
1808 insn->operands = &x86OperandSets[insn->spec->operands][0];
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001809
Sean Callanan04cc3072009-12-19 02:59:52 +00001810 insn->length = insn->readerCursor - insn->startLocation;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001811
Benjamin Kramer4f672272010-03-18 12:18:36 +00001812 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
1813 startLoc, insn->readerCursor, insn->length);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001814
Sean Callanan04cc3072009-12-19 02:59:52 +00001815 if (insn->length > 15)
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001816 dbgprintf(insn, "Instruction exceeds 15-byte limit");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001817
Sean Callanan04cc3072009-12-19 02:59:52 +00001818 return 0;
1819}