blob: 070b365fd4895462f7f0c8b97bbfb2a940899a42 [file] [log] [blame]
//===-- X86DisassemblerDecoder.c - Disassembler decoder -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler.
// It contains the implementation of the instruction decoder.
// Documentation for the disassembler can be found in X86Disassembler.h.
//
//===----------------------------------------------------------------------===//
Sean Callanan04cc3072009-12-19 02:59:52 +000015
Sean Callanan04cc3072009-12-19 02:59:52 +000016#include <stdarg.h> /* for va_*() */
17#include <stdio.h> /* for vsnprintf() */
18#include <stdlib.h> /* for exit() */
Daniel Dunbarc745a622009-12-19 03:31:50 +000019#include <string.h> /* for memset() */
Sean Callanan04cc3072009-12-19 02:59:52 +000020
21#include "X86DisassemblerDecoder.h"
22
Richard Smith89ee75d2014-04-20 21:07:34 +000023using namespace llvm::X86Disassembler;
24
/// Specifies whether a ModR/M byte is needed and (if so) which
/// instruction each possible value of the ModR/M byte corresponds to. Once
/// this information is known, we have narrowed down to a single instruction.
struct ModRMDecision {
  /// One of the MODRM_* kinds; decode() switches on it to choose how the
  /// ModR/M byte is folded into the modRMTable index.
  uint8_t modrm_type;
  /// Base index into modRMTable for the instruction ID(s) of this opcode.
  uint16_t instructionIDs;
};
32
33/// Specifies which set of ModR/M->instruction tables to look at
34/// given a particular opcode.
35struct OpcodeDecision {
36 ModRMDecision modRMDecisions[256];
37};
38
39/// Specifies which opcode->instruction tables to look at given
40/// a particular context (set of attributes). Since there are many possible
41/// contexts, the decoder first uses CONTEXTS_SYM to determine which context
42/// applies given a specific set of attributes. Hence there are only IC_max
43/// entries in this table, rather than 2^(ATTR_max).
44struct ContextDecision {
45 OpcodeDecision opcodeDecisions[IC_max];
46};
47
Sean Callanan04cc3072009-12-19 02:59:52 +000048#include "X86GenDisassemblerTables.inc"
49
#define TRUE  1
#define FALSE 0

/*
 * debug - Forwards a message, together with the current source file and line,
 *   to Debug().  Expands to an empty statement when NDEBUG is defined.
 *
 * @param s - The message to forward.
 */
#ifndef NDEBUG
#define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0)
#else
#define debug(s) do { } while (0)
#endif
58
Sean Callanan04cc3072009-12-19 02:59:52 +000059
60/*
61 * contextForAttrs - Client for the instruction context table. Takes a set of
62 * attributes and returns the appropriate decode context.
63 *
64 * @param attrMask - Attributes, from the enumeration attributeBits.
65 * @return - The InstructionContext to use when looking up an
66 * an instruction with these attributes.
67 */
Elena Demikhovsky371e3632013-12-25 11:40:51 +000068static InstructionContext contextForAttrs(uint16_t attrMask) {
Richard Smith89ee75d2014-04-20 21:07:34 +000069 return static_cast<InstructionContext>(CONTEXTS_SYM[attrMask]);
Sean Callanan04cc3072009-12-19 02:59:52 +000070}
71
72/*
73 * modRMRequired - Reads the appropriate instruction table to determine whether
74 * the ModR/M byte is required to decode a particular instruction.
75 *
76 * @param type - The opcode type (i.e., how many bytes it has).
77 * @param insnContext - The context for the instruction, as returned by
78 * contextForAttrs.
79 * @param opcode - The last byte of the instruction's opcode, not counting
80 * ModR/M extensions and escapes.
81 * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
82 */
Sean Callanan588785c2009-12-22 22:51:40 +000083static int modRMRequired(OpcodeType type,
Craig Topper21c33652011-10-02 16:56:09 +000084 InstructionContext insnContext,
Elena Demikhovsky371e3632013-12-25 11:40:51 +000085 uint16_t opcode) {
Daniel Dunbar8b532de2009-12-22 01:41:37 +000086 const struct ContextDecision* decision = 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +000087
Sean Callanan04cc3072009-12-19 02:59:52 +000088 switch (type) {
89 case ONEBYTE:
90 decision = &ONEBYTE_SYM;
91 break;
92 case TWOBYTE:
93 decision = &TWOBYTE_SYM;
94 break;
95 case THREEBYTE_38:
96 decision = &THREEBYTE38_SYM;
97 break;
98 case THREEBYTE_3A:
99 decision = &THREEBYTE3A_SYM;
100 break;
Craig Topper9e3e38a2013-10-03 05:17:48 +0000101 case XOP8_MAP:
102 decision = &XOP8_MAP_SYM;
103 break;
104 case XOP9_MAP:
105 decision = &XOP9_MAP_SYM;
106 break;
107 case XOPA_MAP:
108 decision = &XOPA_MAP_SYM;
109 break;
Sean Callanan04cc3072009-12-19 02:59:52 +0000110 }
Ahmed Charles636a3d62012-02-19 11:37:01 +0000111
Sean Callanan04cc3072009-12-19 02:59:52 +0000112 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
113 modrm_type != MODRM_ONEENTRY;
Sean Callanan04cc3072009-12-19 02:59:52 +0000114}
115
116/*
117 * decode - Reads the appropriate instruction table to obtain the unique ID of
118 * an instruction.
119 *
120 * @param type - See modRMRequired().
121 * @param insnContext - See modRMRequired().
122 * @param opcode - See modRMRequired().
123 * @param modRM - The ModR/M byte if required, or any value if not.
Sean Callanan010b3732010-04-02 21:23:51 +0000124 * @return - The UID of the instruction, or 0 on failure.
Sean Callanan04cc3072009-12-19 02:59:52 +0000125 */
Sean Callanan588785c2009-12-22 22:51:40 +0000126static InstrUID decode(OpcodeType type,
Sean Callanan010b3732010-04-02 21:23:51 +0000127 InstructionContext insnContext,
128 uint8_t opcode,
129 uint8_t modRM) {
Duncan Sandsae22c602012-02-05 14:20:11 +0000130 const struct ModRMDecision* dec = 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000131
Sean Callanan04cc3072009-12-19 02:59:52 +0000132 switch (type) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000133 case ONEBYTE:
134 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
135 break;
136 case TWOBYTE:
137 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
138 break;
139 case THREEBYTE_38:
140 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
141 break;
142 case THREEBYTE_3A:
143 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
144 break;
Craig Topper9e3e38a2013-10-03 05:17:48 +0000145 case XOP8_MAP:
146 dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
147 break;
148 case XOP9_MAP:
149 dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
150 break;
151 case XOPA_MAP:
152 dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
153 break;
Sean Callanan04cc3072009-12-19 02:59:52 +0000154 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000155
Sean Callanan04cc3072009-12-19 02:59:52 +0000156 switch (dec->modrm_type) {
157 default:
Sean Callanan010b3732010-04-02 21:23:51 +0000158 debug("Corrupt table! Unknown modrm_type");
159 return 0;
Sean Callanan04cc3072009-12-19 02:59:52 +0000160 case MODRM_ONEENTRY:
Craig Topper487e7442012-02-09 07:45:30 +0000161 return modRMTable[dec->instructionIDs];
Sean Callanan04cc3072009-12-19 02:59:52 +0000162 case MODRM_SPLITRM:
163 if (modFromModRM(modRM) == 0x3)
Craig Topper487e7442012-02-09 07:45:30 +0000164 return modRMTable[dec->instructionIDs+1];
165 return modRMTable[dec->instructionIDs];
Craig Toppera0cd9702012-02-09 08:58:07 +0000166 case MODRM_SPLITREG:
167 if (modFromModRM(modRM) == 0x3)
168 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
169 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
Craig Topper963305b2012-09-13 05:45:42 +0000170 case MODRM_SPLITMISC:
171 if (modFromModRM(modRM) == 0x3)
172 return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
173 return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
Sean Callanan04cc3072009-12-19 02:59:52 +0000174 case MODRM_FULL:
Craig Topper487e7442012-02-09 07:45:30 +0000175 return modRMTable[dec->instructionIDs+modRM];
Sean Callanan04cc3072009-12-19 02:59:52 +0000176 }
Sean Callanan04cc3072009-12-19 02:59:52 +0000177}
178
179/*
180 * specifierForUID - Given a UID, returns the name and operand specification for
181 * that instruction.
182 *
183 * @param uid - The unique ID for the instruction. This should be returned by
184 * decode(); specifierForUID will not check bounds.
185 * @return - A pointer to the specification for that instruction.
186 */
Benjamin Kramerde0a4fb2010-10-23 09:10:44 +0000187static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000188 return &INSTRUCTIONS_SYM[uid];
189}
190
191/*
192 * consumeByte - Uses the reader function provided by the user to consume one
193 * byte from the instruction's memory and advance the cursor.
194 *
195 * @param insn - The instruction with the reader function to use. The cursor
196 * for this instruction is advanced.
197 * @param byte - A pointer to a pre-allocated memory buffer to be populated
198 * with the data read.
199 * @return - 0 if the read was successful; nonzero otherwise.
200 */
Sean Callanan588785c2009-12-22 22:51:40 +0000201static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000202 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000203
Sean Callanan04cc3072009-12-19 02:59:52 +0000204 if (!ret)
205 ++(insn->readerCursor);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000206
Sean Callanan04cc3072009-12-19 02:59:52 +0000207 return ret;
208}
209
210/*
211 * lookAtByte - Like consumeByte, but does not advance the cursor.
212 *
213 * @param insn - See consumeByte().
214 * @param byte - See consumeByte().
215 * @return - See consumeByte().
216 */
Sean Callanan588785c2009-12-22 22:51:40 +0000217static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000218 return insn->reader(insn->readerArg, byte, insn->readerCursor);
219}
220
Sean Callanan588785c2009-12-22 22:51:40 +0000221static void unconsumeByte(struct InternalInstruction* insn) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000222 insn->readerCursor--;
223}
224
Sean Callanan588785c2009-12-22 22:51:40 +0000225#define CONSUME_FUNC(name, type) \
226 static int name(struct InternalInstruction* insn, type* ptr) { \
227 type combined = 0; \
228 unsigned offset; \
229 for (offset = 0; offset < sizeof(type); ++offset) { \
230 uint8_t byte; \
231 int ret = insn->reader(insn->readerArg, \
232 &byte, \
233 insn->readerCursor + offset); \
234 if (ret) \
235 return ret; \
Richard Smith228e6d42012-08-24 23:29:28 +0000236 combined = combined | ((uint64_t)byte << (offset * 8)); \
Sean Callanan588785c2009-12-22 22:51:40 +0000237 } \
238 *ptr = combined; \
239 insn->readerCursor += sizeof(type); \
240 return 0; \
Sean Callanan04cc3072009-12-19 02:59:52 +0000241 }
242
243/*
244 * consume* - Use the reader function provided by the user to consume data
245 * values of various sizes from the instruction's memory and advance the
246 * cursor appropriately. These readers perform endian conversion.
247 *
248 * @param insn - See consumeByte().
249 * @param ptr - A pointer to a pre-allocated memory of appropriate size to
250 * be populated with the data read.
251 * @return - See consumeByte().
252 */
253CONSUME_FUNC(consumeInt8, int8_t)
254CONSUME_FUNC(consumeInt16, int16_t)
255CONSUME_FUNC(consumeInt32, int32_t)
256CONSUME_FUNC(consumeUInt16, uint16_t)
257CONSUME_FUNC(consumeUInt32, uint32_t)
258CONSUME_FUNC(consumeUInt64, uint64_t)
259
260/*
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000261 * dbgprintf - Uses the logging function provided by the user to log a single
Sean Callanan04cc3072009-12-19 02:59:52 +0000262 * message, typically without a carriage-return.
263 *
264 * @param insn - The instruction containing the logging function.
265 * @param format - See printf().
266 * @param ... - See printf().
267 */
Sean Callanan588785c2009-12-22 22:51:40 +0000268static void dbgprintf(struct InternalInstruction* insn,
269 const char* format,
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000270 ...) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000271 char buffer[256];
272 va_list ap;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000273
Sean Callanan04cc3072009-12-19 02:59:52 +0000274 if (!insn->dlog)
275 return;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000276
Sean Callanan04cc3072009-12-19 02:59:52 +0000277 va_start(ap, format);
278 (void)vsnprintf(buffer, sizeof(buffer), format, ap);
279 va_end(ap);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000280
Sean Callanan04cc3072009-12-19 02:59:52 +0000281 insn->dlog(insn->dlogArg, buffer);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000282
Sean Callanan04cc3072009-12-19 02:59:52 +0000283 return;
284}
285
286/*
287 * setPrefixPresent - Marks that a particular prefix is present at a particular
288 * location.
289 *
290 * @param insn - The instruction to be marked as having the prefix.
291 * @param prefix - The prefix that is present.
292 * @param location - The location where the prefix is located (in the address
293 * space of the instruction's reader).
294 */
Sean Callanan588785c2009-12-22 22:51:40 +0000295static void setPrefixPresent(struct InternalInstruction* insn,
Sean Callanan04cc3072009-12-19 02:59:52 +0000296 uint8_t prefix,
297 uint64_t location)
298{
299 insn->prefixPresent[prefix] = 1;
300 insn->prefixLocations[prefix] = location;
301}
302
303/*
304 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
305 * present at a given location.
306 *
307 * @param insn - The instruction to be queried.
308 * @param prefix - The prefix.
309 * @param location - The location to query.
310 * @return - Whether the prefix is at that location.
311 */
Sean Callanan588785c2009-12-22 22:51:40 +0000312static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
313 uint8_t prefix,
314 uint64_t location)
Sean Callanan04cc3072009-12-19 02:59:52 +0000315{
316 if (insn->prefixPresent[prefix] == 1 &&
317 insn->prefixLocations[prefix] == location)
318 return TRUE;
319 else
320 return FALSE;
321}
322
323/*
324 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
325 * instruction as having them. Also sets the instruction's default operand,
326 * address, and other relevant data sizes to report operands correctly.
327 *
328 * @param insn - The instruction whose prefixes are to be read.
329 * @return - 0 if the instruction could be read until the end of the prefix
330 * bytes, and no prefixes conflicted; nonzero otherwise.
331 */
332static int readPrefixes(struct InternalInstruction* insn) {
333 BOOL isPrefix = TRUE;
334 BOOL prefixGroups[4] = { FALSE };
335 uint64_t prefixLocation;
Ted Kremenek3c4408c2011-01-23 17:05:06 +0000336 uint8_t byte = 0;
Richard Mitton79917a92013-08-30 21:32:42 +0000337 uint8_t nextByte;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000338
Sean Callanan04cc3072009-12-19 02:59:52 +0000339 BOOL hasAdSize = FALSE;
340 BOOL hasOpSize = FALSE;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000341
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000342 dbgprintf(insn, "readPrefixes()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000343
Sean Callanan04cc3072009-12-19 02:59:52 +0000344 while (isPrefix) {
345 prefixLocation = insn->readerCursor;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000346
Richard Mitton576ee002013-08-30 21:19:48 +0000347 /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */
Sean Callanan04cc3072009-12-19 02:59:52 +0000348 if (consumeByte(insn, &byte))
Richard Mitton576ee002013-08-30 21:19:48 +0000349 break;
Kevin Enderby014e1cd2012-03-09 17:52:49 +0000350
Benjamin Krameradfc73d2012-03-10 15:10:06 +0000351 /*
Dave Zarzycki07fabee2013-03-25 18:59:38 +0000352 * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
353 * break and let it be disassembled as a normal "instruction".
Benjamin Krameradfc73d2012-03-10 15:10:06 +0000354 */
Richard Mitton576ee002013-08-30 21:19:48 +0000355 if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
356 break;
357
Dave Zarzycki07fabee2013-03-25 18:59:38 +0000358 if (insn->readerCursor - 1 == insn->startLocation
Richard Mitton576ee002013-08-30 21:19:48 +0000359 && (byte == 0xf2 || byte == 0xf3)
360 && !lookAtByte(insn, &nextByte))
361 {
Kevin Enderby35fd7922013-06-20 22:32:18 +0000362 /*
363 * If the byte is 0xf2 or 0xf3, and any of the following conditions are
364 * met:
365 * - it is followed by a LOCK (0xf0) prefix
366 * - it is followed by an xchg instruction
367 * then it should be disassembled as a xacquire/xrelease not repne/rep.
368 */
369 if ((byte == 0xf2 || byte == 0xf3) &&
370 ((nextByte == 0xf0) |
371 ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
372 insn->xAcquireRelease = TRUE;
373 /*
374 * Also if the byte is 0xf3, and the following condition is met:
375 * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
376 * "mov mem, imm" (opcode 0xc6/0xc7) instructions.
377 * then it should be disassembled as an xrelease not rep.
378 */
379 if (byte == 0xf3 &&
380 (nextByte == 0x88 || nextByte == 0x89 ||
381 nextByte == 0xc6 || nextByte == 0xc7))
382 insn->xAcquireRelease = TRUE;
Dave Zarzycki07fabee2013-03-25 18:59:38 +0000383 if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
384 if (consumeByte(insn, &nextByte))
385 return -1;
386 if (lookAtByte(insn, &nextByte))
387 return -1;
388 unconsumeByte(insn);
389 }
390 if (nextByte != 0x0f && nextByte != 0x90)
391 break;
392 }
393
Sean Callanan04cc3072009-12-19 02:59:52 +0000394 switch (byte) {
395 case 0xf0: /* LOCK */
396 case 0xf2: /* REPNE/REPNZ */
397 case 0xf3: /* REP or REPE/REPZ */
398 if (prefixGroups[0])
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000399 dbgprintf(insn, "Redundant Group 1 prefix");
Sean Callanan04cc3072009-12-19 02:59:52 +0000400 prefixGroups[0] = TRUE;
401 setPrefixPresent(insn, byte, prefixLocation);
402 break;
403 case 0x2e: /* CS segment override -OR- Branch not taken */
404 case 0x36: /* SS segment override -OR- Branch taken */
405 case 0x3e: /* DS segment override */
406 case 0x26: /* ES segment override */
407 case 0x64: /* FS segment override */
408 case 0x65: /* GS segment override */
409 switch (byte) {
410 case 0x2e:
411 insn->segmentOverride = SEG_OVERRIDE_CS;
412 break;
413 case 0x36:
414 insn->segmentOverride = SEG_OVERRIDE_SS;
415 break;
416 case 0x3e:
417 insn->segmentOverride = SEG_OVERRIDE_DS;
418 break;
419 case 0x26:
420 insn->segmentOverride = SEG_OVERRIDE_ES;
421 break;
422 case 0x64:
423 insn->segmentOverride = SEG_OVERRIDE_FS;
424 break;
425 case 0x65:
426 insn->segmentOverride = SEG_OVERRIDE_GS;
427 break;
428 default:
Sean Callanan010b3732010-04-02 21:23:51 +0000429 debug("Unhandled override");
430 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +0000431 }
432 if (prefixGroups[1])
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000433 dbgprintf(insn, "Redundant Group 2 prefix");
Sean Callanan04cc3072009-12-19 02:59:52 +0000434 prefixGroups[1] = TRUE;
435 setPrefixPresent(insn, byte, prefixLocation);
436 break;
437 case 0x66: /* Operand-size override */
438 if (prefixGroups[2])
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000439 dbgprintf(insn, "Redundant Group 3 prefix");
Sean Callanan04cc3072009-12-19 02:59:52 +0000440 prefixGroups[2] = TRUE;
441 hasOpSize = TRUE;
442 setPrefixPresent(insn, byte, prefixLocation);
443 break;
444 case 0x67: /* Address-size override */
445 if (prefixGroups[3])
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000446 dbgprintf(insn, "Redundant Group 4 prefix");
Sean Callanan04cc3072009-12-19 02:59:52 +0000447 prefixGroups[3] = TRUE;
448 hasAdSize = TRUE;
449 setPrefixPresent(insn, byte, prefixLocation);
450 break;
451 default: /* Not a prefix byte */
452 isPrefix = FALSE;
453 break;
454 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000455
Sean Callanan04cc3072009-12-19 02:59:52 +0000456 if (isPrefix)
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000457 dbgprintf(insn, "Found prefix 0x%hhx", byte);
Sean Callanan04cc3072009-12-19 02:59:52 +0000458 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000459
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000460 insn->vectorExtensionType = TYPE_NO_VEX_XOP;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000461
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000462 if (byte == 0x62) {
463 uint8_t byte1, byte2;
464
465 if (consumeByte(insn, &byte1)) {
466 dbgprintf(insn, "Couldn't read second byte of EVEX prefix");
467 return -1;
468 }
469
470 if (lookAtByte(insn, &byte2)) {
471 dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
472 return -1;
473 }
474
475 if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
476 ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
477 insn->vectorExtensionType = TYPE_EVEX;
478 }
479 else {
480 unconsumeByte(insn); /* unconsume byte1 */
481 unconsumeByte(insn); /* unconsume byte */
482 insn->necessaryPrefixLocation = insn->readerCursor - 2;
483 }
484
485 if (insn->vectorExtensionType == TYPE_EVEX) {
486 insn->vectorExtensionPrefix[0] = byte;
487 insn->vectorExtensionPrefix[1] = byte1;
488 if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) {
489 dbgprintf(insn, "Couldn't read third byte of EVEX prefix");
490 return -1;
491 }
492 if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) {
493 dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix");
494 return -1;
495 }
496
497 /* We simulate the REX prefix for simplicity's sake */
498 if (insn->mode == MODE_64BIT) {
499 insn->rexPrefix = 0x40
500 | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3)
501 | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2)
502 | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1)
503 | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
504 }
505
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000506 dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
507 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
508 insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]);
509 }
510 }
511 else if (byte == 0xc4) {
Sean Callananc3fd5232011-03-15 01:23:15 +0000512 uint8_t byte1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000513
Sean Callananc3fd5232011-03-15 01:23:15 +0000514 if (lookAtByte(insn, &byte1)) {
515 dbgprintf(insn, "Couldn't read second byte of VEX");
516 return -1;
517 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000518
Craig Topper45faba92011-09-26 05:12:43 +0000519 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000520 insn->vectorExtensionType = TYPE_VEX_3B;
Sean Callananc3fd5232011-03-15 01:23:15 +0000521 insn->necessaryPrefixLocation = insn->readerCursor - 1;
522 }
523 else {
Sean Callanan04cc3072009-12-19 02:59:52 +0000524 unconsumeByte(insn);
525 insn->necessaryPrefixLocation = insn->readerCursor - 1;
526 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000527
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000528 if (insn->vectorExtensionType == TYPE_VEX_3B) {
529 insn->vectorExtensionPrefix[0] = byte;
530 consumeByte(insn, &insn->vectorExtensionPrefix[1]);
531 consumeByte(insn, &insn->vectorExtensionPrefix[2]);
Sean Callananc3fd5232011-03-15 01:23:15 +0000532
533 /* We simulate the REX prefix for simplicity's sake */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000534
Craig Topper31854ba2011-10-03 07:51:09 +0000535 if (insn->mode == MODE_64BIT) {
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000536 insn->rexPrefix = 0x40
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000537 | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3)
538 | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2)
539 | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1)
540 | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
Craig Topper31854ba2011-10-03 07:51:09 +0000541 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000542
Craig Topper9e3e38a2013-10-03 05:17:48 +0000543 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000544 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
545 insn->vectorExtensionPrefix[2]);
Sean Callananc3fd5232011-03-15 01:23:15 +0000546 }
Sean Callanan04cc3072009-12-19 02:59:52 +0000547 }
Sean Callananc3fd5232011-03-15 01:23:15 +0000548 else if (byte == 0xc5) {
549 uint8_t byte1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000550
Sean Callananc3fd5232011-03-15 01:23:15 +0000551 if (lookAtByte(insn, &byte1)) {
552 dbgprintf(insn, "Couldn't read second byte of VEX");
553 return -1;
554 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000555
Craig Topper45faba92011-09-26 05:12:43 +0000556 if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) {
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000557 insn->vectorExtensionType = TYPE_VEX_2B;
Sean Callananc3fd5232011-03-15 01:23:15 +0000558 }
559 else {
560 unconsumeByte(insn);
561 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000562
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000563 if (insn->vectorExtensionType == TYPE_VEX_2B) {
564 insn->vectorExtensionPrefix[0] = byte;
565 consumeByte(insn, &insn->vectorExtensionPrefix[1]);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000566
Craig Topper31854ba2011-10-03 07:51:09 +0000567 if (insn->mode == MODE_64BIT) {
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000568 insn->rexPrefix = 0x40
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000569 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
Craig Topper31854ba2011-10-03 07:51:09 +0000570 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000571
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000572 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1]))
Sean Callananc3fd5232011-03-15 01:23:15 +0000573 {
574 default:
575 break;
576 case VEX_PREFIX_66:
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000577 hasOpSize = TRUE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000578 break;
579 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000580
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000581 dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx",
582 insn->vectorExtensionPrefix[0],
583 insn->vectorExtensionPrefix[1]);
Craig Topper9e3e38a2013-10-03 05:17:48 +0000584 }
585 }
586 else if (byte == 0x8f) {
587 uint8_t byte1;
588
589 if (lookAtByte(insn, &byte1)) {
590 dbgprintf(insn, "Couldn't read second byte of XOP");
591 return -1;
592 }
593
Craig Topper9eb88372013-10-03 06:29:59 +0000594 if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000595 insn->vectorExtensionType = TYPE_XOP;
Craig Topper9e3e38a2013-10-03 05:17:48 +0000596 insn->necessaryPrefixLocation = insn->readerCursor - 1;
597 }
598 else {
599 unconsumeByte(insn);
600 insn->necessaryPrefixLocation = insn->readerCursor - 1;
601 }
602
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000603 if (insn->vectorExtensionType == TYPE_XOP) {
604 insn->vectorExtensionPrefix[0] = byte;
605 consumeByte(insn, &insn->vectorExtensionPrefix[1]);
606 consumeByte(insn, &insn->vectorExtensionPrefix[2]);
Craig Topper9e3e38a2013-10-03 05:17:48 +0000607
608 /* We simulate the REX prefix for simplicity's sake */
609
610 if (insn->mode == MODE_64BIT) {
611 insn->rexPrefix = 0x40
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000612 | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3)
613 | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2)
614 | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1)
615 | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
Craig Topper9e3e38a2013-10-03 05:17:48 +0000616 }
617
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000618 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2]))
Craig Topper9e3e38a2013-10-03 05:17:48 +0000619 {
620 default:
621 break;
622 case VEX_PREFIX_66:
623 hasOpSize = TRUE;
624 break;
625 }
626
627 dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000628 insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
629 insn->vectorExtensionPrefix[2]);
Sean Callananc3fd5232011-03-15 01:23:15 +0000630 }
631 }
632 else {
633 if (insn->mode == MODE_64BIT) {
634 if ((byte & 0xf0) == 0x40) {
635 uint8_t opcodeByte;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000636
Sean Callananc3fd5232011-03-15 01:23:15 +0000637 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
638 dbgprintf(insn, "Redundant REX prefix");
639 return -1;
640 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000641
Sean Callananc3fd5232011-03-15 01:23:15 +0000642 insn->rexPrefix = byte;
643 insn->necessaryPrefixLocation = insn->readerCursor - 2;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000644
Sean Callananc3fd5232011-03-15 01:23:15 +0000645 dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000646 } else {
Sean Callananc3fd5232011-03-15 01:23:15 +0000647 unconsumeByte(insn);
648 insn->necessaryPrefixLocation = insn->readerCursor - 1;
649 }
650 } else {
651 unconsumeByte(insn);
652 insn->necessaryPrefixLocation = insn->readerCursor - 1;
653 }
654 }
655
Sean Callanan04cc3072009-12-19 02:59:52 +0000656 if (insn->mode == MODE_16BIT) {
657 insn->registerSize = (hasOpSize ? 4 : 2);
658 insn->addressSize = (hasAdSize ? 4 : 2);
659 insn->displacementSize = (hasAdSize ? 4 : 2);
660 insn->immediateSize = (hasOpSize ? 4 : 2);
661 } else if (insn->mode == MODE_32BIT) {
662 insn->registerSize = (hasOpSize ? 2 : 4);
663 insn->addressSize = (hasAdSize ? 2 : 4);
664 insn->displacementSize = (hasAdSize ? 2 : 4);
Sean Callanan9f6c6222010-10-22 01:24:11 +0000665 insn->immediateSize = (hasOpSize ? 2 : 4);
Sean Callanan04cc3072009-12-19 02:59:52 +0000666 } else if (insn->mode == MODE_64BIT) {
667 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
668 insn->registerSize = 8;
669 insn->addressSize = (hasAdSize ? 4 : 8);
670 insn->displacementSize = 4;
671 insn->immediateSize = 4;
672 } else if (insn->rexPrefix) {
673 insn->registerSize = (hasOpSize ? 2 : 4);
674 insn->addressSize = (hasAdSize ? 4 : 8);
675 insn->displacementSize = (hasOpSize ? 2 : 4);
676 insn->immediateSize = (hasOpSize ? 2 : 4);
677 } else {
678 insn->registerSize = (hasOpSize ? 2 : 4);
679 insn->addressSize = (hasAdSize ? 4 : 8);
680 insn->displacementSize = (hasOpSize ? 2 : 4);
681 insn->immediateSize = (hasOpSize ? 2 : 4);
682 }
683 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000684
Sean Callanan04cc3072009-12-19 02:59:52 +0000685 return 0;
686}
687
688/*
689 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
690 * extended or escape opcodes).
691 *
692 * @param insn - The instruction whose opcode is to be read.
693 * @return - 0 if the opcode could be read successfully; nonzero otherwise.
694 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000695static int readOpcode(struct InternalInstruction* insn) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000696 /* Determine the length of the primary opcode */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000697
Sean Callanan04cc3072009-12-19 02:59:52 +0000698 uint8_t current;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000699
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000700 dbgprintf(insn, "readOpcode()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000701
Sean Callanan04cc3072009-12-19 02:59:52 +0000702 insn->opcodeType = ONEBYTE;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000703
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000704 if (insn->vectorExtensionType == TYPE_EVEX)
Sean Callananc3fd5232011-03-15 01:23:15 +0000705 {
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000706 switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
Sean Callananc3fd5232011-03-15 01:23:15 +0000707 default:
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000708 dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
709 mmFromEVEX2of4(insn->vectorExtensionPrefix[1]));
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000710 return -1;
Sean Callananc3fd5232011-03-15 01:23:15 +0000711 case VEX_LOB_0F:
Sean Callananc3fd5232011-03-15 01:23:15 +0000712 insn->opcodeType = TWOBYTE;
713 return consumeByte(insn, &insn->opcode);
714 case VEX_LOB_0F38:
Sean Callananc3fd5232011-03-15 01:23:15 +0000715 insn->opcodeType = THREEBYTE_38;
716 return consumeByte(insn, &insn->opcode);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000717 case VEX_LOB_0F3A:
Sean Callananc3fd5232011-03-15 01:23:15 +0000718 insn->opcodeType = THREEBYTE_3A;
719 return consumeByte(insn, &insn->opcode);
720 }
721 }
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000722 else if (insn->vectorExtensionType == TYPE_VEX_3B) {
723 switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
724 default:
725 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
726 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
727 return -1;
728 case VEX_LOB_0F:
729 insn->opcodeType = TWOBYTE;
730 return consumeByte(insn, &insn->opcode);
731 case VEX_LOB_0F38:
732 insn->opcodeType = THREEBYTE_38;
733 return consumeByte(insn, &insn->opcode);
734 case VEX_LOB_0F3A:
735 insn->opcodeType = THREEBYTE_3A;
736 return consumeByte(insn, &insn->opcode);
737 }
738 }
739 else if (insn->vectorExtensionType == TYPE_VEX_2B) {
Sean Callananc3fd5232011-03-15 01:23:15 +0000740 insn->opcodeType = TWOBYTE;
741 return consumeByte(insn, &insn->opcode);
742 }
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000743 else if (insn->vectorExtensionType == TYPE_XOP) {
744 switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
Craig Topper9e3e38a2013-10-03 05:17:48 +0000745 default:
746 dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000747 mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
Craig Topper9e3e38a2013-10-03 05:17:48 +0000748 return -1;
749 case XOP_MAP_SELECT_8:
750 insn->opcodeType = XOP8_MAP;
751 return consumeByte(insn, &insn->opcode);
752 case XOP_MAP_SELECT_9:
753 insn->opcodeType = XOP9_MAP;
754 return consumeByte(insn, &insn->opcode);
755 case XOP_MAP_SELECT_A:
756 insn->opcodeType = XOPA_MAP;
757 return consumeByte(insn, &insn->opcode);
758 }
759 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000760
Sean Callanan04cc3072009-12-19 02:59:52 +0000761 if (consumeByte(insn, &current))
762 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000763
Sean Callanan04cc3072009-12-19 02:59:52 +0000764 if (current == 0x0f) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000765 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000766
Sean Callanan04cc3072009-12-19 02:59:52 +0000767 if (consumeByte(insn, &current))
768 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000769
Sean Callanan04cc3072009-12-19 02:59:52 +0000770 if (current == 0x38) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000771 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000772
Sean Callanan04cc3072009-12-19 02:59:52 +0000773 if (consumeByte(insn, &current))
774 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000775
Sean Callanan04cc3072009-12-19 02:59:52 +0000776 insn->opcodeType = THREEBYTE_38;
777 } else if (current == 0x3a) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000778 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000779
Sean Callanan04cc3072009-12-19 02:59:52 +0000780 if (consumeByte(insn, &current))
781 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000782
Sean Callanan04cc3072009-12-19 02:59:52 +0000783 insn->opcodeType = THREEBYTE_3A;
784 } else {
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000785 dbgprintf(insn, "Didn't find a three-byte escape prefix");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000786
Sean Callanan04cc3072009-12-19 02:59:52 +0000787 insn->opcodeType = TWOBYTE;
788 }
789 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000790
Sean Callanan04cc3072009-12-19 02:59:52 +0000791 /*
792 * At this point we have consumed the full opcode.
793 * Anything we consume from here on must be unconsumed.
794 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000795
Sean Callanan04cc3072009-12-19 02:59:52 +0000796 insn->opcode = current;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000797
Sean Callanan04cc3072009-12-19 02:59:52 +0000798 return 0;
799}
800
801static int readModRM(struct InternalInstruction* insn);
802
803/*
804 * getIDWithAttrMask - Determines the ID of an instruction, consuming
805 * the ModR/M byte as appropriate for extended and escape opcodes,
806 * and using a supplied attribute mask.
807 *
808 * @param instructionID - A pointer whose target is filled in with the ID of the
809 * instruction.
810 * @param insn - The instruction whose ID is to be determined.
811 * @param attrMask - The attribute mask to search.
812 * @return - 0 if the ModR/M could be read when needed or was not
813 * needed; nonzero otherwise.
814 */
815static int getIDWithAttrMask(uint16_t* instructionID,
816 struct InternalInstruction* insn,
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000817 uint16_t attrMask) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000818 BOOL hasModRMExtension;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000819
Richard Smith89ee75d2014-04-20 21:07:34 +0000820 InstructionContext instructionClass = contextForAttrs(attrMask);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000821
Sean Callanan04cc3072009-12-19 02:59:52 +0000822 hasModRMExtension = modRMRequired(insn->opcodeType,
823 instructionClass,
824 insn->opcode);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000825
Sean Callanan04cc3072009-12-19 02:59:52 +0000826 if (hasModRMExtension) {
Rafael Espindola9f9a1062011-01-06 16:48:42 +0000827 if (readModRM(insn))
828 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000829
Sean Callanan04cc3072009-12-19 02:59:52 +0000830 *instructionID = decode(insn->opcodeType,
831 instructionClass,
832 insn->opcode,
833 insn->modRM);
834 } else {
835 *instructionID = decode(insn->opcodeType,
836 instructionClass,
837 insn->opcode,
838 0);
839 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000840
Sean Callanan04cc3072009-12-19 02:59:52 +0000841 return 0;
842}
843
844/*
845 * is16BitEquivalent - Determines whether two instruction names refer to
846 * equivalent instructions but one is 16-bit whereas the other is not.
847 *
848 * @param orig - The instruction that is not 16-bit
849 * @param equiv - The instruction that is 16-bit
850 */
Joerg Sonnenberger2b86e482012-10-29 17:56:15 +0000851static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
Sean Callanan04cc3072009-12-19 02:59:52 +0000852 off_t i;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000853
Sean Callanan010b3732010-04-02 21:23:51 +0000854 for (i = 0;; i++) {
855 if (orig[i] == '\0' && equiv[i] == '\0')
Sean Callanan04cc3072009-12-19 02:59:52 +0000856 return TRUE;
Sean Callanan010b3732010-04-02 21:23:51 +0000857 if (orig[i] == '\0' || equiv[i] == '\0')
Sean Callanan04cc3072009-12-19 02:59:52 +0000858 return FALSE;
Sean Callanan010b3732010-04-02 21:23:51 +0000859 if (orig[i] != equiv[i]) {
860 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
Sean Callanan04cc3072009-12-19 02:59:52 +0000861 continue;
Sean Callanan010b3732010-04-02 21:23:51 +0000862 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
Sean Callanan04cc3072009-12-19 02:59:52 +0000863 continue;
Sean Callanan010b3732010-04-02 21:23:51 +0000864 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
Sean Callanan04cc3072009-12-19 02:59:52 +0000865 continue;
866 return FALSE;
867 }
868 }
869}
870
871/*
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000872 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
873 * appropriate for extended and escape opcodes. Determines the attributes and
Sean Callanan04cc3072009-12-19 02:59:52 +0000874 * context for the instruction before doing so.
875 *
876 * @param insn - The instruction whose ID is to be determined.
877 * @return - 0 if the ModR/M could be read when needed or was not needed;
878 * nonzero otherwise.
879 */
Roman Divacky67923802012-09-05 21:17:34 +0000880static int getID(struct InternalInstruction* insn, const void *miiArg) {
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000881 uint16_t attrMask;
Sean Callanan04cc3072009-12-19 02:59:52 +0000882 uint16_t instructionID;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000883
Nuno Lopes3ed6d602009-12-19 12:07:00 +0000884 dbgprintf(insn, "getID()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000885
Sean Callanan04cc3072009-12-19 02:59:52 +0000886 attrMask = ATTR_NONE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000887
Sean Callanan04cc3072009-12-19 02:59:52 +0000888 if (insn->mode == MODE_64BIT)
889 attrMask |= ATTR_64BIT;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000890
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000891 if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
892 attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
Sean Callananc3fd5232011-03-15 01:23:15 +0000893
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000894 if (insn->vectorExtensionType == TYPE_EVEX) {
895 switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
Sean Callananc3fd5232011-03-15 01:23:15 +0000896 case VEX_PREFIX_66:
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000897 attrMask |= ATTR_OPSIZE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000898 break;
899 case VEX_PREFIX_F3:
900 attrMask |= ATTR_XS;
901 break;
902 case VEX_PREFIX_F2:
903 attrMask |= ATTR_XD;
904 break;
905 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000906
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000907 if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
908 attrMask |= ATTR_EVEXKZ;
909 if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
910 attrMask |= ATTR_EVEXB;
911 if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
912 attrMask |= ATTR_EVEXK;
913 if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
914 attrMask |= ATTR_EVEXL;
915 if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
916 attrMask |= ATTR_EVEXL2;
917 }
918 else if (insn->vectorExtensionType == TYPE_VEX_3B) {
919 switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
920 case VEX_PREFIX_66:
921 attrMask |= ATTR_OPSIZE;
922 break;
923 case VEX_PREFIX_F3:
924 attrMask |= ATTR_XS;
925 break;
926 case VEX_PREFIX_F2:
927 attrMask |= ATTR_XD;
928 break;
929 }
930
931 if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
Sean Callananc3fd5232011-03-15 01:23:15 +0000932 attrMask |= ATTR_VEXL;
933 }
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000934 else if (insn->vectorExtensionType == TYPE_VEX_2B) {
935 switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
Sean Callananc3fd5232011-03-15 01:23:15 +0000936 case VEX_PREFIX_66:
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000937 attrMask |= ATTR_OPSIZE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000938 break;
939 case VEX_PREFIX_F3:
940 attrMask |= ATTR_XS;
941 break;
942 case VEX_PREFIX_F2:
943 attrMask |= ATTR_XD;
944 break;
945 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +0000946
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000947 if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
Craig Topper9e3e38a2013-10-03 05:17:48 +0000948 attrMask |= ATTR_VEXL;
949 }
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000950 else if (insn->vectorExtensionType == TYPE_XOP) {
951 switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
Craig Topper9e3e38a2013-10-03 05:17:48 +0000952 case VEX_PREFIX_66:
953 attrMask |= ATTR_OPSIZE;
954 break;
955 case VEX_PREFIX_F3:
956 attrMask |= ATTR_XS;
957 break;
958 case VEX_PREFIX_F2:
959 attrMask |= ATTR_XD;
960 break;
961 }
962
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000963 if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
Sean Callananc3fd5232011-03-15 01:23:15 +0000964 attrMask |= ATTR_VEXL;
965 }
966 else {
967 return -1;
968 }
969 }
970 else {
David Woodhouse5cf4c672014-01-20 12:02:35 +0000971 if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
Sean Callananc3fd5232011-03-15 01:23:15 +0000972 attrMask |= ATTR_OPSIZE;
Craig Topper6491c802012-02-27 01:54:29 +0000973 else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
974 attrMask |= ATTR_ADSIZE;
Sean Callananc3fd5232011-03-15 01:23:15 +0000975 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
976 attrMask |= ATTR_XS;
977 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
978 attrMask |= ATTR_XD;
Sean Callananc3fd5232011-03-15 01:23:15 +0000979 }
980
Craig Topperf18c8962011-10-04 06:30:42 +0000981 if (insn->rexPrefix & 0x08)
982 attrMask |= ATTR_REXW;
Craig Topperf01f1b52011-11-06 23:04:08 +0000983
Sean Callanan010b3732010-04-02 21:23:51 +0000984 if (getIDWithAttrMask(&instructionID, insn, attrMask))
Sean Callanan04cc3072009-12-19 02:59:52 +0000985 return -1;
Craig Topperf01f1b52011-11-06 23:04:08 +0000986
David Woodhouse9c74fdb2014-01-20 12:02:48 +0000987 /*
988 * JCXZ/JECXZ need special handling for 16-bit mode because the meaning
989 * of the AdSize prefix is inverted w.r.t. 32-bit mode.
990 */
991 if (insn->mode == MODE_16BIT && insn->opcode == 0xE3) {
992 const struct InstructionSpecifier *spec;
993 spec = specifierForUID(instructionID);
994
995 /*
996 * Check for Ii8PCRel instructions. We could alternatively do a
997 * string-compare on the names, but this is probably cheaper.
998 */
999 if (x86OperandSets[spec->operands][0].type == TYPE_REL8) {
1000 attrMask ^= ATTR_ADSIZE;
1001 if (getIDWithAttrMask(&instructionID, insn, attrMask))
1002 return -1;
1003 }
1004 }
1005
Sean Callanan04cc3072009-12-19 02:59:52 +00001006 /* The following clauses compensate for limitations of the tables. */
Craig Topperf01f1b52011-11-06 23:04:08 +00001007
David Woodhouse5cf4c672014-01-20 12:02:35 +00001008 if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) &&
1009 !(attrMask & ATTR_OPSIZE)) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001010 /*
1011 * The instruction tables make no distinction between instructions that
1012 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
1013 * particular spot (i.e., many MMX operations). In general we're
1014 * conservative, but in the specific case where OpSize is present but not
1015 * in the right place we check if there's a 16-bit operation.
1016 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001017
Benjamin Kramerde0a4fb2010-10-23 09:10:44 +00001018 const struct InstructionSpecifier *spec;
Sean Callanan04cc3072009-12-19 02:59:52 +00001019 uint16_t instructionIDWithOpsize;
Benjamin Kramer915e3d92012-02-11 16:01:02 +00001020 const char *specName, *specWithOpSizeName;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001021
Sean Callanan04cc3072009-12-19 02:59:52 +00001022 spec = specifierForUID(instructionID);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001023
Sean Callanan04cc3072009-12-19 02:59:52 +00001024 if (getIDWithAttrMask(&instructionIDWithOpsize,
1025 insn,
1026 attrMask | ATTR_OPSIZE)) {
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001027 /*
Sean Callanan04cc3072009-12-19 02:59:52 +00001028 * ModRM required with OpSize but not present; give up and return version
1029 * without OpSize set
1030 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001031
Sean Callanan04cc3072009-12-19 02:59:52 +00001032 insn->instructionID = instructionID;
1033 insn->spec = spec;
1034 return 0;
1035 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001036
Richard Smith89ee75d2014-04-20 21:07:34 +00001037 specName = GetInstrName(instructionID, miiArg);
1038 specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg);
Benjamin Kramer478e8de2012-02-11 14:50:54 +00001039
David Woodhouse5cf4c672014-01-20 12:02:35 +00001040 if (is16BitEquivalent(specName, specWithOpSizeName) &&
1041 (insn->mode == MODE_16BIT) ^ insn->prefixPresent[0x66]) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001042 insn->instructionID = instructionIDWithOpsize;
Benjamin Kramer915e3d92012-02-11 16:01:02 +00001043 insn->spec = specifierForUID(instructionIDWithOpsize);
Sean Callanan04cc3072009-12-19 02:59:52 +00001044 } else {
1045 insn->instructionID = instructionID;
1046 insn->spec = spec;
1047 }
1048 return 0;
1049 }
Craig Topper21c33652011-10-02 16:56:09 +00001050
1051 if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
1052 insn->rexPrefix & 0x01) {
1053 /*
1054 * NOOP shouldn't decode as NOOP if REX.b is set. Instead
1055 * it should decode as XCHG %r8, %eax.
1056 */
1057
1058 const struct InstructionSpecifier *spec;
1059 uint16_t instructionIDWithNewOpcode;
1060 const struct InstructionSpecifier *specWithNewOpcode;
1061
1062 spec = specifierForUID(instructionID);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001063
Craig Topperb58a9662011-10-05 03:29:32 +00001064 /* Borrow opcode from one of the other XCHGar opcodes */
Craig Topper21c33652011-10-02 16:56:09 +00001065 insn->opcode = 0x91;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001066
Craig Topper21c33652011-10-02 16:56:09 +00001067 if (getIDWithAttrMask(&instructionIDWithNewOpcode,
1068 insn,
1069 attrMask)) {
1070 insn->opcode = 0x90;
1071
1072 insn->instructionID = instructionID;
1073 insn->spec = spec;
1074 return 0;
1075 }
1076
1077 specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode);
1078
Craig Topperb58a9662011-10-05 03:29:32 +00001079 /* Change back */
Craig Topper21c33652011-10-02 16:56:09 +00001080 insn->opcode = 0x90;
1081
1082 insn->instructionID = instructionIDWithNewOpcode;
1083 insn->spec = specWithNewOpcode;
1084
1085 return 0;
1086 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001087
Sean Callanan04cc3072009-12-19 02:59:52 +00001088 insn->instructionID = instructionID;
1089 insn->spec = specifierForUID(insn->instructionID);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001090
Sean Callanan04cc3072009-12-19 02:59:52 +00001091 return 0;
1092}
1093
1094/*
1095 * readSIB - Consumes the SIB byte to determine addressing information for an
1096 * instruction.
1097 *
1098 * @param insn - The instruction whose SIB byte is to be read.
1099 * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
1100 */
1101static int readSIB(struct InternalInstruction* insn) {
Richard Smith89ee75d2014-04-20 21:07:34 +00001102 SIBIndex sibIndexBase = SIB_INDEX_NONE;
1103 SIBBase sibBaseBase = SIB_BASE_NONE;
Sean Callanan04cc3072009-12-19 02:59:52 +00001104 uint8_t index, base;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001105
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001106 dbgprintf(insn, "readSIB()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001107
Sean Callanan04cc3072009-12-19 02:59:52 +00001108 if (insn->consumedSIB)
1109 return 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001110
Sean Callanan04cc3072009-12-19 02:59:52 +00001111 insn->consumedSIB = TRUE;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001112
Sean Callanan04cc3072009-12-19 02:59:52 +00001113 switch (insn->addressSize) {
1114 case 2:
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001115 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
Sean Callanan04cc3072009-12-19 02:59:52 +00001116 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001117 case 4:
1118 sibIndexBase = SIB_INDEX_EAX;
1119 sibBaseBase = SIB_BASE_EAX;
1120 break;
1121 case 8:
1122 sibIndexBase = SIB_INDEX_RAX;
1123 sibBaseBase = SIB_BASE_RAX;
1124 break;
1125 }
1126
1127 if (consumeByte(insn, &insn->sib))
1128 return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001129
Sean Callanan04cc3072009-12-19 02:59:52 +00001130 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001131 if (insn->vectorExtensionType == TYPE_EVEX)
1132 index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001133
Sean Callanan04cc3072009-12-19 02:59:52 +00001134 switch (index) {
1135 case 0x4:
1136 insn->sibIndex = SIB_INDEX_NONE;
1137 break;
1138 default:
Benjamin Kramer25bddae2011-02-27 18:13:53 +00001139 insn->sibIndex = (SIBIndex)(sibIndexBase + index);
Sean Callanan04cc3072009-12-19 02:59:52 +00001140 if (insn->sibIndex == SIB_INDEX_sib ||
1141 insn->sibIndex == SIB_INDEX_sib64)
1142 insn->sibIndex = SIB_INDEX_NONE;
1143 break;
1144 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001145
Sean Callanan04cc3072009-12-19 02:59:52 +00001146 switch (scaleFromSIB(insn->sib)) {
1147 case 0:
1148 insn->sibScale = 1;
1149 break;
1150 case 1:
1151 insn->sibScale = 2;
1152 break;
1153 case 2:
1154 insn->sibScale = 4;
1155 break;
1156 case 3:
1157 insn->sibScale = 8;
1158 break;
1159 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001160
Sean Callanan04cc3072009-12-19 02:59:52 +00001161 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001162
Sean Callanan04cc3072009-12-19 02:59:52 +00001163 switch (base) {
1164 case 0x5:
Craig Topperfae5ac22014-02-17 10:03:43 +00001165 case 0xd:
Sean Callanan04cc3072009-12-19 02:59:52 +00001166 switch (modFromModRM(insn->modRM)) {
1167 case 0x0:
1168 insn->eaDisplacement = EA_DISP_32;
1169 insn->sibBase = SIB_BASE_NONE;
1170 break;
1171 case 0x1:
1172 insn->eaDisplacement = EA_DISP_8;
Craig Topperfae5ac22014-02-17 10:03:43 +00001173 insn->sibBase = (SIBBase)(sibBaseBase + base);
Sean Callanan04cc3072009-12-19 02:59:52 +00001174 break;
1175 case 0x2:
1176 insn->eaDisplacement = EA_DISP_32;
Craig Topperfae5ac22014-02-17 10:03:43 +00001177 insn->sibBase = (SIBBase)(sibBaseBase + base);
Sean Callanan04cc3072009-12-19 02:59:52 +00001178 break;
1179 case 0x3:
Sean Callanan010b3732010-04-02 21:23:51 +00001180 debug("Cannot have Mod = 0b11 and a SIB byte");
1181 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001182 }
1183 break;
1184 default:
Benjamin Kramer25bddae2011-02-27 18:13:53 +00001185 insn->sibBase = (SIBBase)(sibBaseBase + base);
Sean Callanan04cc3072009-12-19 02:59:52 +00001186 break;
1187 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001188
Sean Callanan04cc3072009-12-19 02:59:52 +00001189 return 0;
1190}
1191
1192/*
1193 * readDisplacement - Consumes the displacement of an instruction.
1194 *
1195 * @param insn - The instruction whose displacement is to be read.
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001196 * @return - 0 if the displacement byte was successfully read; nonzero
Sean Callanan04cc3072009-12-19 02:59:52 +00001197 * otherwise.
1198 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001199static int readDisplacement(struct InternalInstruction* insn) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001200 int8_t d8;
1201 int16_t d16;
1202 int32_t d32;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001203
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001204 dbgprintf(insn, "readDisplacement()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001205
Sean Callanan04cc3072009-12-19 02:59:52 +00001206 if (insn->consumedDisplacement)
1207 return 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001208
Sean Callanan04cc3072009-12-19 02:59:52 +00001209 insn->consumedDisplacement = TRUE;
Kevin Enderby6fbcd8d2012-02-23 18:18:17 +00001210 insn->displacementOffset = insn->readerCursor - insn->startLocation;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001211
Sean Callanan04cc3072009-12-19 02:59:52 +00001212 switch (insn->eaDisplacement) {
1213 case EA_DISP_NONE:
1214 insn->consumedDisplacement = FALSE;
1215 break;
1216 case EA_DISP_8:
1217 if (consumeInt8(insn, &d8))
1218 return -1;
1219 insn->displacement = d8;
1220 break;
1221 case EA_DISP_16:
1222 if (consumeInt16(insn, &d16))
1223 return -1;
1224 insn->displacement = d16;
1225 break;
1226 case EA_DISP_32:
1227 if (consumeInt32(insn, &d32))
1228 return -1;
1229 insn->displacement = d32;
1230 break;
1231 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001232
Sean Callanan04cc3072009-12-19 02:59:52 +00001233 insn->consumedDisplacement = TRUE;
1234 return 0;
1235}
1236
1237/*
1238 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
1239 * displacement) for an instruction and interprets it.
1240 *
1241 * @param insn - The instruction whose addressing information is to be read.
1242 * @return - 0 if the information was successfully read; nonzero otherwise.
1243 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001244static int readModRM(struct InternalInstruction* insn) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001245 uint8_t mod, rm, reg;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001246
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001247 dbgprintf(insn, "readModRM()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001248
Sean Callanan04cc3072009-12-19 02:59:52 +00001249 if (insn->consumedModRM)
1250 return 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001251
Rafael Espindola9f9a1062011-01-06 16:48:42 +00001252 if (consumeByte(insn, &insn->modRM))
1253 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001254 insn->consumedModRM = TRUE;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001255
Sean Callanan04cc3072009-12-19 02:59:52 +00001256 mod = modFromModRM(insn->modRM);
1257 rm = rmFromModRM(insn->modRM);
1258 reg = regFromModRM(insn->modRM);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001259
Sean Callanan04cc3072009-12-19 02:59:52 +00001260 /*
1261 * This goes by insn->registerSize to pick the correct register, which messes
1262 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
1263 * fixupReg().
1264 */
1265 switch (insn->registerSize) {
1266 case 2:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001267 insn->regBase = MODRM_REG_AX;
Sean Callanan04cc3072009-12-19 02:59:52 +00001268 insn->eaRegBase = EA_REG_AX;
1269 break;
1270 case 4:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001271 insn->regBase = MODRM_REG_EAX;
Sean Callanan04cc3072009-12-19 02:59:52 +00001272 insn->eaRegBase = EA_REG_EAX;
1273 break;
1274 case 8:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001275 insn->regBase = MODRM_REG_RAX;
Sean Callanan04cc3072009-12-19 02:59:52 +00001276 insn->eaRegBase = EA_REG_RAX;
1277 break;
1278 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001279
Sean Callanan04cc3072009-12-19 02:59:52 +00001280 reg |= rFromREX(insn->rexPrefix) << 3;
1281 rm |= bFromREX(insn->rexPrefix) << 3;
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001282 if (insn->vectorExtensionType == TYPE_EVEX) {
1283 reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1284 rm |= xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
1285 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001286
Sean Callanan04cc3072009-12-19 02:59:52 +00001287 insn->reg = (Reg)(insn->regBase + reg);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001288
Sean Callanan04cc3072009-12-19 02:59:52 +00001289 switch (insn->addressSize) {
1290 case 2:
1291 insn->eaBaseBase = EA_BASE_BX_SI;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001292
Sean Callanan04cc3072009-12-19 02:59:52 +00001293 switch (mod) {
1294 case 0x0:
1295 if (rm == 0x6) {
1296 insn->eaBase = EA_BASE_NONE;
1297 insn->eaDisplacement = EA_DISP_16;
Sean Callanan010b3732010-04-02 21:23:51 +00001298 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001299 return -1;
1300 } else {
1301 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1302 insn->eaDisplacement = EA_DISP_NONE;
1303 }
1304 break;
1305 case 0x1:
1306 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1307 insn->eaDisplacement = EA_DISP_8;
Craig Topper399e39e2014-01-25 22:48:43 +00001308 insn->displacementSize = 1;
Sean Callanan010b3732010-04-02 21:23:51 +00001309 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001310 return -1;
1311 break;
1312 case 0x2:
1313 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1314 insn->eaDisplacement = EA_DISP_16;
Sean Callanan010b3732010-04-02 21:23:51 +00001315 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001316 return -1;
1317 break;
1318 case 0x3:
1319 insn->eaBase = (EABase)(insn->eaRegBase + rm);
Sean Callanan010b3732010-04-02 21:23:51 +00001320 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001321 return -1;
1322 break;
1323 }
1324 break;
1325 case 4:
1326 case 8:
1327 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001328
Sean Callanan04cc3072009-12-19 02:59:52 +00001329 switch (mod) {
1330 case 0x0:
1331 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
1332 switch (rm) {
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001333 case 0x14:
Sean Callanan04cc3072009-12-19 02:59:52 +00001334 case 0x4:
1335 case 0xc: /* in case REXW.b is set */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001336 insn->eaBase = (insn->addressSize == 4 ?
Sean Callanan04cc3072009-12-19 02:59:52 +00001337 EA_BASE_sib : EA_BASE_sib64);
Craig Topper38afbfd2014-03-20 05:56:00 +00001338 if (readSIB(insn) || readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001339 return -1;
1340 break;
1341 case 0x5:
1342 insn->eaBase = EA_BASE_NONE;
1343 insn->eaDisplacement = EA_DISP_32;
Sean Callanan010b3732010-04-02 21:23:51 +00001344 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001345 return -1;
1346 break;
1347 default:
1348 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
1349 break;
1350 }
1351 break;
1352 case 0x1:
Craig Topper399e39e2014-01-25 22:48:43 +00001353 insn->displacementSize = 1;
Alp Toker771f7652014-01-26 18:44:34 +00001354 /* FALLTHROUGH */
Sean Callanan04cc3072009-12-19 02:59:52 +00001355 case 0x2:
1356 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
1357 switch (rm) {
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001358 case 0x14:
Sean Callanan04cc3072009-12-19 02:59:52 +00001359 case 0x4:
1360 case 0xc: /* in case REXW.b is set */
1361 insn->eaBase = EA_BASE_sib;
Craig Topper38afbfd2014-03-20 05:56:00 +00001362 if (readSIB(insn) || readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001363 return -1;
1364 break;
1365 default:
1366 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
Sean Callanan010b3732010-04-02 21:23:51 +00001367 if (readDisplacement(insn))
Sean Callanan04cc3072009-12-19 02:59:52 +00001368 return -1;
1369 break;
1370 }
1371 break;
1372 case 0x3:
1373 insn->eaDisplacement = EA_DISP_NONE;
1374 insn->eaBase = (EABase)(insn->eaRegBase + rm);
1375 break;
1376 }
1377 break;
1378 } /* switch (insn->addressSize) */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001379
Sean Callanan04cc3072009-12-19 02:59:52 +00001380 return 0;
1381}
1382
/*
 * GENERIC_FIXUP_FUNC - Expands to a static function `name` that converts a
 *   raw register index into the enumerator appropriate for an operand type.
 *
 * @param name    - Name of the function to define.
 * @param base    - Expression (evaluated with `insn` in scope) giving the
 *                  enumerator that TYPE_Rv indices are relative to.
 * @param prefix  - Enumerator name prefix (e.g. MODRM_REG or EA_REG) used to
 *                  form prefix##_AL, prefix##_XMM0, and so on.
 *
 * The generated function sets *valid to 1, or to 0 when the operand type is
 * unhandled or the index is out of range for the MMX, segment, debug or
 * control register classes.
 */
#define GENERIC_FIXUP_FUNC(name, base, prefix)                              \
  static uint8_t name(struct InternalInstruction *insn,                     \
                      OperandType type,                                     \
                      uint8_t index,                                        \
                      uint8_t *valid) {                                     \
    *valid = 1;                                                             \
    switch (type) {                                                         \
    case TYPE_Rv:                                                           \
      return base + index;                                                  \
    case TYPE_R8:                                                           \
      /* With any REX prefix, indices 4-7 select SPL.. instead of AL+n. */  \
      return (insn->rexPrefix && index >= 4 && index <= 7)                  \
                 ? prefix##_SPL + (index - 4)                               \
                 : prefix##_AL + index;                                     \
    case TYPE_R16:                                                          \
      return prefix##_AX + index;                                           \
    case TYPE_R32:                                                          \
      return prefix##_EAX + index;                                          \
    case TYPE_R64:                                                          \
      return prefix##_RAX + index;                                          \
    case TYPE_XMM512:                                                       \
      return prefix##_ZMM0 + index;                                         \
    case TYPE_XMM256:                                                       \
      return prefix##_YMM0 + index;                                         \
    case TYPE_XMM128:                                                       \
    case TYPE_XMM64:                                                        \
    case TYPE_XMM32:                                                        \
    case TYPE_XMM:                                                          \
      return prefix##_XMM0 + index;                                         \
    case TYPE_VK1:                                                          \
    case TYPE_VK8:                                                          \
    case TYPE_VK16:                                                         \
      return prefix##_K0 + index;                                           \
    case TYPE_MM64:                                                         \
    case TYPE_MM32:                                                         \
    case TYPE_MM:                                                           \
      *valid = (index <= 7);                                                \
      return prefix##_MM0 + index;                                          \
    case TYPE_SEGMENTREG:                                                   \
      *valid = (index <= 5);                                                \
      return prefix##_ES + index;                                           \
    case TYPE_DEBUGREG:                                                     \
      *valid = (index <= 7);                                                \
      return prefix##_DR0 + index;                                          \
    case TYPE_CONTROLREG:                                                   \
      *valid = (index <= 8);                                                \
      return prefix##_CR0 + index;                                          \
    default:                                                                \
      debug("Unhandled register type");                                     \
      *valid = 0;                                                           \
      return 0;                                                             \
    }                                                                       \
  }
1442
1443/*
1444 * fixup*Value - Consults an operand type to determine the meaning of the
1445 * reg or R/M field. If the operand is an XMM operand, for example, an
1446 * operand would be XMM0 instead of AX, which readModRM() would otherwise
1447 * misinterpret it as.
1448 *
1449 * @param insn - The instruction containing the operand.
1450 * @param type - The operand type.
1451 * @param index - The existing value of the field as reported by readModRM().
1452 * @param valid - The address of a uint8_t. The target is set to 1 if the
1453 * field is valid for the register class; 0 if not.
Sean Callanan010b3732010-04-02 21:23:51 +00001454 * @return - The proper value.
Sean Callanan04cc3072009-12-19 02:59:52 +00001455 */
Sean Callanan2f9443f2009-12-22 02:07:42 +00001456GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
Sean Callanan04cc3072009-12-19 02:59:52 +00001457GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
1458
1459/*
1460 * fixupReg - Consults an operand specifier to determine which of the
1461 * fixup*Value functions to use in correcting readModRM()'ss interpretation.
1462 *
1463 * @param insn - See fixup*Value().
1464 * @param op - The operand specifier.
1465 * @return - 0 if fixup was successful; -1 if the register returned was
1466 * invalid for its class.
1467 */
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001468static int fixupReg(struct InternalInstruction *insn,
Benjamin Kramerde0a4fb2010-10-23 09:10:44 +00001469 const struct OperandSpecifier *op) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001470 uint8_t valid;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001471
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001472 dbgprintf(insn, "fixupReg()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001473
Sean Callanan04cc3072009-12-19 02:59:52 +00001474 switch ((OperandEncoding)op->encoding) {
1475 default:
Sean Callanan010b3732010-04-02 21:23:51 +00001476 debug("Expected a REG or R/M encoding in fixupReg");
1477 return -1;
Sean Callananc3fd5232011-03-15 01:23:15 +00001478 case ENCODING_VVVV:
1479 insn->vvvv = (Reg)fixupRegValue(insn,
1480 (OperandType)op->type,
1481 insn->vvvv,
1482 &valid);
1483 if (!valid)
1484 return -1;
1485 break;
Sean Callanan04cc3072009-12-19 02:59:52 +00001486 case ENCODING_REG:
1487 insn->reg = (Reg)fixupRegValue(insn,
1488 (OperandType)op->type,
1489 insn->reg - insn->regBase,
1490 &valid);
1491 if (!valid)
1492 return -1;
1493 break;
1494 case ENCODING_RM:
1495 if (insn->eaBase >= insn->eaRegBase) {
1496 insn->eaBase = (EABase)fixupRMValue(insn,
1497 (OperandType)op->type,
1498 insn->eaBase - insn->eaRegBase,
1499 &valid);
1500 if (!valid)
1501 return -1;
1502 }
1503 break;
1504 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001505
Sean Callanan04cc3072009-12-19 02:59:52 +00001506 return 0;
1507}
1508
1509/*
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001510 * readOpcodeRegister - Reads an operand from the opcode field of an
Sean Callanan04cc3072009-12-19 02:59:52 +00001511 * instruction and interprets it appropriately given the operand width.
1512 * Handles AddRegFrm instructions.
1513 *
Craig Topper91551182014-01-01 15:29:32 +00001514 * @param insn - the instruction whose opcode field is to be read.
Sean Callanan04cc3072009-12-19 02:59:52 +00001515 * @param size - The width (in bytes) of the register being specified.
1516 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1517 * RAX.
Sean Callanan010b3732010-04-02 21:23:51 +00001518 * @return - 0 on success; nonzero otherwise.
Sean Callanan04cc3072009-12-19 02:59:52 +00001519 */
Sean Callanan010b3732010-04-02 21:23:51 +00001520static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001521 dbgprintf(insn, "readOpcodeRegister()");
Sean Callanan04cc3072009-12-19 02:59:52 +00001522
Sean Callanan04cc3072009-12-19 02:59:52 +00001523 if (size == 0)
1524 size = insn->registerSize;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001525
Sean Callanan04cc3072009-12-19 02:59:52 +00001526 switch (size) {
1527 case 1:
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001528 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
Craig Topper91551182014-01-01 15:29:32 +00001529 | (insn->opcode & 7)));
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001530 if (insn->rexPrefix &&
Sean Callanan010b3732010-04-02 21:23:51 +00001531 insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1532 insn->opcodeRegister < MODRM_REG_AL + 0x8) {
Sean Callanan2f9443f2009-12-22 02:07:42 +00001533 insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1534 + (insn->opcodeRegister - MODRM_REG_AL - 4));
Sean Callanan04cc3072009-12-19 02:59:52 +00001535 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001536
Sean Callanan04cc3072009-12-19 02:59:52 +00001537 break;
1538 case 2:
Sean Callanan2f9443f2009-12-22 02:07:42 +00001539 insn->opcodeRegister = (Reg)(MODRM_REG_AX
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001540 + ((bFromREX(insn->rexPrefix) << 3)
Craig Topper91551182014-01-01 15:29:32 +00001541 | (insn->opcode & 7)));
Sean Callanan04cc3072009-12-19 02:59:52 +00001542 break;
1543 case 4:
Sean Callanan010b3732010-04-02 21:23:51 +00001544 insn->opcodeRegister = (Reg)(MODRM_REG_EAX
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001545 + ((bFromREX(insn->rexPrefix) << 3)
Craig Topper91551182014-01-01 15:29:32 +00001546 | (insn->opcode & 7)));
Sean Callanan04cc3072009-12-19 02:59:52 +00001547 break;
1548 case 8:
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001549 insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1550 + ((bFromREX(insn->rexPrefix) << 3)
Craig Topper91551182014-01-01 15:29:32 +00001551 | (insn->opcode & 7)));
Sean Callanan04cc3072009-12-19 02:59:52 +00001552 break;
1553 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001554
Sean Callanan010b3732010-04-02 21:23:51 +00001555 return 0;
Sean Callanan04cc3072009-12-19 02:59:52 +00001556}
1557
1558/*
1559 * readImmediate - Consumes an immediate operand from an instruction, given the
1560 * desired operand size.
1561 *
1562 * @param insn - The instruction whose operand is to be read.
1563 * @param size - The width (in bytes) of the operand.
1564 * @return - 0 if the immediate was successfully consumed; nonzero
1565 * otherwise.
1566 */
1567static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1568 uint8_t imm8;
1569 uint16_t imm16;
1570 uint32_t imm32;
1571 uint64_t imm64;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001572
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001573 dbgprintf(insn, "readImmediate()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001574
Sean Callanan010b3732010-04-02 21:23:51 +00001575 if (insn->numImmediatesConsumed == 2) {
1576 debug("Already consumed two immediates");
1577 return -1;
1578 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001579
Sean Callanan04cc3072009-12-19 02:59:52 +00001580 if (size == 0)
1581 size = insn->immediateSize;
1582 else
1583 insn->immediateSize = size;
Kevin Enderby6fbcd8d2012-02-23 18:18:17 +00001584 insn->immediateOffset = insn->readerCursor - insn->startLocation;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001585
Sean Callanan04cc3072009-12-19 02:59:52 +00001586 switch (size) {
1587 case 1:
1588 if (consumeByte(insn, &imm8))
1589 return -1;
1590 insn->immediates[insn->numImmediatesConsumed] = imm8;
1591 break;
1592 case 2:
1593 if (consumeUInt16(insn, &imm16))
1594 return -1;
1595 insn->immediates[insn->numImmediatesConsumed] = imm16;
1596 break;
1597 case 4:
1598 if (consumeUInt32(insn, &imm32))
1599 return -1;
1600 insn->immediates[insn->numImmediatesConsumed] = imm32;
1601 break;
1602 case 8:
1603 if (consumeUInt64(insn, &imm64))
1604 return -1;
1605 insn->immediates[insn->numImmediatesConsumed] = imm64;
1606 break;
1607 }
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001608
Sean Callanan04cc3072009-12-19 02:59:52 +00001609 insn->numImmediatesConsumed++;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001610
Sean Callanan04cc3072009-12-19 02:59:52 +00001611 return 0;
1612}
1613
1614/*
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001615 * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
Sean Callananc3fd5232011-03-15 01:23:15 +00001616 *
1617 * @param insn - The instruction whose operand is to be read.
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001618 * @return - 0 if the vvvv was successfully consumed; nonzero
Sean Callananc3fd5232011-03-15 01:23:15 +00001619 * otherwise.
1620 */
1621static int readVVVV(struct InternalInstruction* insn) {
1622 dbgprintf(insn, "readVVVV()");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001623
Richard Smith89ee75d2014-04-20 21:07:34 +00001624 int vvvv;
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001625 if (insn->vectorExtensionType == TYPE_EVEX)
Richard Smith89ee75d2014-04-20 21:07:34 +00001626 vvvv = vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]);
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001627 else if (insn->vectorExtensionType == TYPE_VEX_3B)
Richard Smith89ee75d2014-04-20 21:07:34 +00001628 vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001629 else if (insn->vectorExtensionType == TYPE_VEX_2B)
Richard Smith89ee75d2014-04-20 21:07:34 +00001630 vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001631 else if (insn->vectorExtensionType == TYPE_XOP)
Richard Smith89ee75d2014-04-20 21:07:34 +00001632 vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
Sean Callananc3fd5232011-03-15 01:23:15 +00001633 else
1634 return -1;
1635
Craig Topper0d0be472011-10-03 08:14:29 +00001636 if (insn->mode != MODE_64BIT)
Richard Smith89ee75d2014-04-20 21:07:34 +00001637 vvvv &= 0x7;
Craig Topper0d0be472011-10-03 08:14:29 +00001638
Richard Smith89ee75d2014-04-20 21:07:34 +00001639 insn->vvvv = static_cast<Reg>(vvvv);
Sean Callananc3fd5232011-03-15 01:23:15 +00001640 return 0;
1641}
1642
1643/*
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001644 * readMaskRegister - Reads an mask register from the opcode field of an
1645 * instruction.
1646 *
1647 * @param insn - The instruction whose opcode field is to be read.
1648 * @return - 0 on success; nonzero otherwise.
1649 */
1650static int readMaskRegister(struct InternalInstruction* insn) {
1651 dbgprintf(insn, "readMaskRegister()");
1652
1653 if (insn->vectorExtensionType != TYPE_EVEX)
1654 return -1;
1655
Richard Smith89ee75d2014-04-20 21:07:34 +00001656 insn->writemask =
1657 static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001658 return 0;
1659}
1660
1661/*
Sean Callanan04cc3072009-12-19 02:59:52 +00001662 * readOperands - Consults the specifier for an instruction and consumes all
1663 * operands for that instruction, interpreting them as it goes.
1664 *
1665 * @param insn - The instruction whose operands are to be read and interpreted.
1666 * @return - 0 if all operands could be read; nonzero otherwise.
1667 */
1668static int readOperands(struct InternalInstruction* insn) {
1669 int index;
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001670 int hasVVVV, needVVVV;
Craig Topper2ba766a2011-12-30 06:23:39 +00001671 int sawRegImm = 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001672
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001673 dbgprintf(insn, "readOperands()");
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001674
1675 /* If non-zero vvvv specified, need to make sure one of the operands
1676 uses it. */
1677 hasVVVV = !readVVVV(insn);
1678 needVVVV = hasVVVV && (insn->vvvv != 0);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001679
Sean Callanan04cc3072009-12-19 02:59:52 +00001680 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
Craig Topperb8aec082012-08-01 07:39:18 +00001681 switch (x86OperandSets[insn->spec->operands][index].encoding) {
Sean Callanan04cc3072009-12-19 02:59:52 +00001682 case ENCODING_NONE:
David Woodhouse2ef8d9c2014-01-22 15:08:08 +00001683 case ENCODING_SI:
David Woodhouseb33c2ef2014-01-22 15:08:21 +00001684 case ENCODING_DI:
Sean Callanan04cc3072009-12-19 02:59:52 +00001685 break;
1686 case ENCODING_REG:
1687 case ENCODING_RM:
1688 if (readModRM(insn))
1689 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001690 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
Sean Callanan04cc3072009-12-19 02:59:52 +00001691 return -1;
1692 break;
1693 case ENCODING_CB:
1694 case ENCODING_CW:
1695 case ENCODING_CD:
1696 case ENCODING_CP:
1697 case ENCODING_CO:
1698 case ENCODING_CT:
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001699 dbgprintf(insn, "We currently don't hande code-offset encodings");
Sean Callanan04cc3072009-12-19 02:59:52 +00001700 return -1;
1701 case ENCODING_IB:
Craig Topper2ba766a2011-12-30 06:23:39 +00001702 if (sawRegImm) {
Benjamin Kramer9c48f262012-01-04 22:06:45 +00001703 /* Saw a register immediate so don't read again and instead split the
1704 previous immediate. FIXME: This is a hack. */
Benjamin Kramer47aecca2012-01-01 17:55:36 +00001705 insn->immediates[insn->numImmediatesConsumed] =
1706 insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1707 ++insn->numImmediatesConsumed;
Craig Topper2ba766a2011-12-30 06:23:39 +00001708 break;
1709 }
Sean Callanan04cc3072009-12-19 02:59:52 +00001710 if (readImmediate(insn, 1))
1711 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001712 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
Sean Callanan1efe6612010-04-07 21:42:19 +00001713 insn->immediates[insn->numImmediatesConsumed - 1] > 7)
1714 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001715 if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
Craig Topper7629d632012-04-03 05:20:24 +00001716 insn->immediates[insn->numImmediatesConsumed - 1] > 31)
1717 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001718 if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
1719 x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
Craig Topper2ba766a2011-12-30 06:23:39 +00001720 sawRegImm = 1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001721 break;
1722 case ENCODING_IW:
1723 if (readImmediate(insn, 2))
1724 return -1;
1725 break;
1726 case ENCODING_ID:
1727 if (readImmediate(insn, 4))
1728 return -1;
1729 break;
1730 case ENCODING_IO:
1731 if (readImmediate(insn, 8))
1732 return -1;
1733 break;
1734 case ENCODING_Iv:
Sean Callanan010b3732010-04-02 21:23:51 +00001735 if (readImmediate(insn, insn->immediateSize))
1736 return -1;
Chris Lattnerd4758fc2010-04-16 21:15:15 +00001737 break;
Sean Callanan04cc3072009-12-19 02:59:52 +00001738 case ENCODING_Ia:
Sean Callanan010b3732010-04-02 21:23:51 +00001739 if (readImmediate(insn, insn->addressSize))
1740 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001741 break;
1742 case ENCODING_RB:
Sean Callanan010b3732010-04-02 21:23:51 +00001743 if (readOpcodeRegister(insn, 1))
1744 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001745 break;
1746 case ENCODING_RW:
Sean Callanan010b3732010-04-02 21:23:51 +00001747 if (readOpcodeRegister(insn, 2))
1748 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001749 break;
1750 case ENCODING_RD:
Sean Callanan010b3732010-04-02 21:23:51 +00001751 if (readOpcodeRegister(insn, 4))
1752 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001753 break;
1754 case ENCODING_RO:
Sean Callanan010b3732010-04-02 21:23:51 +00001755 if (readOpcodeRegister(insn, 8))
1756 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001757 break;
1758 case ENCODING_Rv:
Sean Callanan010b3732010-04-02 21:23:51 +00001759 if (readOpcodeRegister(insn, 0))
1760 return -1;
Sean Callanan04cc3072009-12-19 02:59:52 +00001761 break;
Craig Topper623b0d62014-01-01 14:22:37 +00001762 case ENCODING_FP:
Sean Callananc3fd5232011-03-15 01:23:15 +00001763 break;
1764 case ENCODING_VVVV:
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001765 needVVVV = 0; /* Mark that we have found a VVVV operand. */
1766 if (!hasVVVV)
Sean Callananc3fd5232011-03-15 01:23:15 +00001767 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001768 if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
Sean Callananc3fd5232011-03-15 01:23:15 +00001769 return -1;
1770 break;
Elena Demikhovsky371e3632013-12-25 11:40:51 +00001771 case ENCODING_WRITEMASK:
1772 if (readMaskRegister(insn))
1773 return -1;
1774 break;
Sean Callanan04cc3072009-12-19 02:59:52 +00001775 case ENCODING_DUP:
1776 break;
1777 default:
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001778 dbgprintf(insn, "Encountered an operand with an unknown encoding.");
Sean Callanan04cc3072009-12-19 02:59:52 +00001779 return -1;
1780 }
1781 }
Craig Topper8dd7bbc2011-09-13 07:37:44 +00001782
1783 /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
1784 if (needVVVV) return -1;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001785
Sean Callanan04cc3072009-12-19 02:59:52 +00001786 return 0;
1787}
1788
1789/*
1790 * decodeInstruction - Reads and interprets a full instruction provided by the
1791 * user.
1792 *
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001793 * @param insn - A pointer to the instruction to be populated. Must be
Sean Callanan04cc3072009-12-19 02:59:52 +00001794 * pre-allocated.
1795 * @param reader - The function to be used to read the instruction's bytes.
1796 * @param readerArg - A generic argument to be passed to the reader to store
1797 * any internal state.
1798 * @param logger - If non-NULL, the function to be used to write log messages
1799 * and warnings.
1800 * @param loggerArg - A generic argument to be passed to the logger to store
1801 * any internal state.
1802 * @param startLoc - The address (in the reader's address space) of the first
1803 * byte in the instruction.
1804 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1805 * decode the instruction in.
1806 * @return - 0 if the instruction's memory could be read; nonzero if
1807 * not.
1808 */
Richard Smith89ee75d2014-04-20 21:07:34 +00001809int llvm::X86Disassembler::decodeInstruction(
1810 struct InternalInstruction *insn, byteReader_t reader,
1811 const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg,
1812 uint64_t startLoc, DisassemblerMode mode) {
Daniel Dunbarc745a622009-12-19 03:31:50 +00001813 memset(insn, 0, sizeof(struct InternalInstruction));
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001814
Sean Callanan04cc3072009-12-19 02:59:52 +00001815 insn->reader = reader;
1816 insn->readerArg = readerArg;
1817 insn->dlog = logger;
1818 insn->dlogArg = loggerArg;
1819 insn->startLocation = startLoc;
1820 insn->readerCursor = startLoc;
1821 insn->mode = mode;
1822 insn->numImmediatesConsumed = 0;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001823
Sean Callanan04cc3072009-12-19 02:59:52 +00001824 if (readPrefixes(insn) ||
1825 readOpcode(insn) ||
Benjamin Kramer478e8de2012-02-11 14:50:54 +00001826 getID(insn, miiArg) ||
Sean Callanan04cc3072009-12-19 02:59:52 +00001827 insn->instructionID == 0 ||
1828 readOperands(insn))
1829 return -1;
Craig Topperb8aec082012-08-01 07:39:18 +00001830
1831 insn->operands = &x86OperandSets[insn->spec->operands][0];
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001832
Sean Callanan04cc3072009-12-19 02:59:52 +00001833 insn->length = insn->readerCursor - insn->startLocation;
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001834
Benjamin Kramer4f672272010-03-18 12:18:36 +00001835 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
1836 startLoc, insn->readerCursor, insn->length);
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001837
Sean Callanan04cc3072009-12-19 02:59:52 +00001838 if (insn->length > 15)
Nuno Lopes3ed6d602009-12-19 12:07:00 +00001839 dbgprintf(insn, "Instruction exceeds 15-byte limit");
NAKAMURA Takumidde7fa82013-03-25 20:55:43 +00001840
Sean Callanan04cc3072009-12-19 02:59:52 +00001841 return 0;
1842}