blob: db694bc2f3c5814b3d427fd996acc7b40d90f7b0 [file] [log] [blame]
Sean Callanan8ed9f512009-12-19 02:59:52 +00001/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
2 *
3 * The LLVM Compiler Infrastructure
4 *
5 * This file is distributed under the University of Illinois Open Source
6 * License. See LICENSE.TXT for details.
7 *
8 *===----------------------------------------------------------------------===*
9 *
10 * This file is part of the X86 Disassembler.
11 * It contains the implementation of the instruction decoder.
12 * Documentation for the disassembler can be found in X86Disassembler.h.
13 *
14 *===----------------------------------------------------------------------===*/
15
Sean Callanan8ed9f512009-12-19 02:59:52 +000016#include <stdarg.h> /* for va_*() */
17#include <stdio.h> /* for vsnprintf() */
18#include <stdlib.h> /* for exit() */
Daniel Dunbar71f842d2009-12-19 03:31:50 +000019#include <string.h> /* for memset() */
Sean Callanan8ed9f512009-12-19 02:59:52 +000020
21#include "X86DisassemblerDecoder.h"
22
23#include "X86GenDisassemblerTables.inc"
24
/* Integer stand-ins for boolean truth values. */
#define TRUE  1
#define FALSE 0

/* File-local one-byte boolean type. */
typedef int8_t bool;

/* NORETURN expands to the GCC "noreturn" attribute when __GNUC__ is defined. */
#if defined(__GNUC__)
#define NORETURN __attribute__((noreturn))
#else
#define NORETURN
#endif

/*
 * debug - Forwards a message, tagged with the current file and line, to
 *   x86DisassemblerDebug().  Expands to an empty statement when NDEBUG is
 *   defined.
 */
#ifdef NDEBUG
#define debug(s) do { } while (0)
#else
#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#endif
41
Sean Callanan8ed9f512009-12-19 02:59:52 +000042
43/*
44 * contextForAttrs - Client for the instruction context table. Takes a set of
45 * attributes and returns the appropriate decode context.
46 *
47 * @param attrMask - Attributes, from the enumeration attributeBits.
48 * @return - The InstructionContext to use when looking up an
49 * an instruction with these attributes.
50 */
Sean Callanan542eabc2009-12-22 22:51:40 +000051static InstructionContext contextForAttrs(uint8_t attrMask) {
Sean Callanan8ed9f512009-12-19 02:59:52 +000052 return CONTEXTS_SYM[attrMask];
53}
54
55/*
56 * modRMRequired - Reads the appropriate instruction table to determine whether
57 * the ModR/M byte is required to decode a particular instruction.
58 *
59 * @param type - The opcode type (i.e., how many bytes it has).
60 * @param insnContext - The context for the instruction, as returned by
61 * contextForAttrs.
62 * @param opcode - The last byte of the instruction's opcode, not counting
63 * ModR/M extensions and escapes.
64 * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
65 */
Sean Callanan542eabc2009-12-22 22:51:40 +000066static int modRMRequired(OpcodeType type,
Sean Callanan8ed9f512009-12-19 02:59:52 +000067 InstructionContext insnContext,
68 uint8_t opcode) {
Daniel Dunbarbaf2e352009-12-22 01:41:37 +000069 const struct ContextDecision* decision = 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +000070
71 switch (type) {
72 case ONEBYTE:
73 decision = &ONEBYTE_SYM;
74 break;
75 case TWOBYTE:
76 decision = &TWOBYTE_SYM;
77 break;
78 case THREEBYTE_38:
79 decision = &THREEBYTE38_SYM;
80 break;
81 case THREEBYTE_3A:
82 decision = &THREEBYTE3A_SYM;
83 break;
84 }
85
86 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
87 modrm_type != MODRM_ONEENTRY;
88
Sean Callanan8ed9f512009-12-19 02:59:52 +000089 return 0;
90}
91
92/*
93 * decode - Reads the appropriate instruction table to obtain the unique ID of
94 * an instruction.
95 *
96 * @param type - See modRMRequired().
97 * @param insnContext - See modRMRequired().
98 * @param opcode - See modRMRequired().
99 * @param modRM - The ModR/M byte if required, or any value if not.
Sean Callanana144c3f2010-04-02 21:23:51 +0000100 * @return - The UID of the instruction, or 0 on failure.
Sean Callanan8ed9f512009-12-19 02:59:52 +0000101 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000102static InstrUID decode(OpcodeType type,
Sean Callanana144c3f2010-04-02 21:23:51 +0000103 InstructionContext insnContext,
104 uint8_t opcode,
105 uint8_t modRM) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000106 struct ModRMDecision* dec;
107
108 switch (type) {
109 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000110 debug("Unknown opcode type");
111 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000112 case ONEBYTE:
113 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
114 break;
115 case TWOBYTE:
116 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
117 break;
118 case THREEBYTE_38:
119 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
120 break;
121 case THREEBYTE_3A:
122 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
123 break;
124 }
125
126 switch (dec->modrm_type) {
127 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000128 debug("Corrupt table! Unknown modrm_type");
129 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000130 case MODRM_ONEENTRY:
131 return dec->instructionIDs[0];
132 case MODRM_SPLITRM:
133 if (modFromModRM(modRM) == 0x3)
134 return dec->instructionIDs[1];
135 else
136 return dec->instructionIDs[0];
137 case MODRM_FULL:
138 return dec->instructionIDs[modRM];
139 }
Sean Callanan8ed9f512009-12-19 02:59:52 +0000140}
141
142/*
143 * specifierForUID - Given a UID, returns the name and operand specification for
144 * that instruction.
145 *
146 * @param uid - The unique ID for the instruction. This should be returned by
147 * decode(); specifierForUID will not check bounds.
148 * @return - A pointer to the specification for that instruction.
149 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000150static struct InstructionSpecifier* specifierForUID(InstrUID uid) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000151 return &INSTRUCTIONS_SYM[uid];
152}
153
154/*
155 * consumeByte - Uses the reader function provided by the user to consume one
156 * byte from the instruction's memory and advance the cursor.
157 *
158 * @param insn - The instruction with the reader function to use. The cursor
159 * for this instruction is advanced.
160 * @param byte - A pointer to a pre-allocated memory buffer to be populated
161 * with the data read.
162 * @return - 0 if the read was successful; nonzero otherwise.
163 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000164static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000165 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
166
167 if (!ret)
168 ++(insn->readerCursor);
169
170 return ret;
171}
172
173/*
174 * lookAtByte - Like consumeByte, but does not advance the cursor.
175 *
176 * @param insn - See consumeByte().
177 * @param byte - See consumeByte().
178 * @return - See consumeByte().
179 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000180static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000181 return insn->reader(insn->readerArg, byte, insn->readerCursor);
182}
183
Sean Callanan542eabc2009-12-22 22:51:40 +0000184static void unconsumeByte(struct InternalInstruction* insn) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000185 insn->readerCursor--;
186}
187
Sean Callanan542eabc2009-12-22 22:51:40 +0000188#define CONSUME_FUNC(name, type) \
189 static int name(struct InternalInstruction* insn, type* ptr) { \
190 type combined = 0; \
191 unsigned offset; \
192 for (offset = 0; offset < sizeof(type); ++offset) { \
193 uint8_t byte; \
194 int ret = insn->reader(insn->readerArg, \
195 &byte, \
196 insn->readerCursor + offset); \
197 if (ret) \
198 return ret; \
199 combined = combined | ((type)byte << ((type)offset * 8)); \
200 } \
201 *ptr = combined; \
202 insn->readerCursor += sizeof(type); \
203 return 0; \
Sean Callanan8ed9f512009-12-19 02:59:52 +0000204 }
205
206/*
207 * consume* - Use the reader function provided by the user to consume data
208 * values of various sizes from the instruction's memory and advance the
209 * cursor appropriately. These readers perform endian conversion.
210 *
211 * @param insn - See consumeByte().
212 * @param ptr - A pointer to a pre-allocated memory of appropriate size to
213 * be populated with the data read.
214 * @return - See consumeByte().
215 */
216CONSUME_FUNC(consumeInt8, int8_t)
217CONSUME_FUNC(consumeInt16, int16_t)
218CONSUME_FUNC(consumeInt32, int32_t)
219CONSUME_FUNC(consumeUInt16, uint16_t)
220CONSUME_FUNC(consumeUInt32, uint32_t)
221CONSUME_FUNC(consumeUInt64, uint64_t)
222
223/*
Nuno Lopes392bbd92009-12-19 12:07:00 +0000224 * dbgprintf - Uses the logging function provided by the user to log a single
Sean Callanan8ed9f512009-12-19 02:59:52 +0000225 * message, typically without a carriage-return.
226 *
227 * @param insn - The instruction containing the logging function.
228 * @param format - See printf().
229 * @param ... - See printf().
230 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000231static void dbgprintf(struct InternalInstruction* insn,
232 const char* format,
233 ...) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000234 char buffer[256];
235 va_list ap;
236
237 if (!insn->dlog)
238 return;
239
240 va_start(ap, format);
241 (void)vsnprintf(buffer, sizeof(buffer), format, ap);
242 va_end(ap);
243
244 insn->dlog(insn->dlogArg, buffer);
245
246 return;
247}
248
249/*
250 * setPrefixPresent - Marks that a particular prefix is present at a particular
251 * location.
252 *
253 * @param insn - The instruction to be marked as having the prefix.
254 * @param prefix - The prefix that is present.
255 * @param location - The location where the prefix is located (in the address
256 * space of the instruction's reader).
257 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000258static void setPrefixPresent(struct InternalInstruction* insn,
Sean Callanan8ed9f512009-12-19 02:59:52 +0000259 uint8_t prefix,
260 uint64_t location)
261{
262 insn->prefixPresent[prefix] = 1;
263 insn->prefixLocations[prefix] = location;
264}
265
266/*
267 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
268 * present at a given location.
269 *
270 * @param insn - The instruction to be queried.
271 * @param prefix - The prefix.
272 * @param location - The location to query.
273 * @return - Whether the prefix is at that location.
274 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000275static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
276 uint8_t prefix,
277 uint64_t location)
Sean Callanan8ed9f512009-12-19 02:59:52 +0000278{
279 if (insn->prefixPresent[prefix] == 1 &&
280 insn->prefixLocations[prefix] == location)
281 return TRUE;
282 else
283 return FALSE;
284}
285
286/*
287 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
288 * instruction as having them. Also sets the instruction's default operand,
289 * address, and other relevant data sizes to report operands correctly.
290 *
291 * @param insn - The instruction whose prefixes are to be read.
292 * @return - 0 if the instruction could be read until the end of the prefix
293 * bytes, and no prefixes conflicted; nonzero otherwise.
294 */
295static int readPrefixes(struct InternalInstruction* insn) {
296 BOOL isPrefix = TRUE;
297 BOOL prefixGroups[4] = { FALSE };
298 uint64_t prefixLocation;
299 uint8_t byte;
300
301 BOOL hasAdSize = FALSE;
302 BOOL hasOpSize = FALSE;
303
Nuno Lopes392bbd92009-12-19 12:07:00 +0000304 dbgprintf(insn, "readPrefixes()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000305
306 while (isPrefix) {
307 prefixLocation = insn->readerCursor;
308
309 if (consumeByte(insn, &byte))
310 return -1;
311
312 switch (byte) {
313 case 0xf0: /* LOCK */
314 case 0xf2: /* REPNE/REPNZ */
315 case 0xf3: /* REP or REPE/REPZ */
316 if (prefixGroups[0])
Nuno Lopes392bbd92009-12-19 12:07:00 +0000317 dbgprintf(insn, "Redundant Group 1 prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000318 prefixGroups[0] = TRUE;
319 setPrefixPresent(insn, byte, prefixLocation);
320 break;
321 case 0x2e: /* CS segment override -OR- Branch not taken */
322 case 0x36: /* SS segment override -OR- Branch taken */
323 case 0x3e: /* DS segment override */
324 case 0x26: /* ES segment override */
325 case 0x64: /* FS segment override */
326 case 0x65: /* GS segment override */
327 switch (byte) {
328 case 0x2e:
329 insn->segmentOverride = SEG_OVERRIDE_CS;
330 break;
331 case 0x36:
332 insn->segmentOverride = SEG_OVERRIDE_SS;
333 break;
334 case 0x3e:
335 insn->segmentOverride = SEG_OVERRIDE_DS;
336 break;
337 case 0x26:
338 insn->segmentOverride = SEG_OVERRIDE_ES;
339 break;
340 case 0x64:
341 insn->segmentOverride = SEG_OVERRIDE_FS;
342 break;
343 case 0x65:
344 insn->segmentOverride = SEG_OVERRIDE_GS;
345 break;
346 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000347 debug("Unhandled override");
348 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000349 }
350 if (prefixGroups[1])
Nuno Lopes392bbd92009-12-19 12:07:00 +0000351 dbgprintf(insn, "Redundant Group 2 prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000352 prefixGroups[1] = TRUE;
353 setPrefixPresent(insn, byte, prefixLocation);
354 break;
355 case 0x66: /* Operand-size override */
356 if (prefixGroups[2])
Nuno Lopes392bbd92009-12-19 12:07:00 +0000357 dbgprintf(insn, "Redundant Group 3 prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000358 prefixGroups[2] = TRUE;
359 hasOpSize = TRUE;
360 setPrefixPresent(insn, byte, prefixLocation);
361 break;
362 case 0x67: /* Address-size override */
363 if (prefixGroups[3])
Nuno Lopes392bbd92009-12-19 12:07:00 +0000364 dbgprintf(insn, "Redundant Group 4 prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000365 prefixGroups[3] = TRUE;
366 hasAdSize = TRUE;
367 setPrefixPresent(insn, byte, prefixLocation);
368 break;
369 default: /* Not a prefix byte */
370 isPrefix = FALSE;
371 break;
372 }
373
374 if (isPrefix)
Nuno Lopes392bbd92009-12-19 12:07:00 +0000375 dbgprintf(insn, "Found prefix 0x%hhx", byte);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000376 }
377
378 if (insn->mode == MODE_64BIT) {
379 if ((byte & 0xf0) == 0x40) {
380 uint8_t opcodeByte;
381
Sean Callanana144c3f2010-04-02 21:23:51 +0000382 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
Nuno Lopes392bbd92009-12-19 12:07:00 +0000383 dbgprintf(insn, "Redundant REX prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000384 return -1;
385 }
386
387 insn->rexPrefix = byte;
388 insn->necessaryPrefixLocation = insn->readerCursor - 2;
389
Nuno Lopes392bbd92009-12-19 12:07:00 +0000390 dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000391 } else {
392 unconsumeByte(insn);
393 insn->necessaryPrefixLocation = insn->readerCursor - 1;
394 }
395 } else {
396 unconsumeByte(insn);
397 }
398
399 if (insn->mode == MODE_16BIT) {
400 insn->registerSize = (hasOpSize ? 4 : 2);
401 insn->addressSize = (hasAdSize ? 4 : 2);
402 insn->displacementSize = (hasAdSize ? 4 : 2);
403 insn->immediateSize = (hasOpSize ? 4 : 2);
404 } else if (insn->mode == MODE_32BIT) {
405 insn->registerSize = (hasOpSize ? 2 : 4);
406 insn->addressSize = (hasAdSize ? 2 : 4);
407 insn->displacementSize = (hasAdSize ? 2 : 4);
408 insn->immediateSize = (hasAdSize ? 2 : 4);
409 } else if (insn->mode == MODE_64BIT) {
410 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
411 insn->registerSize = 8;
412 insn->addressSize = (hasAdSize ? 4 : 8);
413 insn->displacementSize = 4;
414 insn->immediateSize = 4;
415 } else if (insn->rexPrefix) {
416 insn->registerSize = (hasOpSize ? 2 : 4);
417 insn->addressSize = (hasAdSize ? 4 : 8);
418 insn->displacementSize = (hasOpSize ? 2 : 4);
419 insn->immediateSize = (hasOpSize ? 2 : 4);
420 } else {
421 insn->registerSize = (hasOpSize ? 2 : 4);
422 insn->addressSize = (hasAdSize ? 4 : 8);
423 insn->displacementSize = (hasOpSize ? 2 : 4);
424 insn->immediateSize = (hasOpSize ? 2 : 4);
425 }
426 }
427
428 return 0;
429}
430
431/*
432 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
433 * extended or escape opcodes).
434 *
435 * @param insn - The instruction whose opcode is to be read.
436 * @return - 0 if the opcode could be read successfully; nonzero otherwise.
437 */
438static int readOpcode(struct InternalInstruction* insn) {
439 /* Determine the length of the primary opcode */
440
441 uint8_t current;
442
Nuno Lopes392bbd92009-12-19 12:07:00 +0000443 dbgprintf(insn, "readOpcode()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000444
445 insn->opcodeType = ONEBYTE;
446 if (consumeByte(insn, &current))
447 return -1;
448
449 if (current == 0x0f) {
Nuno Lopes392bbd92009-12-19 12:07:00 +0000450 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000451
452 insn->twoByteEscape = current;
453
454 if (consumeByte(insn, &current))
455 return -1;
456
457 if (current == 0x38) {
Nuno Lopes392bbd92009-12-19 12:07:00 +0000458 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000459
460 insn->threeByteEscape = current;
461
462 if (consumeByte(insn, &current))
463 return -1;
464
465 insn->opcodeType = THREEBYTE_38;
466 } else if (current == 0x3a) {
Nuno Lopes392bbd92009-12-19 12:07:00 +0000467 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000468
469 insn->threeByteEscape = current;
470
471 if (consumeByte(insn, &current))
472 return -1;
473
474 insn->opcodeType = THREEBYTE_3A;
475 } else {
Nuno Lopes392bbd92009-12-19 12:07:00 +0000476 dbgprintf(insn, "Didn't find a three-byte escape prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000477
478 insn->opcodeType = TWOBYTE;
479 }
480 }
481
482 /*
483 * At this point we have consumed the full opcode.
484 * Anything we consume from here on must be unconsumed.
485 */
486
487 insn->opcode = current;
488
489 return 0;
490}
491
492static int readModRM(struct InternalInstruction* insn);
493
494/*
495 * getIDWithAttrMask - Determines the ID of an instruction, consuming
496 * the ModR/M byte as appropriate for extended and escape opcodes,
497 * and using a supplied attribute mask.
498 *
499 * @param instructionID - A pointer whose target is filled in with the ID of the
500 * instruction.
501 * @param insn - The instruction whose ID is to be determined.
502 * @param attrMask - The attribute mask to search.
503 * @return - 0 if the ModR/M could be read when needed or was not
504 * needed; nonzero otherwise.
505 */
506static int getIDWithAttrMask(uint16_t* instructionID,
507 struct InternalInstruction* insn,
508 uint8_t attrMask) {
509 BOOL hasModRMExtension;
510
511 uint8_t instructionClass;
512
513 instructionClass = contextForAttrs(attrMask);
514
515 hasModRMExtension = modRMRequired(insn->opcodeType,
516 instructionClass,
517 insn->opcode);
518
519 if (hasModRMExtension) {
520 readModRM(insn);
521
522 *instructionID = decode(insn->opcodeType,
523 instructionClass,
524 insn->opcode,
525 insn->modRM);
526 } else {
527 *instructionID = decode(insn->opcodeType,
528 instructionClass,
529 insn->opcode,
530 0);
531 }
532
533 return 0;
534}
535
536/*
537 * is16BitEquivalent - Determines whether two instruction names refer to
538 * equivalent instructions but one is 16-bit whereas the other is not.
539 *
540 * @param orig - The instruction that is not 16-bit
541 * @param equiv - The instruction that is 16-bit
542 */
543static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
544 off_t i;
545
Sean Callanana144c3f2010-04-02 21:23:51 +0000546 for (i = 0;; i++) {
547 if (orig[i] == '\0' && equiv[i] == '\0')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000548 return TRUE;
Sean Callanana144c3f2010-04-02 21:23:51 +0000549 if (orig[i] == '\0' || equiv[i] == '\0')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000550 return FALSE;
Sean Callanana144c3f2010-04-02 21:23:51 +0000551 if (orig[i] != equiv[i]) {
552 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000553 continue;
Sean Callanana144c3f2010-04-02 21:23:51 +0000554 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000555 continue;
Sean Callanana144c3f2010-04-02 21:23:51 +0000556 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000557 continue;
558 return FALSE;
559 }
560 }
561}
562
563/*
564 * is64BitEquivalent - Determines whether two instruction names refer to
565 * equivalent instructions but one is 64-bit whereas the other is not.
566 *
567 * @param orig - The instruction that is not 64-bit
568 * @param equiv - The instruction that is 64-bit
569 */
570static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
571 off_t i;
572
Sean Callanana144c3f2010-04-02 21:23:51 +0000573 for (i = 0;; i++) {
574 if (orig[i] == '\0' && equiv[i] == '\0')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000575 return TRUE;
Sean Callanana144c3f2010-04-02 21:23:51 +0000576 if (orig[i] == '\0' || equiv[i] == '\0')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000577 return FALSE;
Sean Callanana144c3f2010-04-02 21:23:51 +0000578 if (orig[i] != equiv[i]) {
579 if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000580 continue;
Sean Callanana144c3f2010-04-02 21:23:51 +0000581 if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000582 continue;
Sean Callanana144c3f2010-04-02 21:23:51 +0000583 if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000584 continue;
585 return FALSE;
586 }
587 }
588}
589
590
591/*
592 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
593 * appropriate for extended and escape opcodes. Determines the attributes and
594 * context for the instruction before doing so.
595 *
596 * @param insn - The instruction whose ID is to be determined.
597 * @return - 0 if the ModR/M could be read when needed or was not needed;
598 * nonzero otherwise.
599 */
600static int getID(struct InternalInstruction* insn) {
601 uint8_t attrMask;
602 uint16_t instructionID;
603
Nuno Lopes392bbd92009-12-19 12:07:00 +0000604 dbgprintf(insn, "getID()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000605
606 attrMask = ATTR_NONE;
607
608 if (insn->mode == MODE_64BIT)
609 attrMask |= ATTR_64BIT;
610
611 if (insn->rexPrefix & 0x08)
612 attrMask |= ATTR_REXW;
613
614 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
615 attrMask |= ATTR_OPSIZE;
616 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
617 attrMask |= ATTR_XS;
618 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
619 attrMask |= ATTR_XD;
620
Sean Callanana144c3f2010-04-02 21:23:51 +0000621 if (getIDWithAttrMask(&instructionID, insn, attrMask))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000622 return -1;
623
624 /* The following clauses compensate for limitations of the tables. */
625
626 if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
627 /*
628 * Although for SSE instructions it is usually necessary to treat REX.W+F2
629 * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
630 * an occasional instruction where F2 is incidental and REX.W is the more
631 * significant. If the decoded instruction is 32-bit and adding REX.W
632 * instead of F2 changes a 32 to a 64, we adopt the new encoding.
633 */
634
635 struct InstructionSpecifier* spec;
636 uint16_t instructionIDWithREXw;
637 struct InstructionSpecifier* specWithREXw;
638
639 spec = specifierForUID(instructionID);
640
641 if (getIDWithAttrMask(&instructionIDWithREXw,
642 insn,
643 attrMask & (~ATTR_XD))) {
644 /*
645 * Decoding with REX.w would yield nothing; give up and return original
646 * decode.
647 */
648
649 insn->instructionID = instructionID;
650 insn->spec = spec;
651 return 0;
652 }
653
654 specWithREXw = specifierForUID(instructionIDWithREXw);
655
656 if (is64BitEquivalent(spec->name, specWithREXw->name)) {
657 insn->instructionID = instructionIDWithREXw;
658 insn->spec = specWithREXw;
659 } else {
660 insn->instructionID = instructionID;
661 insn->spec = spec;
662 }
663 return 0;
664 }
665
666 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
667 /*
668 * The instruction tables make no distinction between instructions that
669 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
670 * particular spot (i.e., many MMX operations). In general we're
671 * conservative, but in the specific case where OpSize is present but not
672 * in the right place we check if there's a 16-bit operation.
673 */
674
675 struct InstructionSpecifier* spec;
676 uint16_t instructionIDWithOpsize;
677 struct InstructionSpecifier* specWithOpsize;
678
679 spec = specifierForUID(instructionID);
680
681 if (getIDWithAttrMask(&instructionIDWithOpsize,
682 insn,
683 attrMask | ATTR_OPSIZE)) {
684 /*
685 * ModRM required with OpSize but not present; give up and return version
686 * without OpSize set
687 */
688
689 insn->instructionID = instructionID;
690 insn->spec = spec;
691 return 0;
692 }
693
694 specWithOpsize = specifierForUID(instructionIDWithOpsize);
695
696 if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
697 insn->instructionID = instructionIDWithOpsize;
698 insn->spec = specWithOpsize;
699 } else {
700 insn->instructionID = instructionID;
701 insn->spec = spec;
702 }
703 return 0;
704 }
705
706 insn->instructionID = instructionID;
707 insn->spec = specifierForUID(insn->instructionID);
708
709 return 0;
710}
711
712/*
713 * readSIB - Consumes the SIB byte to determine addressing information for an
714 * instruction.
715 *
716 * @param insn - The instruction whose SIB byte is to be read.
717 * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
718 */
719static int readSIB(struct InternalInstruction* insn) {
Daniel Dunbarbaf2e352009-12-22 01:41:37 +0000720 SIBIndex sibIndexBase = 0;
721 SIBBase sibBaseBase = 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000722 uint8_t index, base;
723
Nuno Lopes392bbd92009-12-19 12:07:00 +0000724 dbgprintf(insn, "readSIB()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000725
726 if (insn->consumedSIB)
727 return 0;
728
729 insn->consumedSIB = TRUE;
730
731 switch (insn->addressSize) {
732 case 2:
Nuno Lopes392bbd92009-12-19 12:07:00 +0000733 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000734 return -1;
735 break;
736 case 4:
737 sibIndexBase = SIB_INDEX_EAX;
738 sibBaseBase = SIB_BASE_EAX;
739 break;
740 case 8:
741 sibIndexBase = SIB_INDEX_RAX;
742 sibBaseBase = SIB_BASE_RAX;
743 break;
744 }
745
746 if (consumeByte(insn, &insn->sib))
747 return -1;
748
749 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
750
751 switch (index) {
752 case 0x4:
753 insn->sibIndex = SIB_INDEX_NONE;
754 break;
755 default:
756 insn->sibIndex = (EABase)(sibIndexBase + index);
757 if (insn->sibIndex == SIB_INDEX_sib ||
758 insn->sibIndex == SIB_INDEX_sib64)
759 insn->sibIndex = SIB_INDEX_NONE;
760 break;
761 }
762
763 switch (scaleFromSIB(insn->sib)) {
764 case 0:
765 insn->sibScale = 1;
766 break;
767 case 1:
768 insn->sibScale = 2;
769 break;
770 case 2:
771 insn->sibScale = 4;
772 break;
773 case 3:
774 insn->sibScale = 8;
775 break;
776 }
777
778 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
779
780 switch (base) {
781 case 0x5:
782 switch (modFromModRM(insn->modRM)) {
783 case 0x0:
784 insn->eaDisplacement = EA_DISP_32;
785 insn->sibBase = SIB_BASE_NONE;
786 break;
787 case 0x1:
788 insn->eaDisplacement = EA_DISP_8;
789 insn->sibBase = (insn->addressSize == 4 ?
790 SIB_BASE_EBP : SIB_BASE_RBP);
791 break;
792 case 0x2:
793 insn->eaDisplacement = EA_DISP_32;
794 insn->sibBase = (insn->addressSize == 4 ?
795 SIB_BASE_EBP : SIB_BASE_RBP);
796 break;
797 case 0x3:
Sean Callanana144c3f2010-04-02 21:23:51 +0000798 debug("Cannot have Mod = 0b11 and a SIB byte");
799 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000800 }
801 break;
802 default:
803 insn->sibBase = (EABase)(sibBaseBase + base);
804 break;
805 }
806
807 return 0;
808}
809
810/*
811 * readDisplacement - Consumes the displacement of an instruction.
812 *
813 * @param insn - The instruction whose displacement is to be read.
814 * @return - 0 if the displacement byte was successfully read; nonzero
815 * otherwise.
816 */
817static int readDisplacement(struct InternalInstruction* insn) {
818 int8_t d8;
819 int16_t d16;
820 int32_t d32;
821
Nuno Lopes392bbd92009-12-19 12:07:00 +0000822 dbgprintf(insn, "readDisplacement()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000823
824 if (insn->consumedDisplacement)
825 return 0;
826
827 insn->consumedDisplacement = TRUE;
828
829 switch (insn->eaDisplacement) {
830 case EA_DISP_NONE:
831 insn->consumedDisplacement = FALSE;
832 break;
833 case EA_DISP_8:
834 if (consumeInt8(insn, &d8))
835 return -1;
836 insn->displacement = d8;
837 break;
838 case EA_DISP_16:
839 if (consumeInt16(insn, &d16))
840 return -1;
841 insn->displacement = d16;
842 break;
843 case EA_DISP_32:
844 if (consumeInt32(insn, &d32))
845 return -1;
846 insn->displacement = d32;
847 break;
848 }
849
850 insn->consumedDisplacement = TRUE;
851 return 0;
852}
853
854/*
855 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
856 * displacement) for an instruction and interprets it.
857 *
858 * @param insn - The instruction whose addressing information is to be read.
859 * @return - 0 if the information was successfully read; nonzero otherwise.
860 */
861static int readModRM(struct InternalInstruction* insn) {
862 uint8_t mod, rm, reg;
863
Nuno Lopes392bbd92009-12-19 12:07:00 +0000864 dbgprintf(insn, "readModRM()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000865
866 if (insn->consumedModRM)
867 return 0;
868
869 consumeByte(insn, &insn->modRM);
870 insn->consumedModRM = TRUE;
871
872 mod = modFromModRM(insn->modRM);
873 rm = rmFromModRM(insn->modRM);
874 reg = regFromModRM(insn->modRM);
875
876 /*
877 * This goes by insn->registerSize to pick the correct register, which messes
878 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
879 * fixupReg().
880 */
881 switch (insn->registerSize) {
882 case 2:
Sean Callanan06b766d2009-12-22 02:07:42 +0000883 insn->regBase = MODRM_REG_AX;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000884 insn->eaRegBase = EA_REG_AX;
885 break;
886 case 4:
Sean Callanan06b766d2009-12-22 02:07:42 +0000887 insn->regBase = MODRM_REG_EAX;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000888 insn->eaRegBase = EA_REG_EAX;
889 break;
890 case 8:
Sean Callanan06b766d2009-12-22 02:07:42 +0000891 insn->regBase = MODRM_REG_RAX;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000892 insn->eaRegBase = EA_REG_RAX;
893 break;
894 }
895
896 reg |= rFromREX(insn->rexPrefix) << 3;
897 rm |= bFromREX(insn->rexPrefix) << 3;
898
899 insn->reg = (Reg)(insn->regBase + reg);
900
901 switch (insn->addressSize) {
902 case 2:
903 insn->eaBaseBase = EA_BASE_BX_SI;
904
905 switch (mod) {
906 case 0x0:
907 if (rm == 0x6) {
908 insn->eaBase = EA_BASE_NONE;
909 insn->eaDisplacement = EA_DISP_16;
Sean Callanana144c3f2010-04-02 21:23:51 +0000910 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000911 return -1;
912 } else {
913 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
914 insn->eaDisplacement = EA_DISP_NONE;
915 }
916 break;
917 case 0x1:
918 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
919 insn->eaDisplacement = EA_DISP_8;
Sean Callanana144c3f2010-04-02 21:23:51 +0000920 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000921 return -1;
922 break;
923 case 0x2:
924 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
925 insn->eaDisplacement = EA_DISP_16;
Sean Callanana144c3f2010-04-02 21:23:51 +0000926 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000927 return -1;
928 break;
929 case 0x3:
930 insn->eaBase = (EABase)(insn->eaRegBase + rm);
Sean Callanana144c3f2010-04-02 21:23:51 +0000931 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000932 return -1;
933 break;
934 }
935 break;
936 case 4:
937 case 8:
938 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
939
940 switch (mod) {
941 case 0x0:
942 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
943 switch (rm) {
944 case 0x4:
945 case 0xc: /* in case REXW.b is set */
946 insn->eaBase = (insn->addressSize == 4 ?
947 EA_BASE_sib : EA_BASE_sib64);
948 readSIB(insn);
Sean Callanana144c3f2010-04-02 21:23:51 +0000949 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000950 return -1;
951 break;
952 case 0x5:
953 insn->eaBase = EA_BASE_NONE;
954 insn->eaDisplacement = EA_DISP_32;
Sean Callanana144c3f2010-04-02 21:23:51 +0000955 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000956 return -1;
957 break;
958 default:
959 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
960 break;
961 }
962 break;
963 case 0x1:
964 case 0x2:
965 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
966 switch (rm) {
967 case 0x4:
968 case 0xc: /* in case REXW.b is set */
969 insn->eaBase = EA_BASE_sib;
970 readSIB(insn);
Sean Callanana144c3f2010-04-02 21:23:51 +0000971 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000972 return -1;
973 break;
974 default:
975 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
Sean Callanana144c3f2010-04-02 21:23:51 +0000976 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000977 return -1;
978 break;
979 }
980 break;
981 case 0x3:
982 insn->eaDisplacement = EA_DISP_NONE;
983 insn->eaBase = (EABase)(insn->eaRegBase + rm);
984 break;
985 }
986 break;
987 } /* switch (insn->addressSize) */
988
989 return 0;
990}
991
/*
 * GENERIC_FIXUP_FUNC - Expands to the definition of a static function that
 *   maps a register index onto a register constant, according to the type of
 *   the operand that uses the register.
 *
 * @param name    - The name of the function to define.
 * @param base    - An expression, evaluated inside the generated function
 *                  (where insn is in scope), that is added to the index for
 *                  TYPE_Rv operands.
 * @param prefix  - The token prefix of the register constants to use
 *                  (prefix##_AL, prefix##_XMM0, and so on).
 *
 * The generated function stores 1 in *valid on entry.  It stores 0 there
 * when the operand type is not handled (in which case it returns 0), or when
 * the index is out of range for an MMX (> 7), segment (> 5), debug (> 7),
 * 32-bit control (> 7) or 64-bit control (> 8) register; in those
 * out-of-range cases the sum of the class's first register and the index is
 * still returned.
 */
#define GENERIC_FIXUP_FUNC(name, base, prefix)            \
  static uint8_t name(struct InternalInstruction *insn,   \
                      OperandType type,                   \
                      uint8_t index,                      \
                      uint8_t *valid) {                   \
    *valid = 1;                                           \
    switch (type) {                                       \
    case TYPE_Rv:                                         \
      return (base) + index;                              \
    case TYPE_R8:                                         \
      /* With a REX prefix, 4-7 select SPL and friends */ \
      if (insn->rexPrefix &&                              \
          index >= 4 && index <= 7)                       \
        return prefix##_SPL + (index - 4);                \
      return prefix##_AL + index;                         \
    case TYPE_R16:                                        \
      return prefix##_AX + index;                         \
    case TYPE_R32:                                        \
      return prefix##_EAX + index;                        \
    case TYPE_R64:                                        \
      return prefix##_RAX + index;                        \
    case TYPE_XMM128:                                     \
    case TYPE_XMM64:                                      \
    case TYPE_XMM32:                                      \
    case TYPE_XMM:                                        \
      return prefix##_XMM0 + index;                       \
    case TYPE_MM64:                                       \
    case TYPE_MM32:                                       \
    case TYPE_MM:                                         \
      if (index > 7)                                      \
        *valid = 0;                                       \
      return prefix##_MM0 + index;                        \
    case TYPE_SEGMENTREG:                                 \
      if (index > 5)                                      \
        *valid = 0;                                       \
      return prefix##_ES + index;                         \
    case TYPE_DEBUGREG:                                   \
      if (index > 7)                                      \
        *valid = 0;                                       \
      return prefix##_DR0 + index;                        \
    case TYPE_CR32:                                       \
      if (index > 7)                                      \
        *valid = 0;                                       \
      return prefix##_ECR0 + index;                       \
    case TYPE_CR64:                                       \
      if (index > 8)                                      \
        *valid = 0;                                       \
      return prefix##_RCR0 + index;                       \
    default:                                              \
      debug("Unhandled register type");                   \
      *valid = 0;                                         \
      return 0;                                           \
    }                                                     \
  }
1047
/*
 * fixup*Value - Consults an operand type to determine the meaning of the
 *   reg or R/M field.  If the operand is an XMM operand, for example, an
 *   operand would be XMM0 instead of AX, which readModRM() would otherwise
 *   misinterpret it as.
 *
 *   fixupRegValue() is instantiated with insn->regBase and the MODRM_REG
 *   register constants; fixupRMValue() is instantiated with insn->eaRegBase
 *   and the EA_REG register constants.
 *
 * @param insn  - The instruction containing the operand.
 * @param type  - The operand type.
 * @param index - The existing value of the field as reported by readModRM().
 * @param valid - The address of a uint8_t.  The target is set to 1 if the
 *                field is valid for the register class; 0 if not.
 * @return      - The proper value.
 */
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
1063
1064/*
1065 * fixupReg - Consults an operand specifier to determine which of the
1066 * fixup*Value functions to use in correcting readModRM()'ss interpretation.
1067 *
1068 * @param insn - See fixup*Value().
1069 * @param op - The operand specifier.
1070 * @return - 0 if fixup was successful; -1 if the register returned was
1071 * invalid for its class.
1072 */
1073static int fixupReg(struct InternalInstruction *insn,
1074 struct OperandSpecifier *op) {
1075 uint8_t valid;
1076
Nuno Lopes392bbd92009-12-19 12:07:00 +00001077 dbgprintf(insn, "fixupReg()");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001078
1079 switch ((OperandEncoding)op->encoding) {
1080 default:
Sean Callanana144c3f2010-04-02 21:23:51 +00001081 debug("Expected a REG or R/M encoding in fixupReg");
1082 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001083 case ENCODING_REG:
1084 insn->reg = (Reg)fixupRegValue(insn,
1085 (OperandType)op->type,
1086 insn->reg - insn->regBase,
1087 &valid);
1088 if (!valid)
1089 return -1;
1090 break;
1091 case ENCODING_RM:
1092 if (insn->eaBase >= insn->eaRegBase) {
1093 insn->eaBase = (EABase)fixupRMValue(insn,
1094 (OperandType)op->type,
1095 insn->eaBase - insn->eaRegBase,
1096 &valid);
1097 if (!valid)
1098 return -1;
1099 }
1100 break;
1101 }
1102
1103 return 0;
1104}
1105
1106/*
1107 * readOpcodeModifier - Reads an operand from the opcode field of an
1108 * instruction. Handles AddRegFrm instructions.
1109 *
1110 * @param insn - The instruction whose opcode field is to be read.
1111 * @param inModRM - Indicates that the opcode field is to be read from the
1112 * ModR/M extension; useful for escape opcodes
Sean Callanana144c3f2010-04-02 21:23:51 +00001113 * @return - 0 on success; nonzero otherwise.
Sean Callanan8ed9f512009-12-19 02:59:52 +00001114 */
Sean Callanana144c3f2010-04-02 21:23:51 +00001115static int readOpcodeModifier(struct InternalInstruction* insn) {
Nuno Lopes392bbd92009-12-19 12:07:00 +00001116 dbgprintf(insn, "readOpcodeModifier()");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001117
1118 if (insn->consumedOpcodeModifier)
Sean Callanana144c3f2010-04-02 21:23:51 +00001119 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001120
1121 insn->consumedOpcodeModifier = TRUE;
1122
Sean Callanana144c3f2010-04-02 21:23:51 +00001123 switch (insn->spec->modifierType) {
Sean Callanan8ed9f512009-12-19 02:59:52 +00001124 default:
Sean Callanana144c3f2010-04-02 21:23:51 +00001125 debug("Unknown modifier type.");
1126 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001127 case MODIFIER_NONE:
Sean Callanana144c3f2010-04-02 21:23:51 +00001128 debug("No modifier but an operand expects one.");
1129 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001130 case MODIFIER_OPCODE:
1131 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
Sean Callanana144c3f2010-04-02 21:23:51 +00001132 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001133 case MODIFIER_MODRM:
1134 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
Sean Callanana144c3f2010-04-02 21:23:51 +00001135 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001136 }
1137}
1138
1139/*
1140 * readOpcodeRegister - Reads an operand from the opcode field of an
1141 * instruction and interprets it appropriately given the operand width.
1142 * Handles AddRegFrm instructions.
1143 *
1144 * @param insn - See readOpcodeModifier().
1145 * @param size - The width (in bytes) of the register being specified.
1146 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1147 * RAX.
Sean Callanana144c3f2010-04-02 21:23:51 +00001148 * @return - 0 on success; nonzero otherwise.
Sean Callanan8ed9f512009-12-19 02:59:52 +00001149 */
Sean Callanana144c3f2010-04-02 21:23:51 +00001150static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
Nuno Lopes392bbd92009-12-19 12:07:00 +00001151 dbgprintf(insn, "readOpcodeRegister()");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001152
Sean Callanana144c3f2010-04-02 21:23:51 +00001153 if (readOpcodeModifier(insn))
1154 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001155
1156 if (size == 0)
1157 size = insn->registerSize;
1158
1159 switch (size) {
1160 case 1:
Sean Callanan06b766d2009-12-22 02:07:42 +00001161 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1162 | insn->opcodeModifier));
Sean Callanana144c3f2010-04-02 21:23:51 +00001163 if (insn->rexPrefix &&
1164 insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1165 insn->opcodeRegister < MODRM_REG_AL + 0x8) {
Sean Callanan06b766d2009-12-22 02:07:42 +00001166 insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1167 + (insn->opcodeRegister - MODRM_REG_AL - 4));
Sean Callanan8ed9f512009-12-19 02:59:52 +00001168 }
1169
1170 break;
1171 case 2:
Sean Callanan06b766d2009-12-22 02:07:42 +00001172 insn->opcodeRegister = (Reg)(MODRM_REG_AX
1173 + ((bFromREX(insn->rexPrefix) << 3)
1174 | insn->opcodeModifier));
Sean Callanan8ed9f512009-12-19 02:59:52 +00001175 break;
1176 case 4:
Sean Callanana144c3f2010-04-02 21:23:51 +00001177 insn->opcodeRegister = (Reg)(MODRM_REG_EAX
Sean Callanan06b766d2009-12-22 02:07:42 +00001178 + ((bFromREX(insn->rexPrefix) << 3)
1179 | insn->opcodeModifier));
Sean Callanan8ed9f512009-12-19 02:59:52 +00001180 break;
1181 case 8:
Sean Callanan06b766d2009-12-22 02:07:42 +00001182 insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1183 + ((bFromREX(insn->rexPrefix) << 3)
1184 | insn->opcodeModifier));
Sean Callanan8ed9f512009-12-19 02:59:52 +00001185 break;
1186 }
Sean Callanana144c3f2010-04-02 21:23:51 +00001187
1188 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001189}
1190
1191/*
1192 * readImmediate - Consumes an immediate operand from an instruction, given the
1193 * desired operand size.
1194 *
1195 * @param insn - The instruction whose operand is to be read.
1196 * @param size - The width (in bytes) of the operand.
1197 * @return - 0 if the immediate was successfully consumed; nonzero
1198 * otherwise.
1199 */
1200static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1201 uint8_t imm8;
1202 uint16_t imm16;
1203 uint32_t imm32;
1204 uint64_t imm64;
1205
Nuno Lopes392bbd92009-12-19 12:07:00 +00001206 dbgprintf(insn, "readImmediate()");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001207
Sean Callanana144c3f2010-04-02 21:23:51 +00001208 if (insn->numImmediatesConsumed == 2) {
1209 debug("Already consumed two immediates");
1210 return -1;
1211 }
Sean Callanan8ed9f512009-12-19 02:59:52 +00001212
1213 if (size == 0)
1214 size = insn->immediateSize;
1215 else
1216 insn->immediateSize = size;
1217
1218 switch (size) {
1219 case 1:
1220 if (consumeByte(insn, &imm8))
1221 return -1;
1222 insn->immediates[insn->numImmediatesConsumed] = imm8;
1223 break;
1224 case 2:
1225 if (consumeUInt16(insn, &imm16))
1226 return -1;
1227 insn->immediates[insn->numImmediatesConsumed] = imm16;
1228 break;
1229 case 4:
1230 if (consumeUInt32(insn, &imm32))
1231 return -1;
1232 insn->immediates[insn->numImmediatesConsumed] = imm32;
1233 break;
1234 case 8:
1235 if (consumeUInt64(insn, &imm64))
1236 return -1;
1237 insn->immediates[insn->numImmediatesConsumed] = imm64;
1238 break;
1239 }
1240
1241 insn->numImmediatesConsumed++;
1242
1243 return 0;
1244}
1245
1246/*
1247 * readOperands - Consults the specifier for an instruction and consumes all
1248 * operands for that instruction, interpreting them as it goes.
1249 *
1250 * @param insn - The instruction whose operands are to be read and interpreted.
1251 * @return - 0 if all operands could be read; nonzero otherwise.
1252 */
1253static int readOperands(struct InternalInstruction* insn) {
1254 int index;
1255
Nuno Lopes392bbd92009-12-19 12:07:00 +00001256 dbgprintf(insn, "readOperands()");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001257
1258 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
1259 switch (insn->spec->operands[index].encoding) {
1260 case ENCODING_NONE:
1261 break;
1262 case ENCODING_REG:
1263 case ENCODING_RM:
1264 if (readModRM(insn))
1265 return -1;
1266 if (fixupReg(insn, &insn->spec->operands[index]))
1267 return -1;
1268 break;
1269 case ENCODING_CB:
1270 case ENCODING_CW:
1271 case ENCODING_CD:
1272 case ENCODING_CP:
1273 case ENCODING_CO:
1274 case ENCODING_CT:
Nuno Lopes392bbd92009-12-19 12:07:00 +00001275 dbgprintf(insn, "We currently don't hande code-offset encodings");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001276 return -1;
1277 case ENCODING_IB:
1278 if (readImmediate(insn, 1))
1279 return -1;
1280 break;
1281 case ENCODING_IW:
1282 if (readImmediate(insn, 2))
1283 return -1;
1284 break;
1285 case ENCODING_ID:
1286 if (readImmediate(insn, 4))
1287 return -1;
1288 break;
1289 case ENCODING_IO:
1290 if (readImmediate(insn, 8))
1291 return -1;
1292 break;
1293 case ENCODING_Iv:
Sean Callanana144c3f2010-04-02 21:23:51 +00001294 if (readImmediate(insn, insn->immediateSize))
1295 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001296 case ENCODING_Ia:
Sean Callanana144c3f2010-04-02 21:23:51 +00001297 if (readImmediate(insn, insn->addressSize))
1298 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001299 break;
1300 case ENCODING_RB:
Sean Callanana144c3f2010-04-02 21:23:51 +00001301 if (readOpcodeRegister(insn, 1))
1302 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001303 break;
1304 case ENCODING_RW:
Sean Callanana144c3f2010-04-02 21:23:51 +00001305 if (readOpcodeRegister(insn, 2))
1306 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001307 break;
1308 case ENCODING_RD:
Sean Callanana144c3f2010-04-02 21:23:51 +00001309 if (readOpcodeRegister(insn, 4))
1310 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001311 break;
1312 case ENCODING_RO:
Sean Callanana144c3f2010-04-02 21:23:51 +00001313 if (readOpcodeRegister(insn, 8))
1314 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001315 break;
1316 case ENCODING_Rv:
Sean Callanana144c3f2010-04-02 21:23:51 +00001317 if (readOpcodeRegister(insn, 0))
1318 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001319 break;
1320 case ENCODING_I:
Sean Callanana144c3f2010-04-02 21:23:51 +00001321 if (readOpcodeModifier(insn))
1322 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001323 case ENCODING_DUP:
1324 break;
1325 default:
Nuno Lopes392bbd92009-12-19 12:07:00 +00001326 dbgprintf(insn, "Encountered an operand with an unknown encoding.");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001327 return -1;
1328 }
1329 }
1330
1331 return 0;
1332}
1333
1334/*
1335 * decodeInstruction - Reads and interprets a full instruction provided by the
1336 * user.
1337 *
1338 * @param insn - A pointer to the instruction to be populated. Must be
1339 * pre-allocated.
1340 * @param reader - The function to be used to read the instruction's bytes.
1341 * @param readerArg - A generic argument to be passed to the reader to store
1342 * any internal state.
1343 * @param logger - If non-NULL, the function to be used to write log messages
1344 * and warnings.
1345 * @param loggerArg - A generic argument to be passed to the logger to store
1346 * any internal state.
1347 * @param startLoc - The address (in the reader's address space) of the first
1348 * byte in the instruction.
1349 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1350 * decode the instruction in.
1351 * @return - 0 if the instruction's memory could be read; nonzero if
1352 * not.
1353 */
1354int decodeInstruction(struct InternalInstruction* insn,
1355 byteReader_t reader,
1356 void* readerArg,
1357 dlog_t logger,
1358 void* loggerArg,
1359 uint64_t startLoc,
1360 DisassemblerMode mode) {
Daniel Dunbar71f842d2009-12-19 03:31:50 +00001361 memset(insn, 0, sizeof(struct InternalInstruction));
Sean Callanan8ed9f512009-12-19 02:59:52 +00001362
1363 insn->reader = reader;
1364 insn->readerArg = readerArg;
1365 insn->dlog = logger;
1366 insn->dlogArg = loggerArg;
1367 insn->startLocation = startLoc;
1368 insn->readerCursor = startLoc;
1369 insn->mode = mode;
1370 insn->numImmediatesConsumed = 0;
1371
1372 if (readPrefixes(insn) ||
1373 readOpcode(insn) ||
1374 getID(insn) ||
1375 insn->instructionID == 0 ||
1376 readOperands(insn))
1377 return -1;
1378
1379 insn->length = insn->readerCursor - insn->startLocation;
1380
Benjamin Kramer7c97ed72010-03-18 12:18:36 +00001381 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
1382 startLoc, insn->readerCursor, insn->length);
Sean Callanan8ed9f512009-12-19 02:59:52 +00001383
1384 if (insn->length > 15)
Nuno Lopes392bbd92009-12-19 12:07:00 +00001385 dbgprintf(insn, "Instruction exceeds 15-byte limit");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001386
1387 return 0;
1388}