/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===*
 *
 * This file is part of the X86 Disassembler.
 * It contains the implementation of the instruction decoder.
 * Documentation for the disassembler can be found in X86Disassembler.h.
 *
 *===----------------------------------------------------------------------===*/
15
Sean Callanan8ed9f512009-12-19 02:59:52 +000016#include <stdarg.h> /* for va_*() */
17#include <stdio.h> /* for vsnprintf() */
18#include <stdlib.h> /* for exit() */
Daniel Dunbar71f842d2009-12-19 03:31:50 +000019#include <string.h> /* for memset() */
Sean Callanan8ed9f512009-12-19 02:59:52 +000020
21#include "X86DisassemblerDecoder.h"
22
23#include "X86GenDisassemblerTables.inc"
24
/* Truth values used by the decoder's boolean-valued helpers. */
#define TRUE  1
#define FALSE 0

/* File-local boolean type: a signed 8-bit integer. */
typedef int8_t bool;

/*
 * debug - Reports a fixed message, tagged with the current file and line,
 *   through x86DisassemblerDebug().  Expands to an empty statement when
 *   NDEBUG is defined.
 *
 * @param s - The message to report.
 */
#ifdef NDEBUG
#define debug(s) do { } while (0)
#else
#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#endif
35
Sean Callanan8ed9f512009-12-19 02:59:52 +000036
37/*
38 * contextForAttrs - Client for the instruction context table. Takes a set of
39 * attributes and returns the appropriate decode context.
40 *
41 * @param attrMask - Attributes, from the enumeration attributeBits.
42 * @return - The InstructionContext to use when looking up an
43 * an instruction with these attributes.
44 */
Sean Callanan542eabc2009-12-22 22:51:40 +000045static InstructionContext contextForAttrs(uint8_t attrMask) {
Sean Callanan8ed9f512009-12-19 02:59:52 +000046 return CONTEXTS_SYM[attrMask];
47}
48
49/*
50 * modRMRequired - Reads the appropriate instruction table to determine whether
51 * the ModR/M byte is required to decode a particular instruction.
52 *
53 * @param type - The opcode type (i.e., how many bytes it has).
54 * @param insnContext - The context for the instruction, as returned by
55 * contextForAttrs.
56 * @param opcode - The last byte of the instruction's opcode, not counting
57 * ModR/M extensions and escapes.
58 * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
59 */
Sean Callanan542eabc2009-12-22 22:51:40 +000060static int modRMRequired(OpcodeType type,
Sean Callanan8ed9f512009-12-19 02:59:52 +000061 InstructionContext insnContext,
62 uint8_t opcode) {
Daniel Dunbarbaf2e352009-12-22 01:41:37 +000063 const struct ContextDecision* decision = 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +000064
65 switch (type) {
66 case ONEBYTE:
67 decision = &ONEBYTE_SYM;
68 break;
69 case TWOBYTE:
70 decision = &TWOBYTE_SYM;
71 break;
72 case THREEBYTE_38:
73 decision = &THREEBYTE38_SYM;
74 break;
75 case THREEBYTE_3A:
76 decision = &THREEBYTE3A_SYM;
77 break;
78 }
79
80 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
81 modrm_type != MODRM_ONEENTRY;
82
Sean Callanan8ed9f512009-12-19 02:59:52 +000083 return 0;
84}
85
86/*
87 * decode - Reads the appropriate instruction table to obtain the unique ID of
88 * an instruction.
89 *
90 * @param type - See modRMRequired().
91 * @param insnContext - See modRMRequired().
92 * @param opcode - See modRMRequired().
93 * @param modRM - The ModR/M byte if required, or any value if not.
Sean Callanana144c3f2010-04-02 21:23:51 +000094 * @return - The UID of the instruction, or 0 on failure.
Sean Callanan8ed9f512009-12-19 02:59:52 +000095 */
Sean Callanan542eabc2009-12-22 22:51:40 +000096static InstrUID decode(OpcodeType type,
Sean Callanana144c3f2010-04-02 21:23:51 +000097 InstructionContext insnContext,
98 uint8_t opcode,
99 uint8_t modRM) {
Benjamin Kramer4d1dca92010-10-23 09:10:44 +0000100 const struct ModRMDecision* dec;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000101
102 switch (type) {
103 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000104 debug("Unknown opcode type");
105 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000106 case ONEBYTE:
107 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
108 break;
109 case TWOBYTE:
110 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
111 break;
112 case THREEBYTE_38:
113 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
114 break;
115 case THREEBYTE_3A:
116 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
117 break;
118 }
119
120 switch (dec->modrm_type) {
121 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000122 debug("Corrupt table! Unknown modrm_type");
123 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000124 case MODRM_ONEENTRY:
125 return dec->instructionIDs[0];
126 case MODRM_SPLITRM:
127 if (modFromModRM(modRM) == 0x3)
128 return dec->instructionIDs[1];
129 else
130 return dec->instructionIDs[0];
131 case MODRM_FULL:
132 return dec->instructionIDs[modRM];
133 }
Sean Callanan8ed9f512009-12-19 02:59:52 +0000134}
135
136/*
137 * specifierForUID - Given a UID, returns the name and operand specification for
138 * that instruction.
139 *
140 * @param uid - The unique ID for the instruction. This should be returned by
141 * decode(); specifierForUID will not check bounds.
142 * @return - A pointer to the specification for that instruction.
143 */
Benjamin Kramer4d1dca92010-10-23 09:10:44 +0000144static const struct InstructionSpecifier *specifierForUID(InstrUID uid) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000145 return &INSTRUCTIONS_SYM[uid];
146}
147
148/*
149 * consumeByte - Uses the reader function provided by the user to consume one
150 * byte from the instruction's memory and advance the cursor.
151 *
152 * @param insn - The instruction with the reader function to use. The cursor
153 * for this instruction is advanced.
154 * @param byte - A pointer to a pre-allocated memory buffer to be populated
155 * with the data read.
156 * @return - 0 if the read was successful; nonzero otherwise.
157 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000158static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000159 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
160
161 if (!ret)
162 ++(insn->readerCursor);
163
164 return ret;
165}
166
167/*
168 * lookAtByte - Like consumeByte, but does not advance the cursor.
169 *
170 * @param insn - See consumeByte().
171 * @param byte - See consumeByte().
172 * @return - See consumeByte().
173 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000174static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000175 return insn->reader(insn->readerArg, byte, insn->readerCursor);
176}
177
Sean Callanan542eabc2009-12-22 22:51:40 +0000178static void unconsumeByte(struct InternalInstruction* insn) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000179 insn->readerCursor--;
180}
181
Sean Callanan542eabc2009-12-22 22:51:40 +0000182#define CONSUME_FUNC(name, type) \
183 static int name(struct InternalInstruction* insn, type* ptr) { \
184 type combined = 0; \
185 unsigned offset; \
186 for (offset = 0; offset < sizeof(type); ++offset) { \
187 uint8_t byte; \
188 int ret = insn->reader(insn->readerArg, \
189 &byte, \
190 insn->readerCursor + offset); \
191 if (ret) \
192 return ret; \
193 combined = combined | ((type)byte << ((type)offset * 8)); \
194 } \
195 *ptr = combined; \
196 insn->readerCursor += sizeof(type); \
197 return 0; \
Sean Callanan8ed9f512009-12-19 02:59:52 +0000198 }
199
200/*
201 * consume* - Use the reader function provided by the user to consume data
202 * values of various sizes from the instruction's memory and advance the
203 * cursor appropriately. These readers perform endian conversion.
204 *
205 * @param insn - See consumeByte().
206 * @param ptr - A pointer to a pre-allocated memory of appropriate size to
207 * be populated with the data read.
208 * @return - See consumeByte().
209 */
210CONSUME_FUNC(consumeInt8, int8_t)
211CONSUME_FUNC(consumeInt16, int16_t)
212CONSUME_FUNC(consumeInt32, int32_t)
213CONSUME_FUNC(consumeUInt16, uint16_t)
214CONSUME_FUNC(consumeUInt32, uint32_t)
215CONSUME_FUNC(consumeUInt64, uint64_t)
216
217/*
Nuno Lopes392bbd92009-12-19 12:07:00 +0000218 * dbgprintf - Uses the logging function provided by the user to log a single
Sean Callanan8ed9f512009-12-19 02:59:52 +0000219 * message, typically without a carriage-return.
220 *
221 * @param insn - The instruction containing the logging function.
222 * @param format - See printf().
223 * @param ... - See printf().
224 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000225static void dbgprintf(struct InternalInstruction* insn,
226 const char* format,
227 ...) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000228 char buffer[256];
229 va_list ap;
230
231 if (!insn->dlog)
232 return;
233
234 va_start(ap, format);
235 (void)vsnprintf(buffer, sizeof(buffer), format, ap);
236 va_end(ap);
237
238 insn->dlog(insn->dlogArg, buffer);
239
240 return;
241}
242
243/*
244 * setPrefixPresent - Marks that a particular prefix is present at a particular
245 * location.
246 *
247 * @param insn - The instruction to be marked as having the prefix.
248 * @param prefix - The prefix that is present.
249 * @param location - The location where the prefix is located (in the address
250 * space of the instruction's reader).
251 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000252static void setPrefixPresent(struct InternalInstruction* insn,
Sean Callanan8ed9f512009-12-19 02:59:52 +0000253 uint8_t prefix,
254 uint64_t location)
255{
256 insn->prefixPresent[prefix] = 1;
257 insn->prefixLocations[prefix] = location;
258}
259
260/*
261 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
262 * present at a given location.
263 *
264 * @param insn - The instruction to be queried.
265 * @param prefix - The prefix.
266 * @param location - The location to query.
267 * @return - Whether the prefix is at that location.
268 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000269static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
270 uint8_t prefix,
271 uint64_t location)
Sean Callanan8ed9f512009-12-19 02:59:52 +0000272{
273 if (insn->prefixPresent[prefix] == 1 &&
274 insn->prefixLocations[prefix] == location)
275 return TRUE;
276 else
277 return FALSE;
278}
279
280/*
281 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
282 * instruction as having them. Also sets the instruction's default operand,
283 * address, and other relevant data sizes to report operands correctly.
284 *
285 * @param insn - The instruction whose prefixes are to be read.
286 * @return - 0 if the instruction could be read until the end of the prefix
287 * bytes, and no prefixes conflicted; nonzero otherwise.
288 */
289static int readPrefixes(struct InternalInstruction* insn) {
290 BOOL isPrefix = TRUE;
291 BOOL prefixGroups[4] = { FALSE };
292 uint64_t prefixLocation;
293 uint8_t byte;
294
295 BOOL hasAdSize = FALSE;
296 BOOL hasOpSize = FALSE;
297
Nuno Lopes392bbd92009-12-19 12:07:00 +0000298 dbgprintf(insn, "readPrefixes()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000299
300 while (isPrefix) {
301 prefixLocation = insn->readerCursor;
302
303 if (consumeByte(insn, &byte))
304 return -1;
305
306 switch (byte) {
307 case 0xf0: /* LOCK */
308 case 0xf2: /* REPNE/REPNZ */
309 case 0xf3: /* REP or REPE/REPZ */
310 if (prefixGroups[0])
Nuno Lopes392bbd92009-12-19 12:07:00 +0000311 dbgprintf(insn, "Redundant Group 1 prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000312 prefixGroups[0] = TRUE;
313 setPrefixPresent(insn, byte, prefixLocation);
314 break;
315 case 0x2e: /* CS segment override -OR- Branch not taken */
316 case 0x36: /* SS segment override -OR- Branch taken */
317 case 0x3e: /* DS segment override */
318 case 0x26: /* ES segment override */
319 case 0x64: /* FS segment override */
320 case 0x65: /* GS segment override */
321 switch (byte) {
322 case 0x2e:
323 insn->segmentOverride = SEG_OVERRIDE_CS;
324 break;
325 case 0x36:
326 insn->segmentOverride = SEG_OVERRIDE_SS;
327 break;
328 case 0x3e:
329 insn->segmentOverride = SEG_OVERRIDE_DS;
330 break;
331 case 0x26:
332 insn->segmentOverride = SEG_OVERRIDE_ES;
333 break;
334 case 0x64:
335 insn->segmentOverride = SEG_OVERRIDE_FS;
336 break;
337 case 0x65:
338 insn->segmentOverride = SEG_OVERRIDE_GS;
339 break;
340 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000341 debug("Unhandled override");
342 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000343 }
344 if (prefixGroups[1])
Nuno Lopes392bbd92009-12-19 12:07:00 +0000345 dbgprintf(insn, "Redundant Group 2 prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000346 prefixGroups[1] = TRUE;
347 setPrefixPresent(insn, byte, prefixLocation);
348 break;
349 case 0x66: /* Operand-size override */
350 if (prefixGroups[2])
Nuno Lopes392bbd92009-12-19 12:07:00 +0000351 dbgprintf(insn, "Redundant Group 3 prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000352 prefixGroups[2] = TRUE;
353 hasOpSize = TRUE;
354 setPrefixPresent(insn, byte, prefixLocation);
355 break;
356 case 0x67: /* Address-size override */
357 if (prefixGroups[3])
Nuno Lopes392bbd92009-12-19 12:07:00 +0000358 dbgprintf(insn, "Redundant Group 4 prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000359 prefixGroups[3] = TRUE;
360 hasAdSize = TRUE;
361 setPrefixPresent(insn, byte, prefixLocation);
362 break;
363 default: /* Not a prefix byte */
364 isPrefix = FALSE;
365 break;
366 }
367
368 if (isPrefix)
Nuno Lopes392bbd92009-12-19 12:07:00 +0000369 dbgprintf(insn, "Found prefix 0x%hhx", byte);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000370 }
371
372 if (insn->mode == MODE_64BIT) {
373 if ((byte & 0xf0) == 0x40) {
374 uint8_t opcodeByte;
375
Sean Callanana144c3f2010-04-02 21:23:51 +0000376 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
Nuno Lopes392bbd92009-12-19 12:07:00 +0000377 dbgprintf(insn, "Redundant REX prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000378 return -1;
379 }
380
381 insn->rexPrefix = byte;
382 insn->necessaryPrefixLocation = insn->readerCursor - 2;
383
Nuno Lopes392bbd92009-12-19 12:07:00 +0000384 dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000385 } else {
386 unconsumeByte(insn);
387 insn->necessaryPrefixLocation = insn->readerCursor - 1;
388 }
389 } else {
390 unconsumeByte(insn);
391 }
392
393 if (insn->mode == MODE_16BIT) {
394 insn->registerSize = (hasOpSize ? 4 : 2);
395 insn->addressSize = (hasAdSize ? 4 : 2);
396 insn->displacementSize = (hasAdSize ? 4 : 2);
397 insn->immediateSize = (hasOpSize ? 4 : 2);
398 } else if (insn->mode == MODE_32BIT) {
399 insn->registerSize = (hasOpSize ? 2 : 4);
400 insn->addressSize = (hasAdSize ? 2 : 4);
401 insn->displacementSize = (hasAdSize ? 2 : 4);
Sean Callanan751752e2010-10-22 01:24:11 +0000402 insn->immediateSize = (hasOpSize ? 2 : 4);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000403 } else if (insn->mode == MODE_64BIT) {
404 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
405 insn->registerSize = 8;
406 insn->addressSize = (hasAdSize ? 4 : 8);
407 insn->displacementSize = 4;
408 insn->immediateSize = 4;
409 } else if (insn->rexPrefix) {
410 insn->registerSize = (hasOpSize ? 2 : 4);
411 insn->addressSize = (hasAdSize ? 4 : 8);
412 insn->displacementSize = (hasOpSize ? 2 : 4);
413 insn->immediateSize = (hasOpSize ? 2 : 4);
414 } else {
415 insn->registerSize = (hasOpSize ? 2 : 4);
416 insn->addressSize = (hasAdSize ? 4 : 8);
417 insn->displacementSize = (hasOpSize ? 2 : 4);
418 insn->immediateSize = (hasOpSize ? 2 : 4);
419 }
420 }
421
422 return 0;
423}
424
425/*
426 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
427 * extended or escape opcodes).
428 *
429 * @param insn - The instruction whose opcode is to be read.
430 * @return - 0 if the opcode could be read successfully; nonzero otherwise.
431 */
432static int readOpcode(struct InternalInstruction* insn) {
433 /* Determine the length of the primary opcode */
434
435 uint8_t current;
436
Nuno Lopes392bbd92009-12-19 12:07:00 +0000437 dbgprintf(insn, "readOpcode()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000438
439 insn->opcodeType = ONEBYTE;
440 if (consumeByte(insn, &current))
441 return -1;
442
443 if (current == 0x0f) {
Nuno Lopes392bbd92009-12-19 12:07:00 +0000444 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000445
446 insn->twoByteEscape = current;
447
448 if (consumeByte(insn, &current))
449 return -1;
450
451 if (current == 0x38) {
Nuno Lopes392bbd92009-12-19 12:07:00 +0000452 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000453
454 insn->threeByteEscape = current;
455
456 if (consumeByte(insn, &current))
457 return -1;
458
459 insn->opcodeType = THREEBYTE_38;
460 } else if (current == 0x3a) {
Nuno Lopes392bbd92009-12-19 12:07:00 +0000461 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000462
463 insn->threeByteEscape = current;
464
465 if (consumeByte(insn, &current))
466 return -1;
467
468 insn->opcodeType = THREEBYTE_3A;
469 } else {
Nuno Lopes392bbd92009-12-19 12:07:00 +0000470 dbgprintf(insn, "Didn't find a three-byte escape prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000471
472 insn->opcodeType = TWOBYTE;
473 }
474 }
475
476 /*
477 * At this point we have consumed the full opcode.
478 * Anything we consume from here on must be unconsumed.
479 */
480
481 insn->opcode = current;
482
483 return 0;
484}
485
486static int readModRM(struct InternalInstruction* insn);
487
488/*
489 * getIDWithAttrMask - Determines the ID of an instruction, consuming
490 * the ModR/M byte as appropriate for extended and escape opcodes,
491 * and using a supplied attribute mask.
492 *
493 * @param instructionID - A pointer whose target is filled in with the ID of the
494 * instruction.
495 * @param insn - The instruction whose ID is to be determined.
496 * @param attrMask - The attribute mask to search.
497 * @return - 0 if the ModR/M could be read when needed or was not
498 * needed; nonzero otherwise.
499 */
500static int getIDWithAttrMask(uint16_t* instructionID,
501 struct InternalInstruction* insn,
502 uint8_t attrMask) {
503 BOOL hasModRMExtension;
504
505 uint8_t instructionClass;
506
507 instructionClass = contextForAttrs(attrMask);
508
509 hasModRMExtension = modRMRequired(insn->opcodeType,
510 instructionClass,
511 insn->opcode);
512
513 if (hasModRMExtension) {
Rafael Espindola2f867a62011-01-06 16:48:42 +0000514 if (readModRM(insn))
515 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000516
517 *instructionID = decode(insn->opcodeType,
518 instructionClass,
519 insn->opcode,
520 insn->modRM);
521 } else {
522 *instructionID = decode(insn->opcodeType,
523 instructionClass,
524 insn->opcode,
525 0);
526 }
527
528 return 0;
529}
530
531/*
532 * is16BitEquivalent - Determines whether two instruction names refer to
533 * equivalent instructions but one is 16-bit whereas the other is not.
534 *
535 * @param orig - The instruction that is not 16-bit
536 * @param equiv - The instruction that is 16-bit
537 */
538static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
539 off_t i;
540
Sean Callanana144c3f2010-04-02 21:23:51 +0000541 for (i = 0;; i++) {
542 if (orig[i] == '\0' && equiv[i] == '\0')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000543 return TRUE;
Sean Callanana144c3f2010-04-02 21:23:51 +0000544 if (orig[i] == '\0' || equiv[i] == '\0')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000545 return FALSE;
Sean Callanana144c3f2010-04-02 21:23:51 +0000546 if (orig[i] != equiv[i]) {
547 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000548 continue;
Sean Callanana144c3f2010-04-02 21:23:51 +0000549 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000550 continue;
Sean Callanana144c3f2010-04-02 21:23:51 +0000551 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000552 continue;
553 return FALSE;
554 }
555 }
556}
557
558/*
559 * is64BitEquivalent - Determines whether two instruction names refer to
560 * equivalent instructions but one is 64-bit whereas the other is not.
561 *
562 * @param orig - The instruction that is not 64-bit
563 * @param equiv - The instruction that is 64-bit
564 */
565static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
566 off_t i;
567
Sean Callanana144c3f2010-04-02 21:23:51 +0000568 for (i = 0;; i++) {
569 if (orig[i] == '\0' && equiv[i] == '\0')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000570 return TRUE;
Sean Callanana144c3f2010-04-02 21:23:51 +0000571 if (orig[i] == '\0' || equiv[i] == '\0')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000572 return FALSE;
Sean Callanana144c3f2010-04-02 21:23:51 +0000573 if (orig[i] != equiv[i]) {
574 if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000575 continue;
Sean Callanana144c3f2010-04-02 21:23:51 +0000576 if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000577 continue;
Sean Callanana144c3f2010-04-02 21:23:51 +0000578 if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000579 continue;
580 return FALSE;
581 }
582 }
583}
584
585
586/*
587 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
588 * appropriate for extended and escape opcodes. Determines the attributes and
589 * context for the instruction before doing so.
590 *
591 * @param insn - The instruction whose ID is to be determined.
592 * @return - 0 if the ModR/M could be read when needed or was not needed;
593 * nonzero otherwise.
594 */
595static int getID(struct InternalInstruction* insn) {
596 uint8_t attrMask;
597 uint16_t instructionID;
598
Nuno Lopes392bbd92009-12-19 12:07:00 +0000599 dbgprintf(insn, "getID()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000600
601 attrMask = ATTR_NONE;
602
603 if (insn->mode == MODE_64BIT)
604 attrMask |= ATTR_64BIT;
605
606 if (insn->rexPrefix & 0x08)
607 attrMask |= ATTR_REXW;
608
609 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
610 attrMask |= ATTR_OPSIZE;
611 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
612 attrMask |= ATTR_XS;
613 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
614 attrMask |= ATTR_XD;
615
Sean Callanana144c3f2010-04-02 21:23:51 +0000616 if (getIDWithAttrMask(&instructionID, insn, attrMask))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000617 return -1;
618
619 /* The following clauses compensate for limitations of the tables. */
620
621 if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
622 /*
623 * Although for SSE instructions it is usually necessary to treat REX.W+F2
624 * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
625 * an occasional instruction where F2 is incidental and REX.W is the more
626 * significant. If the decoded instruction is 32-bit and adding REX.W
627 * instead of F2 changes a 32 to a 64, we adopt the new encoding.
628 */
629
Benjamin Kramer4d1dca92010-10-23 09:10:44 +0000630 const struct InstructionSpecifier *spec;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000631 uint16_t instructionIDWithREXw;
Benjamin Kramer4d1dca92010-10-23 09:10:44 +0000632 const struct InstructionSpecifier *specWithREXw;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000633
634 spec = specifierForUID(instructionID);
635
636 if (getIDWithAttrMask(&instructionIDWithREXw,
637 insn,
638 attrMask & (~ATTR_XD))) {
639 /*
640 * Decoding with REX.w would yield nothing; give up and return original
641 * decode.
642 */
643
644 insn->instructionID = instructionID;
645 insn->spec = spec;
646 return 0;
647 }
648
649 specWithREXw = specifierForUID(instructionIDWithREXw);
650
651 if (is64BitEquivalent(spec->name, specWithREXw->name)) {
652 insn->instructionID = instructionIDWithREXw;
653 insn->spec = specWithREXw;
654 } else {
655 insn->instructionID = instructionID;
656 insn->spec = spec;
657 }
658 return 0;
659 }
660
661 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
662 /*
663 * The instruction tables make no distinction between instructions that
664 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
665 * particular spot (i.e., many MMX operations). In general we're
666 * conservative, but in the specific case where OpSize is present but not
667 * in the right place we check if there's a 16-bit operation.
668 */
669
Benjamin Kramer4d1dca92010-10-23 09:10:44 +0000670 const struct InstructionSpecifier *spec;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000671 uint16_t instructionIDWithOpsize;
Benjamin Kramer4d1dca92010-10-23 09:10:44 +0000672 const struct InstructionSpecifier *specWithOpsize;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000673
674 spec = specifierForUID(instructionID);
675
676 if (getIDWithAttrMask(&instructionIDWithOpsize,
677 insn,
678 attrMask | ATTR_OPSIZE)) {
679 /*
680 * ModRM required with OpSize but not present; give up and return version
681 * without OpSize set
682 */
683
684 insn->instructionID = instructionID;
685 insn->spec = spec;
686 return 0;
687 }
688
689 specWithOpsize = specifierForUID(instructionIDWithOpsize);
690
691 if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
692 insn->instructionID = instructionIDWithOpsize;
693 insn->spec = specWithOpsize;
694 } else {
695 insn->instructionID = instructionID;
696 insn->spec = spec;
697 }
698 return 0;
699 }
700
701 insn->instructionID = instructionID;
702 insn->spec = specifierForUID(insn->instructionID);
703
704 return 0;
705}
706
707/*
708 * readSIB - Consumes the SIB byte to determine addressing information for an
709 * instruction.
710 *
711 * @param insn - The instruction whose SIB byte is to be read.
712 * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
713 */
714static int readSIB(struct InternalInstruction* insn) {
Daniel Dunbarbaf2e352009-12-22 01:41:37 +0000715 SIBIndex sibIndexBase = 0;
716 SIBBase sibBaseBase = 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000717 uint8_t index, base;
718
Nuno Lopes392bbd92009-12-19 12:07:00 +0000719 dbgprintf(insn, "readSIB()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000720
721 if (insn->consumedSIB)
722 return 0;
723
724 insn->consumedSIB = TRUE;
725
726 switch (insn->addressSize) {
727 case 2:
Nuno Lopes392bbd92009-12-19 12:07:00 +0000728 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000729 return -1;
730 break;
731 case 4:
732 sibIndexBase = SIB_INDEX_EAX;
733 sibBaseBase = SIB_BASE_EAX;
734 break;
735 case 8:
736 sibIndexBase = SIB_INDEX_RAX;
737 sibBaseBase = SIB_BASE_RAX;
738 break;
739 }
740
741 if (consumeByte(insn, &insn->sib))
742 return -1;
743
744 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
745
746 switch (index) {
747 case 0x4:
748 insn->sibIndex = SIB_INDEX_NONE;
749 break;
750 default:
751 insn->sibIndex = (EABase)(sibIndexBase + index);
752 if (insn->sibIndex == SIB_INDEX_sib ||
753 insn->sibIndex == SIB_INDEX_sib64)
754 insn->sibIndex = SIB_INDEX_NONE;
755 break;
756 }
757
758 switch (scaleFromSIB(insn->sib)) {
759 case 0:
760 insn->sibScale = 1;
761 break;
762 case 1:
763 insn->sibScale = 2;
764 break;
765 case 2:
766 insn->sibScale = 4;
767 break;
768 case 3:
769 insn->sibScale = 8;
770 break;
771 }
772
773 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
774
775 switch (base) {
776 case 0x5:
777 switch (modFromModRM(insn->modRM)) {
778 case 0x0:
779 insn->eaDisplacement = EA_DISP_32;
780 insn->sibBase = SIB_BASE_NONE;
781 break;
782 case 0x1:
783 insn->eaDisplacement = EA_DISP_8;
784 insn->sibBase = (insn->addressSize == 4 ?
785 SIB_BASE_EBP : SIB_BASE_RBP);
786 break;
787 case 0x2:
788 insn->eaDisplacement = EA_DISP_32;
789 insn->sibBase = (insn->addressSize == 4 ?
790 SIB_BASE_EBP : SIB_BASE_RBP);
791 break;
792 case 0x3:
Sean Callanana144c3f2010-04-02 21:23:51 +0000793 debug("Cannot have Mod = 0b11 and a SIB byte");
794 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000795 }
796 break;
797 default:
798 insn->sibBase = (EABase)(sibBaseBase + base);
799 break;
800 }
801
802 return 0;
803}
804
805/*
806 * readDisplacement - Consumes the displacement of an instruction.
807 *
808 * @param insn - The instruction whose displacement is to be read.
809 * @return - 0 if the displacement byte was successfully read; nonzero
810 * otherwise.
811 */
812static int readDisplacement(struct InternalInstruction* insn) {
813 int8_t d8;
814 int16_t d16;
815 int32_t d32;
816
Nuno Lopes392bbd92009-12-19 12:07:00 +0000817 dbgprintf(insn, "readDisplacement()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000818
819 if (insn->consumedDisplacement)
820 return 0;
821
822 insn->consumedDisplacement = TRUE;
823
824 switch (insn->eaDisplacement) {
825 case EA_DISP_NONE:
826 insn->consumedDisplacement = FALSE;
827 break;
828 case EA_DISP_8:
829 if (consumeInt8(insn, &d8))
830 return -1;
831 insn->displacement = d8;
832 break;
833 case EA_DISP_16:
834 if (consumeInt16(insn, &d16))
835 return -1;
836 insn->displacement = d16;
837 break;
838 case EA_DISP_32:
839 if (consumeInt32(insn, &d32))
840 return -1;
841 insn->displacement = d32;
842 break;
843 }
844
845 insn->consumedDisplacement = TRUE;
846 return 0;
847}
848
849/*
850 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
851 * displacement) for an instruction and interprets it.
852 *
853 * @param insn - The instruction whose addressing information is to be read.
854 * @return - 0 if the information was successfully read; nonzero otherwise.
855 */
856static int readModRM(struct InternalInstruction* insn) {
857 uint8_t mod, rm, reg;
858
Nuno Lopes392bbd92009-12-19 12:07:00 +0000859 dbgprintf(insn, "readModRM()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000860
861 if (insn->consumedModRM)
862 return 0;
863
Rafael Espindola2f867a62011-01-06 16:48:42 +0000864 if (consumeByte(insn, &insn->modRM))
865 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000866 insn->consumedModRM = TRUE;
867
868 mod = modFromModRM(insn->modRM);
869 rm = rmFromModRM(insn->modRM);
870 reg = regFromModRM(insn->modRM);
871
872 /*
873 * This goes by insn->registerSize to pick the correct register, which messes
874 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
875 * fixupReg().
876 */
877 switch (insn->registerSize) {
878 case 2:
Sean Callanan06b766d2009-12-22 02:07:42 +0000879 insn->regBase = MODRM_REG_AX;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000880 insn->eaRegBase = EA_REG_AX;
881 break;
882 case 4:
Sean Callanan06b766d2009-12-22 02:07:42 +0000883 insn->regBase = MODRM_REG_EAX;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000884 insn->eaRegBase = EA_REG_EAX;
885 break;
886 case 8:
Sean Callanan06b766d2009-12-22 02:07:42 +0000887 insn->regBase = MODRM_REG_RAX;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000888 insn->eaRegBase = EA_REG_RAX;
889 break;
890 }
891
892 reg |= rFromREX(insn->rexPrefix) << 3;
893 rm |= bFromREX(insn->rexPrefix) << 3;
894
895 insn->reg = (Reg)(insn->regBase + reg);
896
897 switch (insn->addressSize) {
898 case 2:
899 insn->eaBaseBase = EA_BASE_BX_SI;
900
901 switch (mod) {
902 case 0x0:
903 if (rm == 0x6) {
904 insn->eaBase = EA_BASE_NONE;
905 insn->eaDisplacement = EA_DISP_16;
Sean Callanana144c3f2010-04-02 21:23:51 +0000906 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000907 return -1;
908 } else {
909 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
910 insn->eaDisplacement = EA_DISP_NONE;
911 }
912 break;
913 case 0x1:
914 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
915 insn->eaDisplacement = EA_DISP_8;
Sean Callanana144c3f2010-04-02 21:23:51 +0000916 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000917 return -1;
918 break;
919 case 0x2:
920 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
921 insn->eaDisplacement = EA_DISP_16;
Sean Callanana144c3f2010-04-02 21:23:51 +0000922 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000923 return -1;
924 break;
925 case 0x3:
926 insn->eaBase = (EABase)(insn->eaRegBase + rm);
Sean Callanana144c3f2010-04-02 21:23:51 +0000927 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000928 return -1;
929 break;
930 }
931 break;
932 case 4:
933 case 8:
934 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
935
936 switch (mod) {
937 case 0x0:
938 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
939 switch (rm) {
940 case 0x4:
941 case 0xc: /* in case REXW.b is set */
942 insn->eaBase = (insn->addressSize == 4 ?
943 EA_BASE_sib : EA_BASE_sib64);
944 readSIB(insn);
Sean Callanana144c3f2010-04-02 21:23:51 +0000945 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000946 return -1;
947 break;
948 case 0x5:
949 insn->eaBase = EA_BASE_NONE;
950 insn->eaDisplacement = EA_DISP_32;
Sean Callanana144c3f2010-04-02 21:23:51 +0000951 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000952 return -1;
953 break;
954 default:
955 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
956 break;
957 }
958 break;
959 case 0x1:
960 case 0x2:
961 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
962 switch (rm) {
963 case 0x4:
964 case 0xc: /* in case REXW.b is set */
965 insn->eaBase = EA_BASE_sib;
966 readSIB(insn);
Sean Callanana144c3f2010-04-02 21:23:51 +0000967 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000968 return -1;
969 break;
970 default:
971 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
Sean Callanana144c3f2010-04-02 21:23:51 +0000972 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000973 return -1;
974 break;
975 }
976 break;
977 case 0x3:
978 insn->eaDisplacement = EA_DISP_NONE;
979 insn->eaBase = (EABase)(insn->eaRegBase + rm);
980 break;
981 }
982 break;
983 } /* switch (insn->addressSize) */
984
985 return 0;
986}
987
988#define GENERIC_FIXUP_FUNC(name, base, prefix) \
989 static uint8_t name(struct InternalInstruction *insn, \
990 OperandType type, \
991 uint8_t index, \
992 uint8_t *valid) { \
993 *valid = 1; \
994 switch (type) { \
995 default: \
Sean Callanana144c3f2010-04-02 21:23:51 +0000996 debug("Unhandled register type"); \
997 *valid = 0; \
998 return 0; \
Sean Callanan8ed9f512009-12-19 02:59:52 +0000999 case TYPE_Rv: \
1000 return base + index; \
1001 case TYPE_R8: \
Sean Callanana144c3f2010-04-02 21:23:51 +00001002 if (insn->rexPrefix && \
Sean Callanan8ed9f512009-12-19 02:59:52 +00001003 index >= 4 && index <= 7) { \
1004 return prefix##_SPL + (index - 4); \
1005 } else { \
1006 return prefix##_AL + index; \
1007 } \
1008 case TYPE_R16: \
1009 return prefix##_AX + index; \
1010 case TYPE_R32: \
1011 return prefix##_EAX + index; \
1012 case TYPE_R64: \
1013 return prefix##_RAX + index; \
1014 case TYPE_XMM128: \
1015 case TYPE_XMM64: \
1016 case TYPE_XMM32: \
1017 case TYPE_XMM: \
1018 return prefix##_XMM0 + index; \
1019 case TYPE_MM64: \
1020 case TYPE_MM32: \
1021 case TYPE_MM: \
Sean Callanana144c3f2010-04-02 21:23:51 +00001022 if (index > 7) \
Sean Callanan8ed9f512009-12-19 02:59:52 +00001023 *valid = 0; \
1024 return prefix##_MM0 + index; \
1025 case TYPE_SEGMENTREG: \
Sean Callanana144c3f2010-04-02 21:23:51 +00001026 if (index > 5) \
Sean Callanan8ed9f512009-12-19 02:59:52 +00001027 *valid = 0; \
1028 return prefix##_ES + index; \
1029 case TYPE_DEBUGREG: \
Sean Callanana144c3f2010-04-02 21:23:51 +00001030 if (index > 7) \
Sean Callanan8ed9f512009-12-19 02:59:52 +00001031 *valid = 0; \
1032 return prefix##_DR0 + index; \
Sean Callanan1a8b7892010-05-06 20:59:00 +00001033 case TYPE_CONTROLREG: \
Sean Callanana144c3f2010-04-02 21:23:51 +00001034 if (index > 8) \
Sean Callanan8ed9f512009-12-19 02:59:52 +00001035 *valid = 0; \
Sean Callanan1a8b7892010-05-06 20:59:00 +00001036 return prefix##_CR0 + index; \
Sean Callanan8ed9f512009-12-19 02:59:52 +00001037 } \
1038 }
1039
1040/*
1041 * fixup*Value - Consults an operand type to determine the meaning of the
1042 * reg or R/M field. If the operand is an XMM operand, for example, an
1043 * operand would be XMM0 instead of AX, which readModRM() would otherwise
1044 * misinterpret it as.
1045 *
1046 * @param insn - The instruction containing the operand.
1047 * @param type - The operand type.
1048 * @param index - The existing value of the field as reported by readModRM().
1049 * @param valid - The address of a uint8_t. The target is set to 1 if the
1050 * field is valid for the register class; 0 if not.
Sean Callanana144c3f2010-04-02 21:23:51 +00001051 * @return - The proper value.
Sean Callanan8ed9f512009-12-19 02:59:52 +00001052 */
Sean Callanan06b766d2009-12-22 02:07:42 +00001053GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
Sean Callanan8ed9f512009-12-19 02:59:52 +00001054GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
1055
1056/*
1057 * fixupReg - Consults an operand specifier to determine which of the
1058 * fixup*Value functions to use in correcting readModRM()'ss interpretation.
1059 *
1060 * @param insn - See fixup*Value().
1061 * @param op - The operand specifier.
1062 * @return - 0 if fixup was successful; -1 if the register returned was
1063 * invalid for its class.
1064 */
1065static int fixupReg(struct InternalInstruction *insn,
Benjamin Kramer4d1dca92010-10-23 09:10:44 +00001066 const struct OperandSpecifier *op) {
Sean Callanan8ed9f512009-12-19 02:59:52 +00001067 uint8_t valid;
1068
Nuno Lopes392bbd92009-12-19 12:07:00 +00001069 dbgprintf(insn, "fixupReg()");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001070
1071 switch ((OperandEncoding)op->encoding) {
1072 default:
Sean Callanana144c3f2010-04-02 21:23:51 +00001073 debug("Expected a REG or R/M encoding in fixupReg");
1074 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001075 case ENCODING_REG:
1076 insn->reg = (Reg)fixupRegValue(insn,
1077 (OperandType)op->type,
1078 insn->reg - insn->regBase,
1079 &valid);
1080 if (!valid)
1081 return -1;
1082 break;
1083 case ENCODING_RM:
1084 if (insn->eaBase >= insn->eaRegBase) {
1085 insn->eaBase = (EABase)fixupRMValue(insn,
1086 (OperandType)op->type,
1087 insn->eaBase - insn->eaRegBase,
1088 &valid);
1089 if (!valid)
1090 return -1;
1091 }
1092 break;
1093 }
1094
1095 return 0;
1096}
1097
1098/*
1099 * readOpcodeModifier - Reads an operand from the opcode field of an
1100 * instruction. Handles AddRegFrm instructions.
1101 *
1102 * @param insn - The instruction whose opcode field is to be read.
1103 * @param inModRM - Indicates that the opcode field is to be read from the
1104 * ModR/M extension; useful for escape opcodes
Sean Callanana144c3f2010-04-02 21:23:51 +00001105 * @return - 0 on success; nonzero otherwise.
Sean Callanan8ed9f512009-12-19 02:59:52 +00001106 */
Sean Callanana144c3f2010-04-02 21:23:51 +00001107static int readOpcodeModifier(struct InternalInstruction* insn) {
Nuno Lopes392bbd92009-12-19 12:07:00 +00001108 dbgprintf(insn, "readOpcodeModifier()");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001109
1110 if (insn->consumedOpcodeModifier)
Sean Callanana144c3f2010-04-02 21:23:51 +00001111 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001112
1113 insn->consumedOpcodeModifier = TRUE;
1114
Sean Callanana144c3f2010-04-02 21:23:51 +00001115 switch (insn->spec->modifierType) {
Sean Callanan8ed9f512009-12-19 02:59:52 +00001116 default:
Sean Callanana144c3f2010-04-02 21:23:51 +00001117 debug("Unknown modifier type.");
1118 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001119 case MODIFIER_NONE:
Sean Callanana144c3f2010-04-02 21:23:51 +00001120 debug("No modifier but an operand expects one.");
1121 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001122 case MODIFIER_OPCODE:
1123 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
Sean Callanana144c3f2010-04-02 21:23:51 +00001124 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001125 case MODIFIER_MODRM:
1126 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
Sean Callanana144c3f2010-04-02 21:23:51 +00001127 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001128 }
1129}
1130
1131/*
1132 * readOpcodeRegister - Reads an operand from the opcode field of an
1133 * instruction and interprets it appropriately given the operand width.
1134 * Handles AddRegFrm instructions.
1135 *
1136 * @param insn - See readOpcodeModifier().
1137 * @param size - The width (in bytes) of the register being specified.
1138 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1139 * RAX.
Sean Callanana144c3f2010-04-02 21:23:51 +00001140 * @return - 0 on success; nonzero otherwise.
Sean Callanan8ed9f512009-12-19 02:59:52 +00001141 */
Sean Callanana144c3f2010-04-02 21:23:51 +00001142static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
Nuno Lopes392bbd92009-12-19 12:07:00 +00001143 dbgprintf(insn, "readOpcodeRegister()");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001144
Sean Callanana144c3f2010-04-02 21:23:51 +00001145 if (readOpcodeModifier(insn))
1146 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001147
1148 if (size == 0)
1149 size = insn->registerSize;
1150
1151 switch (size) {
1152 case 1:
Sean Callanan06b766d2009-12-22 02:07:42 +00001153 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1154 | insn->opcodeModifier));
Sean Callanana144c3f2010-04-02 21:23:51 +00001155 if (insn->rexPrefix &&
1156 insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1157 insn->opcodeRegister < MODRM_REG_AL + 0x8) {
Sean Callanan06b766d2009-12-22 02:07:42 +00001158 insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1159 + (insn->opcodeRegister - MODRM_REG_AL - 4));
Sean Callanan8ed9f512009-12-19 02:59:52 +00001160 }
1161
1162 break;
1163 case 2:
Sean Callanan06b766d2009-12-22 02:07:42 +00001164 insn->opcodeRegister = (Reg)(MODRM_REG_AX
1165 + ((bFromREX(insn->rexPrefix) << 3)
1166 | insn->opcodeModifier));
Sean Callanan8ed9f512009-12-19 02:59:52 +00001167 break;
1168 case 4:
Sean Callanana144c3f2010-04-02 21:23:51 +00001169 insn->opcodeRegister = (Reg)(MODRM_REG_EAX
Sean Callanan06b766d2009-12-22 02:07:42 +00001170 + ((bFromREX(insn->rexPrefix) << 3)
1171 | insn->opcodeModifier));
Sean Callanan8ed9f512009-12-19 02:59:52 +00001172 break;
1173 case 8:
Sean Callanan06b766d2009-12-22 02:07:42 +00001174 insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1175 + ((bFromREX(insn->rexPrefix) << 3)
1176 | insn->opcodeModifier));
Sean Callanan8ed9f512009-12-19 02:59:52 +00001177 break;
1178 }
Sean Callanana144c3f2010-04-02 21:23:51 +00001179
1180 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001181}
1182
1183/*
1184 * readImmediate - Consumes an immediate operand from an instruction, given the
1185 * desired operand size.
1186 *
1187 * @param insn - The instruction whose operand is to be read.
1188 * @param size - The width (in bytes) of the operand.
1189 * @return - 0 if the immediate was successfully consumed; nonzero
1190 * otherwise.
1191 */
1192static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1193 uint8_t imm8;
1194 uint16_t imm16;
1195 uint32_t imm32;
1196 uint64_t imm64;
1197
Nuno Lopes392bbd92009-12-19 12:07:00 +00001198 dbgprintf(insn, "readImmediate()");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001199
Sean Callanana144c3f2010-04-02 21:23:51 +00001200 if (insn->numImmediatesConsumed == 2) {
1201 debug("Already consumed two immediates");
1202 return -1;
1203 }
Sean Callanan8ed9f512009-12-19 02:59:52 +00001204
1205 if (size == 0)
1206 size = insn->immediateSize;
1207 else
1208 insn->immediateSize = size;
1209
1210 switch (size) {
1211 case 1:
1212 if (consumeByte(insn, &imm8))
1213 return -1;
1214 insn->immediates[insn->numImmediatesConsumed] = imm8;
1215 break;
1216 case 2:
1217 if (consumeUInt16(insn, &imm16))
1218 return -1;
1219 insn->immediates[insn->numImmediatesConsumed] = imm16;
1220 break;
1221 case 4:
1222 if (consumeUInt32(insn, &imm32))
1223 return -1;
1224 insn->immediates[insn->numImmediatesConsumed] = imm32;
1225 break;
1226 case 8:
1227 if (consumeUInt64(insn, &imm64))
1228 return -1;
1229 insn->immediates[insn->numImmediatesConsumed] = imm64;
1230 break;
1231 }
1232
1233 insn->numImmediatesConsumed++;
1234
1235 return 0;
1236}
1237
1238/*
1239 * readOperands - Consults the specifier for an instruction and consumes all
1240 * operands for that instruction, interpreting them as it goes.
1241 *
1242 * @param insn - The instruction whose operands are to be read and interpreted.
1243 * @return - 0 if all operands could be read; nonzero otherwise.
1244 */
1245static int readOperands(struct InternalInstruction* insn) {
1246 int index;
1247
Nuno Lopes392bbd92009-12-19 12:07:00 +00001248 dbgprintf(insn, "readOperands()");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001249
1250 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
1251 switch (insn->spec->operands[index].encoding) {
1252 case ENCODING_NONE:
1253 break;
1254 case ENCODING_REG:
1255 case ENCODING_RM:
1256 if (readModRM(insn))
1257 return -1;
1258 if (fixupReg(insn, &insn->spec->operands[index]))
1259 return -1;
1260 break;
1261 case ENCODING_CB:
1262 case ENCODING_CW:
1263 case ENCODING_CD:
1264 case ENCODING_CP:
1265 case ENCODING_CO:
1266 case ENCODING_CT:
Nuno Lopes392bbd92009-12-19 12:07:00 +00001267 dbgprintf(insn, "We currently don't hande code-offset encodings");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001268 return -1;
1269 case ENCODING_IB:
1270 if (readImmediate(insn, 1))
1271 return -1;
Sean Callanan5edca812010-04-07 21:42:19 +00001272 if (insn->spec->operands[index].type == TYPE_IMM3 &&
1273 insn->immediates[insn->numImmediatesConsumed - 1] > 7)
1274 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001275 break;
1276 case ENCODING_IW:
1277 if (readImmediate(insn, 2))
1278 return -1;
1279 break;
1280 case ENCODING_ID:
1281 if (readImmediate(insn, 4))
1282 return -1;
1283 break;
1284 case ENCODING_IO:
1285 if (readImmediate(insn, 8))
1286 return -1;
1287 break;
1288 case ENCODING_Iv:
Sean Callanana144c3f2010-04-02 21:23:51 +00001289 if (readImmediate(insn, insn->immediateSize))
1290 return -1;
Chris Lattneraef1fea2010-04-16 21:15:15 +00001291 break;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001292 case ENCODING_Ia:
Sean Callanana144c3f2010-04-02 21:23:51 +00001293 if (readImmediate(insn, insn->addressSize))
1294 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001295 break;
1296 case ENCODING_RB:
Sean Callanana144c3f2010-04-02 21:23:51 +00001297 if (readOpcodeRegister(insn, 1))
1298 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001299 break;
1300 case ENCODING_RW:
Sean Callanana144c3f2010-04-02 21:23:51 +00001301 if (readOpcodeRegister(insn, 2))
1302 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001303 break;
1304 case ENCODING_RD:
Sean Callanana144c3f2010-04-02 21:23:51 +00001305 if (readOpcodeRegister(insn, 4))
1306 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001307 break;
1308 case ENCODING_RO:
Sean Callanana144c3f2010-04-02 21:23:51 +00001309 if (readOpcodeRegister(insn, 8))
1310 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001311 break;
1312 case ENCODING_Rv:
Sean Callanana144c3f2010-04-02 21:23:51 +00001313 if (readOpcodeRegister(insn, 0))
1314 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001315 break;
1316 case ENCODING_I:
Sean Callanana144c3f2010-04-02 21:23:51 +00001317 if (readOpcodeModifier(insn))
1318 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001319 case ENCODING_DUP:
1320 break;
1321 default:
Nuno Lopes392bbd92009-12-19 12:07:00 +00001322 dbgprintf(insn, "Encountered an operand with an unknown encoding.");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001323 return -1;
1324 }
1325 }
1326
1327 return 0;
1328}
1329
1330/*
1331 * decodeInstruction - Reads and interprets a full instruction provided by the
1332 * user.
1333 *
1334 * @param insn - A pointer to the instruction to be populated. Must be
1335 * pre-allocated.
1336 * @param reader - The function to be used to read the instruction's bytes.
1337 * @param readerArg - A generic argument to be passed to the reader to store
1338 * any internal state.
1339 * @param logger - If non-NULL, the function to be used to write log messages
1340 * and warnings.
1341 * @param loggerArg - A generic argument to be passed to the logger to store
1342 * any internal state.
1343 * @param startLoc - The address (in the reader's address space) of the first
1344 * byte in the instruction.
1345 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1346 * decode the instruction in.
1347 * @return - 0 if the instruction's memory could be read; nonzero if
1348 * not.
1349 */
1350int decodeInstruction(struct InternalInstruction* insn,
1351 byteReader_t reader,
1352 void* readerArg,
1353 dlog_t logger,
1354 void* loggerArg,
1355 uint64_t startLoc,
1356 DisassemblerMode mode) {
Daniel Dunbar71f842d2009-12-19 03:31:50 +00001357 memset(insn, 0, sizeof(struct InternalInstruction));
Sean Callanan8ed9f512009-12-19 02:59:52 +00001358
1359 insn->reader = reader;
1360 insn->readerArg = readerArg;
1361 insn->dlog = logger;
1362 insn->dlogArg = loggerArg;
1363 insn->startLocation = startLoc;
1364 insn->readerCursor = startLoc;
1365 insn->mode = mode;
1366 insn->numImmediatesConsumed = 0;
1367
1368 if (readPrefixes(insn) ||
1369 readOpcode(insn) ||
1370 getID(insn) ||
1371 insn->instructionID == 0 ||
1372 readOperands(insn))
1373 return -1;
1374
1375 insn->length = insn->readerCursor - insn->startLocation;
1376
Benjamin Kramer7c97ed72010-03-18 12:18:36 +00001377 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
1378 startLoc, insn->readerCursor, insn->length);
Sean Callanan8ed9f512009-12-19 02:59:52 +00001379
1380 if (insn->length > 15)
Nuno Lopes392bbd92009-12-19 12:07:00 +00001381 dbgprintf(insn, "Instruction exceeds 15-byte limit");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001382
1383 return 0;
1384}