blob: 0b7d3013012322ba052cbd9bd002d14ad0f08fb1 [file] [log] [blame]
Sean Callanan8ed9f512009-12-19 02:59:52 +00001/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
2 *
3 * The LLVM Compiler Infrastructure
4 *
5 * This file is distributed under the University of Illinois Open Source
6 * License. See LICENSE.TXT for details.
7 *
8 *===----------------------------------------------------------------------===*
9 *
10 * This file is part of the X86 Disassembler.
11 * It contains the implementation of the instruction decoder.
12 * Documentation for the disassembler can be found in X86Disassembler.h.
13 *
14 *===----------------------------------------------------------------------===*/
15
Sean Callanan8ed9f512009-12-19 02:59:52 +000016#include <stdarg.h> /* for va_*() */
17#include <stdio.h> /* for vsnprintf() */
18#include <stdlib.h> /* for exit() */
Daniel Dunbar71f842d2009-12-19 03:31:50 +000019#include <string.h> /* for memset() */
Sean Callanan8ed9f512009-12-19 02:59:52 +000020
21#include "X86DisassemblerDecoder.h"
22
23#include "X86GenDisassemblerTables.inc"
24
25#define TRUE 1
26#define FALSE 0
27
Sean Callanana144c3f2010-04-02 21:23:51 +000028typedef int8_t bool;
29
Sean Callanana144c3f2010-04-02 21:23:51 +000030#ifndef NDEBUG
31#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
32#else
33#define debug(s) do { } while (0)
34#endif
35
Sean Callanan8ed9f512009-12-19 02:59:52 +000036
37/*
38 * contextForAttrs - Client for the instruction context table. Takes a set of
39 * attributes and returns the appropriate decode context.
40 *
41 * @param attrMask - Attributes, from the enumeration attributeBits.
42 * @return - The InstructionContext to use when looking up an
43 * an instruction with these attributes.
44 */
Sean Callanan542eabc2009-12-22 22:51:40 +000045static InstructionContext contextForAttrs(uint8_t attrMask) {
Sean Callanan8ed9f512009-12-19 02:59:52 +000046 return CONTEXTS_SYM[attrMask];
47}
48
49/*
50 * modRMRequired - Reads the appropriate instruction table to determine whether
51 * the ModR/M byte is required to decode a particular instruction.
52 *
53 * @param type - The opcode type (i.e., how many bytes it has).
54 * @param insnContext - The context for the instruction, as returned by
55 * contextForAttrs.
56 * @param opcode - The last byte of the instruction's opcode, not counting
57 * ModR/M extensions and escapes.
58 * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
59 */
Sean Callanan542eabc2009-12-22 22:51:40 +000060static int modRMRequired(OpcodeType type,
Sean Callanan8ed9f512009-12-19 02:59:52 +000061 InstructionContext insnContext,
62 uint8_t opcode) {
Daniel Dunbarbaf2e352009-12-22 01:41:37 +000063 const struct ContextDecision* decision = 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +000064
65 switch (type) {
66 case ONEBYTE:
67 decision = &ONEBYTE_SYM;
68 break;
69 case TWOBYTE:
70 decision = &TWOBYTE_SYM;
71 break;
72 case THREEBYTE_38:
73 decision = &THREEBYTE38_SYM;
74 break;
75 case THREEBYTE_3A:
76 decision = &THREEBYTE3A_SYM;
77 break;
78 }
79
80 return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
81 modrm_type != MODRM_ONEENTRY;
82
Sean Callanan8ed9f512009-12-19 02:59:52 +000083 return 0;
84}
85
86/*
87 * decode - Reads the appropriate instruction table to obtain the unique ID of
88 * an instruction.
89 *
90 * @param type - See modRMRequired().
91 * @param insnContext - See modRMRequired().
92 * @param opcode - See modRMRequired().
93 * @param modRM - The ModR/M byte if required, or any value if not.
Sean Callanana144c3f2010-04-02 21:23:51 +000094 * @return - The UID of the instruction, or 0 on failure.
Sean Callanan8ed9f512009-12-19 02:59:52 +000095 */
Sean Callanan542eabc2009-12-22 22:51:40 +000096static InstrUID decode(OpcodeType type,
Sean Callanana144c3f2010-04-02 21:23:51 +000097 InstructionContext insnContext,
98 uint8_t opcode,
99 uint8_t modRM) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000100 struct ModRMDecision* dec;
101
102 switch (type) {
103 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000104 debug("Unknown opcode type");
105 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000106 case ONEBYTE:
107 dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
108 break;
109 case TWOBYTE:
110 dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
111 break;
112 case THREEBYTE_38:
113 dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
114 break;
115 case THREEBYTE_3A:
116 dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
117 break;
118 }
119
120 switch (dec->modrm_type) {
121 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000122 debug("Corrupt table! Unknown modrm_type");
123 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000124 case MODRM_ONEENTRY:
125 return dec->instructionIDs[0];
126 case MODRM_SPLITRM:
127 if (modFromModRM(modRM) == 0x3)
128 return dec->instructionIDs[1];
129 else
130 return dec->instructionIDs[0];
131 case MODRM_FULL:
132 return dec->instructionIDs[modRM];
133 }
Sean Callanan8ed9f512009-12-19 02:59:52 +0000134}
135
136/*
137 * specifierForUID - Given a UID, returns the name and operand specification for
138 * that instruction.
139 *
140 * @param uid - The unique ID for the instruction. This should be returned by
141 * decode(); specifierForUID will not check bounds.
142 * @return - A pointer to the specification for that instruction.
143 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000144static struct InstructionSpecifier* specifierForUID(InstrUID uid) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000145 return &INSTRUCTIONS_SYM[uid];
146}
147
148/*
149 * consumeByte - Uses the reader function provided by the user to consume one
150 * byte from the instruction's memory and advance the cursor.
151 *
152 * @param insn - The instruction with the reader function to use. The cursor
153 * for this instruction is advanced.
154 * @param byte - A pointer to a pre-allocated memory buffer to be populated
155 * with the data read.
156 * @return - 0 if the read was successful; nonzero otherwise.
157 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000158static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000159 int ret = insn->reader(insn->readerArg, byte, insn->readerCursor);
160
161 if (!ret)
162 ++(insn->readerCursor);
163
164 return ret;
165}
166
167/*
168 * lookAtByte - Like consumeByte, but does not advance the cursor.
169 *
170 * @param insn - See consumeByte().
171 * @param byte - See consumeByte().
172 * @return - See consumeByte().
173 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000174static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000175 return insn->reader(insn->readerArg, byte, insn->readerCursor);
176}
177
Sean Callanan542eabc2009-12-22 22:51:40 +0000178static void unconsumeByte(struct InternalInstruction* insn) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000179 insn->readerCursor--;
180}
181
Sean Callanan542eabc2009-12-22 22:51:40 +0000182#define CONSUME_FUNC(name, type) \
183 static int name(struct InternalInstruction* insn, type* ptr) { \
184 type combined = 0; \
185 unsigned offset; \
186 for (offset = 0; offset < sizeof(type); ++offset) { \
187 uint8_t byte; \
188 int ret = insn->reader(insn->readerArg, \
189 &byte, \
190 insn->readerCursor + offset); \
191 if (ret) \
192 return ret; \
193 combined = combined | ((type)byte << ((type)offset * 8)); \
194 } \
195 *ptr = combined; \
196 insn->readerCursor += sizeof(type); \
197 return 0; \
Sean Callanan8ed9f512009-12-19 02:59:52 +0000198 }
199
200/*
201 * consume* - Use the reader function provided by the user to consume data
202 * values of various sizes from the instruction's memory and advance the
203 * cursor appropriately. These readers perform endian conversion.
204 *
205 * @param insn - See consumeByte().
206 * @param ptr - A pointer to a pre-allocated memory of appropriate size to
207 * be populated with the data read.
208 * @return - See consumeByte().
209 */
210CONSUME_FUNC(consumeInt8, int8_t)
211CONSUME_FUNC(consumeInt16, int16_t)
212CONSUME_FUNC(consumeInt32, int32_t)
213CONSUME_FUNC(consumeUInt16, uint16_t)
214CONSUME_FUNC(consumeUInt32, uint32_t)
215CONSUME_FUNC(consumeUInt64, uint64_t)
216
217/*
Nuno Lopes392bbd92009-12-19 12:07:00 +0000218 * dbgprintf - Uses the logging function provided by the user to log a single
Sean Callanan8ed9f512009-12-19 02:59:52 +0000219 * message, typically without a carriage-return.
220 *
221 * @param insn - The instruction containing the logging function.
222 * @param format - See printf().
223 * @param ... - See printf().
224 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000225static void dbgprintf(struct InternalInstruction* insn,
226 const char* format,
227 ...) {
Sean Callanan8ed9f512009-12-19 02:59:52 +0000228 char buffer[256];
229 va_list ap;
230
231 if (!insn->dlog)
232 return;
233
234 va_start(ap, format);
235 (void)vsnprintf(buffer, sizeof(buffer), format, ap);
236 va_end(ap);
237
238 insn->dlog(insn->dlogArg, buffer);
239
240 return;
241}
242
243/*
244 * setPrefixPresent - Marks that a particular prefix is present at a particular
245 * location.
246 *
247 * @param insn - The instruction to be marked as having the prefix.
248 * @param prefix - The prefix that is present.
249 * @param location - The location where the prefix is located (in the address
250 * space of the instruction's reader).
251 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000252static void setPrefixPresent(struct InternalInstruction* insn,
Sean Callanan8ed9f512009-12-19 02:59:52 +0000253 uint8_t prefix,
254 uint64_t location)
255{
256 insn->prefixPresent[prefix] = 1;
257 insn->prefixLocations[prefix] = location;
258}
259
260/*
261 * isPrefixAtLocation - Queries an instruction to determine whether a prefix is
262 * present at a given location.
263 *
264 * @param insn - The instruction to be queried.
265 * @param prefix - The prefix.
266 * @param location - The location to query.
267 * @return - Whether the prefix is at that location.
268 */
Sean Callanan542eabc2009-12-22 22:51:40 +0000269static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
270 uint8_t prefix,
271 uint64_t location)
Sean Callanan8ed9f512009-12-19 02:59:52 +0000272{
273 if (insn->prefixPresent[prefix] == 1 &&
274 insn->prefixLocations[prefix] == location)
275 return TRUE;
276 else
277 return FALSE;
278}
279
280/*
281 * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the
282 * instruction as having them. Also sets the instruction's default operand,
283 * address, and other relevant data sizes to report operands correctly.
284 *
285 * @param insn - The instruction whose prefixes are to be read.
286 * @return - 0 if the instruction could be read until the end of the prefix
287 * bytes, and no prefixes conflicted; nonzero otherwise.
288 */
289static int readPrefixes(struct InternalInstruction* insn) {
290 BOOL isPrefix = TRUE;
291 BOOL prefixGroups[4] = { FALSE };
292 uint64_t prefixLocation;
293 uint8_t byte;
294
295 BOOL hasAdSize = FALSE;
296 BOOL hasOpSize = FALSE;
297
Nuno Lopes392bbd92009-12-19 12:07:00 +0000298 dbgprintf(insn, "readPrefixes()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000299
300 while (isPrefix) {
301 prefixLocation = insn->readerCursor;
302
303 if (consumeByte(insn, &byte))
304 return -1;
305
306 switch (byte) {
307 case 0xf0: /* LOCK */
308 case 0xf2: /* REPNE/REPNZ */
309 case 0xf3: /* REP or REPE/REPZ */
310 if (prefixGroups[0])
Nuno Lopes392bbd92009-12-19 12:07:00 +0000311 dbgprintf(insn, "Redundant Group 1 prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000312 prefixGroups[0] = TRUE;
313 setPrefixPresent(insn, byte, prefixLocation);
314 break;
315 case 0x2e: /* CS segment override -OR- Branch not taken */
316 case 0x36: /* SS segment override -OR- Branch taken */
317 case 0x3e: /* DS segment override */
318 case 0x26: /* ES segment override */
319 case 0x64: /* FS segment override */
320 case 0x65: /* GS segment override */
321 switch (byte) {
322 case 0x2e:
323 insn->segmentOverride = SEG_OVERRIDE_CS;
324 break;
325 case 0x36:
326 insn->segmentOverride = SEG_OVERRIDE_SS;
327 break;
328 case 0x3e:
329 insn->segmentOverride = SEG_OVERRIDE_DS;
330 break;
331 case 0x26:
332 insn->segmentOverride = SEG_OVERRIDE_ES;
333 break;
334 case 0x64:
335 insn->segmentOverride = SEG_OVERRIDE_FS;
336 break;
337 case 0x65:
338 insn->segmentOverride = SEG_OVERRIDE_GS;
339 break;
340 default:
Sean Callanana144c3f2010-04-02 21:23:51 +0000341 debug("Unhandled override");
342 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000343 }
344 if (prefixGroups[1])
Nuno Lopes392bbd92009-12-19 12:07:00 +0000345 dbgprintf(insn, "Redundant Group 2 prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000346 prefixGroups[1] = TRUE;
347 setPrefixPresent(insn, byte, prefixLocation);
348 break;
349 case 0x66: /* Operand-size override */
350 if (prefixGroups[2])
Nuno Lopes392bbd92009-12-19 12:07:00 +0000351 dbgprintf(insn, "Redundant Group 3 prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000352 prefixGroups[2] = TRUE;
353 hasOpSize = TRUE;
354 setPrefixPresent(insn, byte, prefixLocation);
355 break;
356 case 0x67: /* Address-size override */
357 if (prefixGroups[3])
Nuno Lopes392bbd92009-12-19 12:07:00 +0000358 dbgprintf(insn, "Redundant Group 4 prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000359 prefixGroups[3] = TRUE;
360 hasAdSize = TRUE;
361 setPrefixPresent(insn, byte, prefixLocation);
362 break;
363 default: /* Not a prefix byte */
364 isPrefix = FALSE;
365 break;
366 }
367
368 if (isPrefix)
Nuno Lopes392bbd92009-12-19 12:07:00 +0000369 dbgprintf(insn, "Found prefix 0x%hhx", byte);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000370 }
371
372 if (insn->mode == MODE_64BIT) {
373 if ((byte & 0xf0) == 0x40) {
374 uint8_t opcodeByte;
375
Sean Callanana144c3f2010-04-02 21:23:51 +0000376 if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
Nuno Lopes392bbd92009-12-19 12:07:00 +0000377 dbgprintf(insn, "Redundant REX prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000378 return -1;
379 }
380
381 insn->rexPrefix = byte;
382 insn->necessaryPrefixLocation = insn->readerCursor - 2;
383
Nuno Lopes392bbd92009-12-19 12:07:00 +0000384 dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000385 } else {
386 unconsumeByte(insn);
387 insn->necessaryPrefixLocation = insn->readerCursor - 1;
388 }
389 } else {
390 unconsumeByte(insn);
391 }
392
393 if (insn->mode == MODE_16BIT) {
394 insn->registerSize = (hasOpSize ? 4 : 2);
395 insn->addressSize = (hasAdSize ? 4 : 2);
396 insn->displacementSize = (hasAdSize ? 4 : 2);
397 insn->immediateSize = (hasOpSize ? 4 : 2);
398 } else if (insn->mode == MODE_32BIT) {
399 insn->registerSize = (hasOpSize ? 2 : 4);
400 insn->addressSize = (hasAdSize ? 2 : 4);
401 insn->displacementSize = (hasAdSize ? 2 : 4);
Sean Callanan751752e2010-10-22 01:24:11 +0000402 insn->immediateSize = (hasOpSize ? 2 : 4);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000403 } else if (insn->mode == MODE_64BIT) {
404 if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
405 insn->registerSize = 8;
406 insn->addressSize = (hasAdSize ? 4 : 8);
407 insn->displacementSize = 4;
408 insn->immediateSize = 4;
409 } else if (insn->rexPrefix) {
410 insn->registerSize = (hasOpSize ? 2 : 4);
411 insn->addressSize = (hasAdSize ? 4 : 8);
412 insn->displacementSize = (hasOpSize ? 2 : 4);
413 insn->immediateSize = (hasOpSize ? 2 : 4);
414 } else {
415 insn->registerSize = (hasOpSize ? 2 : 4);
416 insn->addressSize = (hasAdSize ? 4 : 8);
417 insn->displacementSize = (hasOpSize ? 2 : 4);
418 insn->immediateSize = (hasOpSize ? 2 : 4);
419 }
420 }
421
422 return 0;
423}
424
425/*
426 * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of
427 * extended or escape opcodes).
428 *
429 * @param insn - The instruction whose opcode is to be read.
430 * @return - 0 if the opcode could be read successfully; nonzero otherwise.
431 */
432static int readOpcode(struct InternalInstruction* insn) {
433 /* Determine the length of the primary opcode */
434
435 uint8_t current;
436
Nuno Lopes392bbd92009-12-19 12:07:00 +0000437 dbgprintf(insn, "readOpcode()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000438
439 insn->opcodeType = ONEBYTE;
440 if (consumeByte(insn, &current))
441 return -1;
442
443 if (current == 0x0f) {
Nuno Lopes392bbd92009-12-19 12:07:00 +0000444 dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000445
446 insn->twoByteEscape = current;
447
448 if (consumeByte(insn, &current))
449 return -1;
450
451 if (current == 0x38) {
Nuno Lopes392bbd92009-12-19 12:07:00 +0000452 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000453
454 insn->threeByteEscape = current;
455
456 if (consumeByte(insn, &current))
457 return -1;
458
459 insn->opcodeType = THREEBYTE_38;
460 } else if (current == 0x3a) {
Nuno Lopes392bbd92009-12-19 12:07:00 +0000461 dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
Sean Callanan8ed9f512009-12-19 02:59:52 +0000462
463 insn->threeByteEscape = current;
464
465 if (consumeByte(insn, &current))
466 return -1;
467
468 insn->opcodeType = THREEBYTE_3A;
469 } else {
Nuno Lopes392bbd92009-12-19 12:07:00 +0000470 dbgprintf(insn, "Didn't find a three-byte escape prefix");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000471
472 insn->opcodeType = TWOBYTE;
473 }
474 }
475
476 /*
477 * At this point we have consumed the full opcode.
478 * Anything we consume from here on must be unconsumed.
479 */
480
481 insn->opcode = current;
482
483 return 0;
484}
485
486static int readModRM(struct InternalInstruction* insn);
487
488/*
489 * getIDWithAttrMask - Determines the ID of an instruction, consuming
490 * the ModR/M byte as appropriate for extended and escape opcodes,
491 * and using a supplied attribute mask.
492 *
493 * @param instructionID - A pointer whose target is filled in with the ID of the
494 * instruction.
495 * @param insn - The instruction whose ID is to be determined.
496 * @param attrMask - The attribute mask to search.
497 * @return - 0 if the ModR/M could be read when needed or was not
498 * needed; nonzero otherwise.
499 */
500static int getIDWithAttrMask(uint16_t* instructionID,
501 struct InternalInstruction* insn,
502 uint8_t attrMask) {
503 BOOL hasModRMExtension;
504
505 uint8_t instructionClass;
506
507 instructionClass = contextForAttrs(attrMask);
508
509 hasModRMExtension = modRMRequired(insn->opcodeType,
510 instructionClass,
511 insn->opcode);
512
513 if (hasModRMExtension) {
514 readModRM(insn);
515
516 *instructionID = decode(insn->opcodeType,
517 instructionClass,
518 insn->opcode,
519 insn->modRM);
520 } else {
521 *instructionID = decode(insn->opcodeType,
522 instructionClass,
523 insn->opcode,
524 0);
525 }
526
527 return 0;
528}
529
530/*
531 * is16BitEquivalent - Determines whether two instruction names refer to
532 * equivalent instructions but one is 16-bit whereas the other is not.
533 *
534 * @param orig - The instruction that is not 16-bit
535 * @param equiv - The instruction that is 16-bit
536 */
537static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
538 off_t i;
539
Sean Callanana144c3f2010-04-02 21:23:51 +0000540 for (i = 0;; i++) {
541 if (orig[i] == '\0' && equiv[i] == '\0')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000542 return TRUE;
Sean Callanana144c3f2010-04-02 21:23:51 +0000543 if (orig[i] == '\0' || equiv[i] == '\0')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000544 return FALSE;
Sean Callanana144c3f2010-04-02 21:23:51 +0000545 if (orig[i] != equiv[i]) {
546 if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000547 continue;
Sean Callanana144c3f2010-04-02 21:23:51 +0000548 if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000549 continue;
Sean Callanana144c3f2010-04-02 21:23:51 +0000550 if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000551 continue;
552 return FALSE;
553 }
554 }
555}
556
557/*
558 * is64BitEquivalent - Determines whether two instruction names refer to
559 * equivalent instructions but one is 64-bit whereas the other is not.
560 *
561 * @param orig - The instruction that is not 64-bit
562 * @param equiv - The instruction that is 64-bit
563 */
564static BOOL is64BitEquivalent(const char* orig, const char* equiv) {
565 off_t i;
566
Sean Callanana144c3f2010-04-02 21:23:51 +0000567 for (i = 0;; i++) {
568 if (orig[i] == '\0' && equiv[i] == '\0')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000569 return TRUE;
Sean Callanana144c3f2010-04-02 21:23:51 +0000570 if (orig[i] == '\0' || equiv[i] == '\0')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000571 return FALSE;
Sean Callanana144c3f2010-04-02 21:23:51 +0000572 if (orig[i] != equiv[i]) {
573 if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000574 continue;
Sean Callanana144c3f2010-04-02 21:23:51 +0000575 if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000576 continue;
Sean Callanana144c3f2010-04-02 21:23:51 +0000577 if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4')
Sean Callanan8ed9f512009-12-19 02:59:52 +0000578 continue;
579 return FALSE;
580 }
581 }
582}
583
584
585/*
586 * getID - Determines the ID of an instruction, consuming the ModR/M byte as
587 * appropriate for extended and escape opcodes. Determines the attributes and
588 * context for the instruction before doing so.
589 *
590 * @param insn - The instruction whose ID is to be determined.
591 * @return - 0 if the ModR/M could be read when needed or was not needed;
592 * nonzero otherwise.
593 */
594static int getID(struct InternalInstruction* insn) {
595 uint8_t attrMask;
596 uint16_t instructionID;
597
Nuno Lopes392bbd92009-12-19 12:07:00 +0000598 dbgprintf(insn, "getID()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000599
600 attrMask = ATTR_NONE;
601
602 if (insn->mode == MODE_64BIT)
603 attrMask |= ATTR_64BIT;
604
605 if (insn->rexPrefix & 0x08)
606 attrMask |= ATTR_REXW;
607
608 if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
609 attrMask |= ATTR_OPSIZE;
610 else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
611 attrMask |= ATTR_XS;
612 else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
613 attrMask |= ATTR_XD;
614
Sean Callanana144c3f2010-04-02 21:23:51 +0000615 if (getIDWithAttrMask(&instructionID, insn, attrMask))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000616 return -1;
617
618 /* The following clauses compensate for limitations of the tables. */
619
620 if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) {
621 /*
622 * Although for SSE instructions it is usually necessary to treat REX.W+F2
623 * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is
624 * an occasional instruction where F2 is incidental and REX.W is the more
625 * significant. If the decoded instruction is 32-bit and adding REX.W
626 * instead of F2 changes a 32 to a 64, we adopt the new encoding.
627 */
628
629 struct InstructionSpecifier* spec;
630 uint16_t instructionIDWithREXw;
631 struct InstructionSpecifier* specWithREXw;
632
633 spec = specifierForUID(instructionID);
634
635 if (getIDWithAttrMask(&instructionIDWithREXw,
636 insn,
637 attrMask & (~ATTR_XD))) {
638 /*
639 * Decoding with REX.w would yield nothing; give up and return original
640 * decode.
641 */
642
643 insn->instructionID = instructionID;
644 insn->spec = spec;
645 return 0;
646 }
647
648 specWithREXw = specifierForUID(instructionIDWithREXw);
649
650 if (is64BitEquivalent(spec->name, specWithREXw->name)) {
651 insn->instructionID = instructionIDWithREXw;
652 insn->spec = specWithREXw;
653 } else {
654 insn->instructionID = instructionID;
655 insn->spec = spec;
656 }
657 return 0;
658 }
659
660 if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) {
661 /*
662 * The instruction tables make no distinction between instructions that
663 * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
664 * particular spot (i.e., many MMX operations). In general we're
665 * conservative, but in the specific case where OpSize is present but not
666 * in the right place we check if there's a 16-bit operation.
667 */
668
669 struct InstructionSpecifier* spec;
670 uint16_t instructionIDWithOpsize;
671 struct InstructionSpecifier* specWithOpsize;
672
673 spec = specifierForUID(instructionID);
674
675 if (getIDWithAttrMask(&instructionIDWithOpsize,
676 insn,
677 attrMask | ATTR_OPSIZE)) {
678 /*
679 * ModRM required with OpSize but not present; give up and return version
680 * without OpSize set
681 */
682
683 insn->instructionID = instructionID;
684 insn->spec = spec;
685 return 0;
686 }
687
688 specWithOpsize = specifierForUID(instructionIDWithOpsize);
689
690 if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
691 insn->instructionID = instructionIDWithOpsize;
692 insn->spec = specWithOpsize;
693 } else {
694 insn->instructionID = instructionID;
695 insn->spec = spec;
696 }
697 return 0;
698 }
699
700 insn->instructionID = instructionID;
701 insn->spec = specifierForUID(insn->instructionID);
702
703 return 0;
704}
705
706/*
707 * readSIB - Consumes the SIB byte to determine addressing information for an
708 * instruction.
709 *
710 * @param insn - The instruction whose SIB byte is to be read.
711 * @return - 0 if the SIB byte was successfully read; nonzero otherwise.
712 */
713static int readSIB(struct InternalInstruction* insn) {
Daniel Dunbarbaf2e352009-12-22 01:41:37 +0000714 SIBIndex sibIndexBase = 0;
715 SIBBase sibBaseBase = 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000716 uint8_t index, base;
717
Nuno Lopes392bbd92009-12-19 12:07:00 +0000718 dbgprintf(insn, "readSIB()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000719
720 if (insn->consumedSIB)
721 return 0;
722
723 insn->consumedSIB = TRUE;
724
725 switch (insn->addressSize) {
726 case 2:
Nuno Lopes392bbd92009-12-19 12:07:00 +0000727 dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000728 return -1;
729 break;
730 case 4:
731 sibIndexBase = SIB_INDEX_EAX;
732 sibBaseBase = SIB_BASE_EAX;
733 break;
734 case 8:
735 sibIndexBase = SIB_INDEX_RAX;
736 sibBaseBase = SIB_BASE_RAX;
737 break;
738 }
739
740 if (consumeByte(insn, &insn->sib))
741 return -1;
742
743 index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
744
745 switch (index) {
746 case 0x4:
747 insn->sibIndex = SIB_INDEX_NONE;
748 break;
749 default:
750 insn->sibIndex = (EABase)(sibIndexBase + index);
751 if (insn->sibIndex == SIB_INDEX_sib ||
752 insn->sibIndex == SIB_INDEX_sib64)
753 insn->sibIndex = SIB_INDEX_NONE;
754 break;
755 }
756
757 switch (scaleFromSIB(insn->sib)) {
758 case 0:
759 insn->sibScale = 1;
760 break;
761 case 1:
762 insn->sibScale = 2;
763 break;
764 case 2:
765 insn->sibScale = 4;
766 break;
767 case 3:
768 insn->sibScale = 8;
769 break;
770 }
771
772 base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
773
774 switch (base) {
775 case 0x5:
776 switch (modFromModRM(insn->modRM)) {
777 case 0x0:
778 insn->eaDisplacement = EA_DISP_32;
779 insn->sibBase = SIB_BASE_NONE;
780 break;
781 case 0x1:
782 insn->eaDisplacement = EA_DISP_8;
783 insn->sibBase = (insn->addressSize == 4 ?
784 SIB_BASE_EBP : SIB_BASE_RBP);
785 break;
786 case 0x2:
787 insn->eaDisplacement = EA_DISP_32;
788 insn->sibBase = (insn->addressSize == 4 ?
789 SIB_BASE_EBP : SIB_BASE_RBP);
790 break;
791 case 0x3:
Sean Callanana144c3f2010-04-02 21:23:51 +0000792 debug("Cannot have Mod = 0b11 and a SIB byte");
793 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000794 }
795 break;
796 default:
797 insn->sibBase = (EABase)(sibBaseBase + base);
798 break;
799 }
800
801 return 0;
802}
803
804/*
805 * readDisplacement - Consumes the displacement of an instruction.
806 *
807 * @param insn - The instruction whose displacement is to be read.
808 * @return - 0 if the displacement byte was successfully read; nonzero
809 * otherwise.
810 */
811static int readDisplacement(struct InternalInstruction* insn) {
812 int8_t d8;
813 int16_t d16;
814 int32_t d32;
815
Nuno Lopes392bbd92009-12-19 12:07:00 +0000816 dbgprintf(insn, "readDisplacement()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000817
818 if (insn->consumedDisplacement)
819 return 0;
820
821 insn->consumedDisplacement = TRUE;
822
823 switch (insn->eaDisplacement) {
824 case EA_DISP_NONE:
825 insn->consumedDisplacement = FALSE;
826 break;
827 case EA_DISP_8:
828 if (consumeInt8(insn, &d8))
829 return -1;
830 insn->displacement = d8;
831 break;
832 case EA_DISP_16:
833 if (consumeInt16(insn, &d16))
834 return -1;
835 insn->displacement = d16;
836 break;
837 case EA_DISP_32:
838 if (consumeInt32(insn, &d32))
839 return -1;
840 insn->displacement = d32;
841 break;
842 }
843
844 insn->consumedDisplacement = TRUE;
845 return 0;
846}
847
848/*
849 * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and
850 * displacement) for an instruction and interprets it.
851 *
852 * @param insn - The instruction whose addressing information is to be read.
853 * @return - 0 if the information was successfully read; nonzero otherwise.
854 */
855static int readModRM(struct InternalInstruction* insn) {
856 uint8_t mod, rm, reg;
857
Nuno Lopes392bbd92009-12-19 12:07:00 +0000858 dbgprintf(insn, "readModRM()");
Sean Callanan8ed9f512009-12-19 02:59:52 +0000859
860 if (insn->consumedModRM)
861 return 0;
862
863 consumeByte(insn, &insn->modRM);
864 insn->consumedModRM = TRUE;
865
866 mod = modFromModRM(insn->modRM);
867 rm = rmFromModRM(insn->modRM);
868 reg = regFromModRM(insn->modRM);
869
870 /*
871 * This goes by insn->registerSize to pick the correct register, which messes
872 * up if we're using (say) XMM or 8-bit register operands. That gets fixed in
873 * fixupReg().
874 */
875 switch (insn->registerSize) {
876 case 2:
Sean Callanan06b766d2009-12-22 02:07:42 +0000877 insn->regBase = MODRM_REG_AX;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000878 insn->eaRegBase = EA_REG_AX;
879 break;
880 case 4:
Sean Callanan06b766d2009-12-22 02:07:42 +0000881 insn->regBase = MODRM_REG_EAX;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000882 insn->eaRegBase = EA_REG_EAX;
883 break;
884 case 8:
Sean Callanan06b766d2009-12-22 02:07:42 +0000885 insn->regBase = MODRM_REG_RAX;
Sean Callanan8ed9f512009-12-19 02:59:52 +0000886 insn->eaRegBase = EA_REG_RAX;
887 break;
888 }
889
890 reg |= rFromREX(insn->rexPrefix) << 3;
891 rm |= bFromREX(insn->rexPrefix) << 3;
892
893 insn->reg = (Reg)(insn->regBase + reg);
894
895 switch (insn->addressSize) {
896 case 2:
897 insn->eaBaseBase = EA_BASE_BX_SI;
898
899 switch (mod) {
900 case 0x0:
901 if (rm == 0x6) {
902 insn->eaBase = EA_BASE_NONE;
903 insn->eaDisplacement = EA_DISP_16;
Sean Callanana144c3f2010-04-02 21:23:51 +0000904 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000905 return -1;
906 } else {
907 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
908 insn->eaDisplacement = EA_DISP_NONE;
909 }
910 break;
911 case 0x1:
912 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
913 insn->eaDisplacement = EA_DISP_8;
Sean Callanana144c3f2010-04-02 21:23:51 +0000914 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000915 return -1;
916 break;
917 case 0x2:
918 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
919 insn->eaDisplacement = EA_DISP_16;
Sean Callanana144c3f2010-04-02 21:23:51 +0000920 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000921 return -1;
922 break;
923 case 0x3:
924 insn->eaBase = (EABase)(insn->eaRegBase + rm);
Sean Callanana144c3f2010-04-02 21:23:51 +0000925 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000926 return -1;
927 break;
928 }
929 break;
930 case 4:
931 case 8:
932 insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
933
934 switch (mod) {
935 case 0x0:
936 insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */
937 switch (rm) {
938 case 0x4:
939 case 0xc: /* in case REXW.b is set */
940 insn->eaBase = (insn->addressSize == 4 ?
941 EA_BASE_sib : EA_BASE_sib64);
942 readSIB(insn);
Sean Callanana144c3f2010-04-02 21:23:51 +0000943 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000944 return -1;
945 break;
946 case 0x5:
947 insn->eaBase = EA_BASE_NONE;
948 insn->eaDisplacement = EA_DISP_32;
Sean Callanana144c3f2010-04-02 21:23:51 +0000949 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000950 return -1;
951 break;
952 default:
953 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
954 break;
955 }
956 break;
957 case 0x1:
958 case 0x2:
959 insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
960 switch (rm) {
961 case 0x4:
962 case 0xc: /* in case REXW.b is set */
963 insn->eaBase = EA_BASE_sib;
964 readSIB(insn);
Sean Callanana144c3f2010-04-02 21:23:51 +0000965 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000966 return -1;
967 break;
968 default:
969 insn->eaBase = (EABase)(insn->eaBaseBase + rm);
Sean Callanana144c3f2010-04-02 21:23:51 +0000970 if (readDisplacement(insn))
Sean Callanan8ed9f512009-12-19 02:59:52 +0000971 return -1;
972 break;
973 }
974 break;
975 case 0x3:
976 insn->eaDisplacement = EA_DISP_NONE;
977 insn->eaBase = (EABase)(insn->eaRegBase + rm);
978 break;
979 }
980 break;
981 } /* switch (insn->addressSize) */
982
983 return 0;
984}
985
986#define GENERIC_FIXUP_FUNC(name, base, prefix) \
987 static uint8_t name(struct InternalInstruction *insn, \
988 OperandType type, \
989 uint8_t index, \
990 uint8_t *valid) { \
991 *valid = 1; \
992 switch (type) { \
993 default: \
Sean Callanana144c3f2010-04-02 21:23:51 +0000994 debug("Unhandled register type"); \
995 *valid = 0; \
996 return 0; \
Sean Callanan8ed9f512009-12-19 02:59:52 +0000997 case TYPE_Rv: \
998 return base + index; \
999 case TYPE_R8: \
Sean Callanana144c3f2010-04-02 21:23:51 +00001000 if (insn->rexPrefix && \
Sean Callanan8ed9f512009-12-19 02:59:52 +00001001 index >= 4 && index <= 7) { \
1002 return prefix##_SPL + (index - 4); \
1003 } else { \
1004 return prefix##_AL + index; \
1005 } \
1006 case TYPE_R16: \
1007 return prefix##_AX + index; \
1008 case TYPE_R32: \
1009 return prefix##_EAX + index; \
1010 case TYPE_R64: \
1011 return prefix##_RAX + index; \
1012 case TYPE_XMM128: \
1013 case TYPE_XMM64: \
1014 case TYPE_XMM32: \
1015 case TYPE_XMM: \
1016 return prefix##_XMM0 + index; \
1017 case TYPE_MM64: \
1018 case TYPE_MM32: \
1019 case TYPE_MM: \
Sean Callanana144c3f2010-04-02 21:23:51 +00001020 if (index > 7) \
Sean Callanan8ed9f512009-12-19 02:59:52 +00001021 *valid = 0; \
1022 return prefix##_MM0 + index; \
1023 case TYPE_SEGMENTREG: \
Sean Callanana144c3f2010-04-02 21:23:51 +00001024 if (index > 5) \
Sean Callanan8ed9f512009-12-19 02:59:52 +00001025 *valid = 0; \
1026 return prefix##_ES + index; \
1027 case TYPE_DEBUGREG: \
Sean Callanana144c3f2010-04-02 21:23:51 +00001028 if (index > 7) \
Sean Callanan8ed9f512009-12-19 02:59:52 +00001029 *valid = 0; \
1030 return prefix##_DR0 + index; \
Sean Callanan1a8b7892010-05-06 20:59:00 +00001031 case TYPE_CONTROLREG: \
Sean Callanana144c3f2010-04-02 21:23:51 +00001032 if (index > 8) \
Sean Callanan8ed9f512009-12-19 02:59:52 +00001033 *valid = 0; \
Sean Callanan1a8b7892010-05-06 20:59:00 +00001034 return prefix##_CR0 + index; \
Sean Callanan8ed9f512009-12-19 02:59:52 +00001035 } \
1036 }
1037
1038/*
1039 * fixup*Value - Consults an operand type to determine the meaning of the
1040 * reg or R/M field. If the operand is an XMM operand, for example, an
1041 * operand would be XMM0 instead of AX, which readModRM() would otherwise
1042 * misinterpret it as.
1043 *
1044 * @param insn - The instruction containing the operand.
1045 * @param type - The operand type.
1046 * @param index - The existing value of the field as reported by readModRM().
1047 * @param valid - The address of a uint8_t. The target is set to 1 if the
1048 * field is valid for the register class; 0 if not.
Sean Callanana144c3f2010-04-02 21:23:51 +00001049 * @return - The proper value.
Sean Callanan8ed9f512009-12-19 02:59:52 +00001050 */
Sean Callanan06b766d2009-12-22 02:07:42 +00001051GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
Sean Callanan8ed9f512009-12-19 02:59:52 +00001052GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
1053
1054/*
1055 * fixupReg - Consults an operand specifier to determine which of the
1056 * fixup*Value functions to use in correcting readModRM()'ss interpretation.
1057 *
1058 * @param insn - See fixup*Value().
1059 * @param op - The operand specifier.
1060 * @return - 0 if fixup was successful; -1 if the register returned was
1061 * invalid for its class.
1062 */
1063static int fixupReg(struct InternalInstruction *insn,
1064 struct OperandSpecifier *op) {
1065 uint8_t valid;
1066
Nuno Lopes392bbd92009-12-19 12:07:00 +00001067 dbgprintf(insn, "fixupReg()");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001068
1069 switch ((OperandEncoding)op->encoding) {
1070 default:
Sean Callanana144c3f2010-04-02 21:23:51 +00001071 debug("Expected a REG or R/M encoding in fixupReg");
1072 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001073 case ENCODING_REG:
1074 insn->reg = (Reg)fixupRegValue(insn,
1075 (OperandType)op->type,
1076 insn->reg - insn->regBase,
1077 &valid);
1078 if (!valid)
1079 return -1;
1080 break;
1081 case ENCODING_RM:
1082 if (insn->eaBase >= insn->eaRegBase) {
1083 insn->eaBase = (EABase)fixupRMValue(insn,
1084 (OperandType)op->type,
1085 insn->eaBase - insn->eaRegBase,
1086 &valid);
1087 if (!valid)
1088 return -1;
1089 }
1090 break;
1091 }
1092
1093 return 0;
1094}
1095
1096/*
1097 * readOpcodeModifier - Reads an operand from the opcode field of an
1098 * instruction. Handles AddRegFrm instructions.
1099 *
1100 * @param insn - The instruction whose opcode field is to be read.
1101 * @param inModRM - Indicates that the opcode field is to be read from the
1102 * ModR/M extension; useful for escape opcodes
Sean Callanana144c3f2010-04-02 21:23:51 +00001103 * @return - 0 on success; nonzero otherwise.
Sean Callanan8ed9f512009-12-19 02:59:52 +00001104 */
Sean Callanana144c3f2010-04-02 21:23:51 +00001105static int readOpcodeModifier(struct InternalInstruction* insn) {
Nuno Lopes392bbd92009-12-19 12:07:00 +00001106 dbgprintf(insn, "readOpcodeModifier()");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001107
1108 if (insn->consumedOpcodeModifier)
Sean Callanana144c3f2010-04-02 21:23:51 +00001109 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001110
1111 insn->consumedOpcodeModifier = TRUE;
1112
Sean Callanana144c3f2010-04-02 21:23:51 +00001113 switch (insn->spec->modifierType) {
Sean Callanan8ed9f512009-12-19 02:59:52 +00001114 default:
Sean Callanana144c3f2010-04-02 21:23:51 +00001115 debug("Unknown modifier type.");
1116 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001117 case MODIFIER_NONE:
Sean Callanana144c3f2010-04-02 21:23:51 +00001118 debug("No modifier but an operand expects one.");
1119 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001120 case MODIFIER_OPCODE:
1121 insn->opcodeModifier = insn->opcode - insn->spec->modifierBase;
Sean Callanana144c3f2010-04-02 21:23:51 +00001122 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001123 case MODIFIER_MODRM:
1124 insn->opcodeModifier = insn->modRM - insn->spec->modifierBase;
Sean Callanana144c3f2010-04-02 21:23:51 +00001125 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001126 }
1127}
1128
1129/*
1130 * readOpcodeRegister - Reads an operand from the opcode field of an
1131 * instruction and interprets it appropriately given the operand width.
1132 * Handles AddRegFrm instructions.
1133 *
1134 * @param insn - See readOpcodeModifier().
1135 * @param size - The width (in bytes) of the register being specified.
1136 * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1137 * RAX.
Sean Callanana144c3f2010-04-02 21:23:51 +00001138 * @return - 0 on success; nonzero otherwise.
Sean Callanan8ed9f512009-12-19 02:59:52 +00001139 */
Sean Callanana144c3f2010-04-02 21:23:51 +00001140static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {
Nuno Lopes392bbd92009-12-19 12:07:00 +00001141 dbgprintf(insn, "readOpcodeRegister()");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001142
Sean Callanana144c3f2010-04-02 21:23:51 +00001143 if (readOpcodeModifier(insn))
1144 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001145
1146 if (size == 0)
1147 size = insn->registerSize;
1148
1149 switch (size) {
1150 case 1:
Sean Callanan06b766d2009-12-22 02:07:42 +00001151 insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3)
1152 | insn->opcodeModifier));
Sean Callanana144c3f2010-04-02 21:23:51 +00001153 if (insn->rexPrefix &&
1154 insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1155 insn->opcodeRegister < MODRM_REG_AL + 0x8) {
Sean Callanan06b766d2009-12-22 02:07:42 +00001156 insn->opcodeRegister = (Reg)(MODRM_REG_SPL
1157 + (insn->opcodeRegister - MODRM_REG_AL - 4));
Sean Callanan8ed9f512009-12-19 02:59:52 +00001158 }
1159
1160 break;
1161 case 2:
Sean Callanan06b766d2009-12-22 02:07:42 +00001162 insn->opcodeRegister = (Reg)(MODRM_REG_AX
1163 + ((bFromREX(insn->rexPrefix) << 3)
1164 | insn->opcodeModifier));
Sean Callanan8ed9f512009-12-19 02:59:52 +00001165 break;
1166 case 4:
Sean Callanana144c3f2010-04-02 21:23:51 +00001167 insn->opcodeRegister = (Reg)(MODRM_REG_EAX
Sean Callanan06b766d2009-12-22 02:07:42 +00001168 + ((bFromREX(insn->rexPrefix) << 3)
1169 | insn->opcodeModifier));
Sean Callanan8ed9f512009-12-19 02:59:52 +00001170 break;
1171 case 8:
Sean Callanan06b766d2009-12-22 02:07:42 +00001172 insn->opcodeRegister = (Reg)(MODRM_REG_RAX
1173 + ((bFromREX(insn->rexPrefix) << 3)
1174 | insn->opcodeModifier));
Sean Callanan8ed9f512009-12-19 02:59:52 +00001175 break;
1176 }
Sean Callanana144c3f2010-04-02 21:23:51 +00001177
1178 return 0;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001179}
1180
1181/*
1182 * readImmediate - Consumes an immediate operand from an instruction, given the
1183 * desired operand size.
1184 *
1185 * @param insn - The instruction whose operand is to be read.
1186 * @param size - The width (in bytes) of the operand.
1187 * @return - 0 if the immediate was successfully consumed; nonzero
1188 * otherwise.
1189 */
1190static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
1191 uint8_t imm8;
1192 uint16_t imm16;
1193 uint32_t imm32;
1194 uint64_t imm64;
1195
Nuno Lopes392bbd92009-12-19 12:07:00 +00001196 dbgprintf(insn, "readImmediate()");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001197
Sean Callanana144c3f2010-04-02 21:23:51 +00001198 if (insn->numImmediatesConsumed == 2) {
1199 debug("Already consumed two immediates");
1200 return -1;
1201 }
Sean Callanan8ed9f512009-12-19 02:59:52 +00001202
1203 if (size == 0)
1204 size = insn->immediateSize;
1205 else
1206 insn->immediateSize = size;
1207
1208 switch (size) {
1209 case 1:
1210 if (consumeByte(insn, &imm8))
1211 return -1;
1212 insn->immediates[insn->numImmediatesConsumed] = imm8;
1213 break;
1214 case 2:
1215 if (consumeUInt16(insn, &imm16))
1216 return -1;
1217 insn->immediates[insn->numImmediatesConsumed] = imm16;
1218 break;
1219 case 4:
1220 if (consumeUInt32(insn, &imm32))
1221 return -1;
1222 insn->immediates[insn->numImmediatesConsumed] = imm32;
1223 break;
1224 case 8:
1225 if (consumeUInt64(insn, &imm64))
1226 return -1;
1227 insn->immediates[insn->numImmediatesConsumed] = imm64;
1228 break;
1229 }
1230
1231 insn->numImmediatesConsumed++;
1232
1233 return 0;
1234}
1235
1236/*
1237 * readOperands - Consults the specifier for an instruction and consumes all
1238 * operands for that instruction, interpreting them as it goes.
1239 *
1240 * @param insn - The instruction whose operands are to be read and interpreted.
1241 * @return - 0 if all operands could be read; nonzero otherwise.
1242 */
1243static int readOperands(struct InternalInstruction* insn) {
1244 int index;
1245
Nuno Lopes392bbd92009-12-19 12:07:00 +00001246 dbgprintf(insn, "readOperands()");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001247
1248 for (index = 0; index < X86_MAX_OPERANDS; ++index) {
1249 switch (insn->spec->operands[index].encoding) {
1250 case ENCODING_NONE:
1251 break;
1252 case ENCODING_REG:
1253 case ENCODING_RM:
1254 if (readModRM(insn))
1255 return -1;
1256 if (fixupReg(insn, &insn->spec->operands[index]))
1257 return -1;
1258 break;
1259 case ENCODING_CB:
1260 case ENCODING_CW:
1261 case ENCODING_CD:
1262 case ENCODING_CP:
1263 case ENCODING_CO:
1264 case ENCODING_CT:
Nuno Lopes392bbd92009-12-19 12:07:00 +00001265 dbgprintf(insn, "We currently don't hande code-offset encodings");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001266 return -1;
1267 case ENCODING_IB:
1268 if (readImmediate(insn, 1))
1269 return -1;
Sean Callanan5edca812010-04-07 21:42:19 +00001270 if (insn->spec->operands[index].type == TYPE_IMM3 &&
1271 insn->immediates[insn->numImmediatesConsumed - 1] > 7)
1272 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001273 break;
1274 case ENCODING_IW:
1275 if (readImmediate(insn, 2))
1276 return -1;
1277 break;
1278 case ENCODING_ID:
1279 if (readImmediate(insn, 4))
1280 return -1;
1281 break;
1282 case ENCODING_IO:
1283 if (readImmediate(insn, 8))
1284 return -1;
1285 break;
1286 case ENCODING_Iv:
Sean Callanana144c3f2010-04-02 21:23:51 +00001287 if (readImmediate(insn, insn->immediateSize))
1288 return -1;
Chris Lattneraef1fea2010-04-16 21:15:15 +00001289 break;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001290 case ENCODING_Ia:
Sean Callanana144c3f2010-04-02 21:23:51 +00001291 if (readImmediate(insn, insn->addressSize))
1292 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001293 break;
1294 case ENCODING_RB:
Sean Callanana144c3f2010-04-02 21:23:51 +00001295 if (readOpcodeRegister(insn, 1))
1296 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001297 break;
1298 case ENCODING_RW:
Sean Callanana144c3f2010-04-02 21:23:51 +00001299 if (readOpcodeRegister(insn, 2))
1300 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001301 break;
1302 case ENCODING_RD:
Sean Callanana144c3f2010-04-02 21:23:51 +00001303 if (readOpcodeRegister(insn, 4))
1304 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001305 break;
1306 case ENCODING_RO:
Sean Callanana144c3f2010-04-02 21:23:51 +00001307 if (readOpcodeRegister(insn, 8))
1308 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001309 break;
1310 case ENCODING_Rv:
Sean Callanana144c3f2010-04-02 21:23:51 +00001311 if (readOpcodeRegister(insn, 0))
1312 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001313 break;
1314 case ENCODING_I:
Sean Callanana144c3f2010-04-02 21:23:51 +00001315 if (readOpcodeModifier(insn))
1316 return -1;
Sean Callanan8ed9f512009-12-19 02:59:52 +00001317 case ENCODING_DUP:
1318 break;
1319 default:
Nuno Lopes392bbd92009-12-19 12:07:00 +00001320 dbgprintf(insn, "Encountered an operand with an unknown encoding.");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001321 return -1;
1322 }
1323 }
1324
1325 return 0;
1326}
1327
1328/*
1329 * decodeInstruction - Reads and interprets a full instruction provided by the
1330 * user.
1331 *
1332 * @param insn - A pointer to the instruction to be populated. Must be
1333 * pre-allocated.
1334 * @param reader - The function to be used to read the instruction's bytes.
1335 * @param readerArg - A generic argument to be passed to the reader to store
1336 * any internal state.
1337 * @param logger - If non-NULL, the function to be used to write log messages
1338 * and warnings.
1339 * @param loggerArg - A generic argument to be passed to the logger to store
1340 * any internal state.
1341 * @param startLoc - The address (in the reader's address space) of the first
1342 * byte in the instruction.
1343 * @param mode - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
1344 * decode the instruction in.
1345 * @return - 0 if the instruction's memory could be read; nonzero if
1346 * not.
1347 */
1348int decodeInstruction(struct InternalInstruction* insn,
1349 byteReader_t reader,
1350 void* readerArg,
1351 dlog_t logger,
1352 void* loggerArg,
1353 uint64_t startLoc,
1354 DisassemblerMode mode) {
Daniel Dunbar71f842d2009-12-19 03:31:50 +00001355 memset(insn, 0, sizeof(struct InternalInstruction));
Sean Callanan8ed9f512009-12-19 02:59:52 +00001356
1357 insn->reader = reader;
1358 insn->readerArg = readerArg;
1359 insn->dlog = logger;
1360 insn->dlogArg = loggerArg;
1361 insn->startLocation = startLoc;
1362 insn->readerCursor = startLoc;
1363 insn->mode = mode;
1364 insn->numImmediatesConsumed = 0;
1365
1366 if (readPrefixes(insn) ||
1367 readOpcode(insn) ||
1368 getID(insn) ||
1369 insn->instructionID == 0 ||
1370 readOperands(insn))
1371 return -1;
1372
1373 insn->length = insn->readerCursor - insn->startLocation;
1374
Benjamin Kramer7c97ed72010-03-18 12:18:36 +00001375 dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
1376 startLoc, insn->readerCursor, insn->length);
Sean Callanan8ed9f512009-12-19 02:59:52 +00001377
1378 if (insn->length > 15)
Nuno Lopes392bbd92009-12-19 12:07:00 +00001379 dbgprintf(insn, "Instruction exceeds 15-byte limit");
Sean Callanan8ed9f512009-12-19 02:59:52 +00001380
1381 return 0;
1382}