| Jia Liu | b22310f | 2012-02-18 12:03:15 +0000 | [diff] [blame] | 1 | /*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===* | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 2 | * | 
|  | 3 | *                     The LLVM Compiler Infrastructure | 
|  | 4 | * | 
|  | 5 | * This file is distributed under the University of Illinois Open Source | 
|  | 6 | * License. See LICENSE.TXT for details. | 
|  | 7 | * | 
|  | 8 | *===----------------------------------------------------------------------===* | 
|  | 9 | * | 
|  | 10 | * This file is part of the X86 Disassembler. | 
|  | 11 | * It contains the implementation of the instruction decoder. | 
|  | 12 | * Documentation for the disassembler can be found in X86Disassembler.h. | 
|  | 13 | * | 
|  | 14 | *===----------------------------------------------------------------------===*/ | 
|  | 15 |  | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 16 | #include <stdarg.h>   /* for va_*()       */ | 
|  | 17 | #include <stdio.h>    /* for vsnprintf()  */ | 
|  | 18 | #include <stdlib.h>   /* for exit()       */ | 
| Daniel Dunbar | c745a62 | 2009-12-19 03:31:50 +0000 | [diff] [blame] | 19 | #include <string.h>   /* for memset()     */ | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 20 |  | 
|  | 21 | #include "X86DisassemblerDecoder.h" | 
|  | 22 |  | 
|  | 23 | #include "X86GenDisassemblerTables.inc" | 
|  | 24 |  | 
/* Boolean constants used throughout the decoder. */
#define TRUE  1
#define FALSE 0

/* One-byte boolean type for this translation unit. */
typedef int8_t bool;

/*
 * debug - Reports an internal decoder problem through x86DisassemblerDebug(),
 *   tagged with the current file and line.  Expands to an empty statement when
 *   NDEBUG is defined.
 *
 * @param s - The message to report.
 */
#ifdef NDEBUG
#define debug(s) do { } while (0)
#else
#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
#endif
|  | 35 |  | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 36 |  | 
|  | 37 | /* | 
|  | 38 | * contextForAttrs - Client for the instruction context table.  Takes a set of | 
|  | 39 | *   attributes and returns the appropriate decode context. | 
|  | 40 | * | 
|  | 41 | * @param attrMask  - Attributes, from the enumeration attributeBits. | 
|  | 42 | * @return          - The InstructionContext to use when looking up an | 
|  | 43 | *                    an instruction with these attributes. | 
|  | 44 | */ | 
| Sean Callanan | 588785c | 2009-12-22 22:51:40 +0000 | [diff] [blame] | 45 | static InstructionContext contextForAttrs(uint8_t attrMask) { | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 46 | return CONTEXTS_SYM[attrMask]; | 
|  | 47 | } | 
|  | 48 |  | 
|  | 49 | /* | 
|  | 50 | * modRMRequired - Reads the appropriate instruction table to determine whether | 
|  | 51 | *   the ModR/M byte is required to decode a particular instruction. | 
|  | 52 | * | 
|  | 53 | * @param type        - The opcode type (i.e., how many bytes it has). | 
|  | 54 | * @param insnContext - The context for the instruction, as returned by | 
|  | 55 | *                      contextForAttrs. | 
|  | 56 | * @param opcode      - The last byte of the instruction's opcode, not counting | 
|  | 57 | *                      ModR/M extensions and escapes. | 
|  | 58 | * @return            - TRUE if the ModR/M byte is required, FALSE otherwise. | 
|  | 59 | */ | 
| Sean Callanan | 588785c | 2009-12-22 22:51:40 +0000 | [diff] [blame] | 60 | static int modRMRequired(OpcodeType type, | 
| Craig Topper | 21c3365 | 2011-10-02 16:56:09 +0000 | [diff] [blame] | 61 | InstructionContext insnContext, | 
|  | 62 | uint8_t opcode) { | 
| Daniel Dunbar | 8b532de | 2009-12-22 01:41:37 +0000 | [diff] [blame] | 63 | const struct ContextDecision* decision = 0; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 64 |  | 
|  | 65 | switch (type) { | 
|  | 66 | case ONEBYTE: | 
|  | 67 | decision = &ONEBYTE_SYM; | 
|  | 68 | break; | 
|  | 69 | case TWOBYTE: | 
|  | 70 | decision = &TWOBYTE_SYM; | 
|  | 71 | break; | 
|  | 72 | case THREEBYTE_38: | 
|  | 73 | decision = &THREEBYTE38_SYM; | 
|  | 74 | break; | 
|  | 75 | case THREEBYTE_3A: | 
|  | 76 | decision = &THREEBYTE3A_SYM; | 
|  | 77 | break; | 
| Joerg Sonnenberger | fc4789d | 2011-04-04 16:58:13 +0000 | [diff] [blame] | 78 | case THREEBYTE_A6: | 
|  | 79 | decision = &THREEBYTEA6_SYM; | 
|  | 80 | break; | 
|  | 81 | case THREEBYTE_A7: | 
|  | 82 | decision = &THREEBYTEA7_SYM; | 
|  | 83 | break; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 84 | } | 
| Ahmed Charles | 636a3d6 | 2012-02-19 11:37:01 +0000 | [diff] [blame] | 85 |  | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 86 | return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. | 
|  | 87 | modrm_type != MODRM_ONEENTRY; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 88 | } | 
|  | 89 |  | 
|  | 90 | /* | 
|  | 91 | * decode - Reads the appropriate instruction table to obtain the unique ID of | 
|  | 92 | *   an instruction. | 
|  | 93 | * | 
|  | 94 | * @param type        - See modRMRequired(). | 
|  | 95 | * @param insnContext - See modRMRequired(). | 
|  | 96 | * @param opcode      - See modRMRequired(). | 
|  | 97 | * @param modRM       - The ModR/M byte if required, or any value if not. | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 98 | * @return            - The UID of the instruction, or 0 on failure. | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 99 | */ | 
| Sean Callanan | 588785c | 2009-12-22 22:51:40 +0000 | [diff] [blame] | 100 | static InstrUID decode(OpcodeType type, | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 101 | InstructionContext insnContext, | 
|  | 102 | uint8_t opcode, | 
|  | 103 | uint8_t modRM) { | 
| Duncan Sands | ae22c60 | 2012-02-05 14:20:11 +0000 | [diff] [blame] | 104 | const struct ModRMDecision* dec = 0; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 105 |  | 
|  | 106 | switch (type) { | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 107 | case ONEBYTE: | 
|  | 108 | dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; | 
|  | 109 | break; | 
|  | 110 | case TWOBYTE: | 
|  | 111 | dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; | 
|  | 112 | break; | 
|  | 113 | case THREEBYTE_38: | 
|  | 114 | dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; | 
|  | 115 | break; | 
|  | 116 | case THREEBYTE_3A: | 
|  | 117 | dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; | 
|  | 118 | break; | 
| Joerg Sonnenberger | fc4789d | 2011-04-04 16:58:13 +0000 | [diff] [blame] | 119 | case THREEBYTE_A6: | 
|  | 120 | dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; | 
|  | 121 | break; | 
|  | 122 | case THREEBYTE_A7: | 
|  | 123 | dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; | 
|  | 124 | break; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 125 | } | 
|  | 126 |  | 
|  | 127 | switch (dec->modrm_type) { | 
|  | 128 | default: | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 129 | debug("Corrupt table!  Unknown modrm_type"); | 
|  | 130 | return 0; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 131 | case MODRM_ONEENTRY: | 
| Craig Topper | 487e744 | 2012-02-09 07:45:30 +0000 | [diff] [blame] | 132 | return modRMTable[dec->instructionIDs]; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 133 | case MODRM_SPLITRM: | 
|  | 134 | if (modFromModRM(modRM) == 0x3) | 
| Craig Topper | 487e744 | 2012-02-09 07:45:30 +0000 | [diff] [blame] | 135 | return modRMTable[dec->instructionIDs+1]; | 
|  | 136 | return modRMTable[dec->instructionIDs]; | 
| Craig Topper | a0cd970 | 2012-02-09 08:58:07 +0000 | [diff] [blame] | 137 | case MODRM_SPLITREG: | 
|  | 138 | if (modFromModRM(modRM) == 0x3) | 
|  | 139 | return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; | 
|  | 140 | return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; | 
| Craig Topper | 963305b | 2012-09-13 05:45:42 +0000 | [diff] [blame] | 141 | case MODRM_SPLITMISC: | 
|  | 142 | if (modFromModRM(modRM) == 0x3) | 
|  | 143 | return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; | 
|  | 144 | return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 145 | case MODRM_FULL: | 
| Craig Topper | 487e744 | 2012-02-09 07:45:30 +0000 | [diff] [blame] | 146 | return modRMTable[dec->instructionIDs+modRM]; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 147 | } | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 148 | } | 
|  | 149 |  | 
|  | 150 | /* | 
|  | 151 | * specifierForUID - Given a UID, returns the name and operand specification for | 
|  | 152 | *   that instruction. | 
|  | 153 | * | 
|  | 154 | * @param uid - The unique ID for the instruction.  This should be returned by | 
|  | 155 | *              decode(); specifierForUID will not check bounds. | 
|  | 156 | * @return    - A pointer to the specification for that instruction. | 
|  | 157 | */ | 
| Benjamin Kramer | de0a4fb | 2010-10-23 09:10:44 +0000 | [diff] [blame] | 158 | static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 159 | return &INSTRUCTIONS_SYM[uid]; | 
|  | 160 | } | 
|  | 161 |  | 
|  | 162 | /* | 
|  | 163 | * consumeByte - Uses the reader function provided by the user to consume one | 
|  | 164 | *   byte from the instruction's memory and advance the cursor. | 
|  | 165 | * | 
|  | 166 | * @param insn  - The instruction with the reader function to use.  The cursor | 
|  | 167 | *                for this instruction is advanced. | 
|  | 168 | * @param byte  - A pointer to a pre-allocated memory buffer to be populated | 
|  | 169 | *                with the data read. | 
|  | 170 | * @return      - 0 if the read was successful; nonzero otherwise. | 
|  | 171 | */ | 
| Sean Callanan | 588785c | 2009-12-22 22:51:40 +0000 | [diff] [blame] | 172 | static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 173 | int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); | 
|  | 174 |  | 
|  | 175 | if (!ret) | 
|  | 176 | ++(insn->readerCursor); | 
|  | 177 |  | 
|  | 178 | return ret; | 
|  | 179 | } | 
|  | 180 |  | 
|  | 181 | /* | 
|  | 182 | * lookAtByte - Like consumeByte, but does not advance the cursor. | 
|  | 183 | * | 
|  | 184 | * @param insn  - See consumeByte(). | 
|  | 185 | * @param byte  - See consumeByte(). | 
|  | 186 | * @return      - See consumeByte(). | 
|  | 187 | */ | 
| Sean Callanan | 588785c | 2009-12-22 22:51:40 +0000 | [diff] [blame] | 188 | static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 189 | return insn->reader(insn->readerArg, byte, insn->readerCursor); | 
|  | 190 | } | 
|  | 191 |  | 
| Sean Callanan | 588785c | 2009-12-22 22:51:40 +0000 | [diff] [blame] | 192 | static void unconsumeByte(struct InternalInstruction* insn) { | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 193 | insn->readerCursor--; | 
|  | 194 | } | 
|  | 195 |  | 
| Sean Callanan | 588785c | 2009-12-22 22:51:40 +0000 | [diff] [blame] | 196 | #define CONSUME_FUNC(name, type)                                  \ | 
|  | 197 | static int name(struct InternalInstruction* insn, type* ptr) {  \ | 
|  | 198 | type combined = 0;                                            \ | 
|  | 199 | unsigned offset;                                              \ | 
|  | 200 | for (offset = 0; offset < sizeof(type); ++offset) {           \ | 
|  | 201 | uint8_t byte;                                               \ | 
|  | 202 | int ret = insn->reader(insn->readerArg,                     \ | 
|  | 203 | &byte,                               \ | 
|  | 204 | insn->readerCursor + offset);        \ | 
|  | 205 | if (ret)                                                    \ | 
|  | 206 | return ret;                                               \ | 
| Richard Smith | 228e6d4 | 2012-08-24 23:29:28 +0000 | [diff] [blame] | 207 | combined = combined | ((uint64_t)byte << (offset * 8));     \ | 
| Sean Callanan | 588785c | 2009-12-22 22:51:40 +0000 | [diff] [blame] | 208 | }                                                             \ | 
|  | 209 | *ptr = combined;                                              \ | 
|  | 210 | insn->readerCursor += sizeof(type);                           \ | 
|  | 211 | return 0;                                                     \ | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 212 | } | 
|  | 213 |  | 
|  | 214 | /* | 
|  | 215 | * consume* - Use the reader function provided by the user to consume data | 
|  | 216 | *   values of various sizes from the instruction's memory and advance the | 
|  | 217 | *   cursor appropriately.  These readers perform endian conversion. | 
|  | 218 | * | 
|  | 219 | * @param insn    - See consumeByte(). | 
|  | 220 | * @param ptr     - A pointer to a pre-allocated memory of appropriate size to | 
|  | 221 | *                  be populated with the data read. | 
|  | 222 | * @return        - See consumeByte(). | 
|  | 223 | */ | 
|  | 224 | CONSUME_FUNC(consumeInt8, int8_t) | 
|  | 225 | CONSUME_FUNC(consumeInt16, int16_t) | 
|  | 226 | CONSUME_FUNC(consumeInt32, int32_t) | 
|  | 227 | CONSUME_FUNC(consumeUInt16, uint16_t) | 
|  | 228 | CONSUME_FUNC(consumeUInt32, uint32_t) | 
|  | 229 | CONSUME_FUNC(consumeUInt64, uint64_t) | 
|  | 230 |  | 
|  | 231 | /* | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 232 | * dbgprintf - Uses the logging function provided by the user to log a single | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 233 | *   message, typically without a carriage-return. | 
|  | 234 | * | 
|  | 235 | * @param insn    - The instruction containing the logging function. | 
|  | 236 | * @param format  - See printf(). | 
|  | 237 | * @param ...     - See printf(). | 
|  | 238 | */ | 
| Sean Callanan | 588785c | 2009-12-22 22:51:40 +0000 | [diff] [blame] | 239 | static void dbgprintf(struct InternalInstruction* insn, | 
|  | 240 | const char* format, | 
|  | 241 | ...) { | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 242 | char buffer[256]; | 
|  | 243 | va_list ap; | 
|  | 244 |  | 
|  | 245 | if (!insn->dlog) | 
|  | 246 | return; | 
|  | 247 |  | 
|  | 248 | va_start(ap, format); | 
|  | 249 | (void)vsnprintf(buffer, sizeof(buffer), format, ap); | 
|  | 250 | va_end(ap); | 
|  | 251 |  | 
|  | 252 | insn->dlog(insn->dlogArg, buffer); | 
|  | 253 |  | 
|  | 254 | return; | 
|  | 255 | } | 
|  | 256 |  | 
|  | 257 | /* | 
|  | 258 | * setPrefixPresent - Marks that a particular prefix is present at a particular | 
|  | 259 | *   location. | 
|  | 260 | * | 
|  | 261 | * @param insn      - The instruction to be marked as having the prefix. | 
|  | 262 | * @param prefix    - The prefix that is present. | 
|  | 263 | * @param location  - The location where the prefix is located (in the address | 
|  | 264 | *                    space of the instruction's reader). | 
|  | 265 | */ | 
| Sean Callanan | 588785c | 2009-12-22 22:51:40 +0000 | [diff] [blame] | 266 | static void setPrefixPresent(struct InternalInstruction* insn, | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 267 | uint8_t prefix, | 
|  | 268 | uint64_t location) | 
|  | 269 | { | 
|  | 270 | insn->prefixPresent[prefix] = 1; | 
|  | 271 | insn->prefixLocations[prefix] = location; | 
|  | 272 | } | 
|  | 273 |  | 
|  | 274 | /* | 
|  | 275 | * isPrefixAtLocation - Queries an instruction to determine whether a prefix is | 
|  | 276 | *   present at a given location. | 
|  | 277 | * | 
|  | 278 | * @param insn      - The instruction to be queried. | 
|  | 279 | * @param prefix    - The prefix. | 
|  | 280 | * @param location  - The location to query. | 
|  | 281 | * @return          - Whether the prefix is at that location. | 
|  | 282 | */ | 
| Sean Callanan | 588785c | 2009-12-22 22:51:40 +0000 | [diff] [blame] | 283 | static BOOL isPrefixAtLocation(struct InternalInstruction* insn, | 
|  | 284 | uint8_t prefix, | 
|  | 285 | uint64_t location) | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 286 | { | 
|  | 287 | if (insn->prefixPresent[prefix] == 1 && | 
|  | 288 | insn->prefixLocations[prefix] == location) | 
|  | 289 | return TRUE; | 
|  | 290 | else | 
|  | 291 | return FALSE; | 
|  | 292 | } | 
|  | 293 |  | 
|  | 294 | /* | 
|  | 295 | * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the | 
|  | 296 | *   instruction as having them.  Also sets the instruction's default operand, | 
|  | 297 | *   address, and other relevant data sizes to report operands correctly. | 
|  | 298 | * | 
|  | 299 | * @param insn  - The instruction whose prefixes are to be read. | 
|  | 300 | * @return      - 0 if the instruction could be read until the end of the prefix | 
|  | 301 | *                bytes, and no prefixes conflicted; nonzero otherwise. | 
|  | 302 | */ | 
|  | 303 | static int readPrefixes(struct InternalInstruction* insn) { | 
|  | 304 | BOOL isPrefix = TRUE; | 
|  | 305 | BOOL prefixGroups[4] = { FALSE }; | 
|  | 306 | uint64_t prefixLocation; | 
| Ted Kremenek | 3c4408c | 2011-01-23 17:05:06 +0000 | [diff] [blame] | 307 | uint8_t byte = 0; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 308 |  | 
|  | 309 | BOOL hasAdSize = FALSE; | 
|  | 310 | BOOL hasOpSize = FALSE; | 
|  | 311 |  | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 312 | dbgprintf(insn, "readPrefixes()"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 313 |  | 
|  | 314 | while (isPrefix) { | 
|  | 315 | prefixLocation = insn->readerCursor; | 
|  | 316 |  | 
|  | 317 | if (consumeByte(insn, &byte)) | 
|  | 318 | return -1; | 
| Kevin Enderby | 014e1cd | 2012-03-09 17:52:49 +0000 | [diff] [blame] | 319 |  | 
| Benjamin Kramer | adfc73d | 2012-03-10 15:10:06 +0000 | [diff] [blame] | 320 | /* | 
|  | 321 | * If the first byte is a LOCK prefix break and let it be disassembled | 
|  | 322 | * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>. | 
|  | 323 | * FIXME there is currently no way to get the disassembler to print the | 
|  | 324 | * lock prefix if it is not the first byte. | 
|  | 325 | */ | 
| Kevin Enderby | 014e1cd | 2012-03-09 17:52:49 +0000 | [diff] [blame] | 326 | if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) | 
|  | 327 | break; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 328 |  | 
|  | 329 | switch (byte) { | 
|  | 330 | case 0xf0:  /* LOCK */ | 
|  | 331 | case 0xf2:  /* REPNE/REPNZ */ | 
|  | 332 | case 0xf3:  /* REP or REPE/REPZ */ | 
|  | 333 | if (prefixGroups[0]) | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 334 | dbgprintf(insn, "Redundant Group 1 prefix"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 335 | prefixGroups[0] = TRUE; | 
|  | 336 | setPrefixPresent(insn, byte, prefixLocation); | 
|  | 337 | break; | 
|  | 338 | case 0x2e:  /* CS segment override -OR- Branch not taken */ | 
|  | 339 | case 0x36:  /* SS segment override -OR- Branch taken */ | 
|  | 340 | case 0x3e:  /* DS segment override */ | 
|  | 341 | case 0x26:  /* ES segment override */ | 
|  | 342 | case 0x64:  /* FS segment override */ | 
|  | 343 | case 0x65:  /* GS segment override */ | 
|  | 344 | switch (byte) { | 
|  | 345 | case 0x2e: | 
|  | 346 | insn->segmentOverride = SEG_OVERRIDE_CS; | 
|  | 347 | break; | 
|  | 348 | case 0x36: | 
|  | 349 | insn->segmentOverride = SEG_OVERRIDE_SS; | 
|  | 350 | break; | 
|  | 351 | case 0x3e: | 
|  | 352 | insn->segmentOverride = SEG_OVERRIDE_DS; | 
|  | 353 | break; | 
|  | 354 | case 0x26: | 
|  | 355 | insn->segmentOverride = SEG_OVERRIDE_ES; | 
|  | 356 | break; | 
|  | 357 | case 0x64: | 
|  | 358 | insn->segmentOverride = SEG_OVERRIDE_FS; | 
|  | 359 | break; | 
|  | 360 | case 0x65: | 
|  | 361 | insn->segmentOverride = SEG_OVERRIDE_GS; | 
|  | 362 | break; | 
|  | 363 | default: | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 364 | debug("Unhandled override"); | 
|  | 365 | return -1; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 366 | } | 
|  | 367 | if (prefixGroups[1]) | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 368 | dbgprintf(insn, "Redundant Group 2 prefix"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 369 | prefixGroups[1] = TRUE; | 
|  | 370 | setPrefixPresent(insn, byte, prefixLocation); | 
|  | 371 | break; | 
|  | 372 | case 0x66:  /* Operand-size override */ | 
|  | 373 | if (prefixGroups[2]) | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 374 | dbgprintf(insn, "Redundant Group 3 prefix"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 375 | prefixGroups[2] = TRUE; | 
|  | 376 | hasOpSize = TRUE; | 
|  | 377 | setPrefixPresent(insn, byte, prefixLocation); | 
|  | 378 | break; | 
|  | 379 | case 0x67:  /* Address-size override */ | 
|  | 380 | if (prefixGroups[3]) | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 381 | dbgprintf(insn, "Redundant Group 4 prefix"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 382 | prefixGroups[3] = TRUE; | 
|  | 383 | hasAdSize = TRUE; | 
|  | 384 | setPrefixPresent(insn, byte, prefixLocation); | 
|  | 385 | break; | 
|  | 386 | default:    /* Not a prefix byte */ | 
|  | 387 | isPrefix = FALSE; | 
|  | 388 | break; | 
|  | 389 | } | 
|  | 390 |  | 
|  | 391 | if (isPrefix) | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 392 | dbgprintf(insn, "Found prefix 0x%hhx", byte); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 393 | } | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 394 |  | 
|  | 395 | insn->vexSize = 0; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 396 |  | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 397 | if (byte == 0xc4) { | 
|  | 398 | uint8_t byte1; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 399 |  | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 400 | if (lookAtByte(insn, &byte1)) { | 
|  | 401 | dbgprintf(insn, "Couldn't read second byte of VEX"); | 
|  | 402 | return -1; | 
|  | 403 | } | 
|  | 404 |  | 
| Craig Topper | 45faba9 | 2011-09-26 05:12:43 +0000 | [diff] [blame] | 405 | if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 406 | insn->vexSize = 3; | 
|  | 407 | insn->necessaryPrefixLocation = insn->readerCursor - 1; | 
|  | 408 | } | 
|  | 409 | else { | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 410 | unconsumeByte(insn); | 
|  | 411 | insn->necessaryPrefixLocation = insn->readerCursor - 1; | 
|  | 412 | } | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 413 |  | 
|  | 414 | if (insn->vexSize == 3) { | 
|  | 415 | insn->vexPrefix[0] = byte; | 
|  | 416 | consumeByte(insn, &insn->vexPrefix[1]); | 
|  | 417 | consumeByte(insn, &insn->vexPrefix[2]); | 
|  | 418 |  | 
|  | 419 | /* We simulate the REX prefix for simplicity's sake */ | 
| Craig Topper | 31854ba | 2011-10-03 07:51:09 +0000 | [diff] [blame] | 420 |  | 
|  | 421 | if (insn->mode == MODE_64BIT) { | 
|  | 422 | insn->rexPrefix = 0x40 | 
|  | 423 | | (wFromVEX3of3(insn->vexPrefix[2]) << 3) | 
|  | 424 | | (rFromVEX2of3(insn->vexPrefix[1]) << 2) | 
|  | 425 | | (xFromVEX2of3(insn->vexPrefix[1]) << 1) | 
|  | 426 | | (bFromVEX2of3(insn->vexPrefix[1]) << 0); | 
|  | 427 | } | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 428 |  | 
|  | 429 | switch (ppFromVEX3of3(insn->vexPrefix[2])) | 
|  | 430 | { | 
|  | 431 | default: | 
|  | 432 | break; | 
|  | 433 | case VEX_PREFIX_66: | 
|  | 434 | hasOpSize = TRUE; | 
|  | 435 | break; | 
|  | 436 | } | 
|  | 437 |  | 
|  | 438 | dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]); | 
|  | 439 | } | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 440 | } | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 441 | else if (byte == 0xc5) { | 
|  | 442 | uint8_t byte1; | 
|  | 443 |  | 
|  | 444 | if (lookAtByte(insn, &byte1)) { | 
|  | 445 | dbgprintf(insn, "Couldn't read second byte of VEX"); | 
|  | 446 | return -1; | 
|  | 447 | } | 
|  | 448 |  | 
| Craig Topper | 45faba9 | 2011-09-26 05:12:43 +0000 | [diff] [blame] | 449 | if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 450 | insn->vexSize = 2; | 
|  | 451 | } | 
|  | 452 | else { | 
|  | 453 | unconsumeByte(insn); | 
|  | 454 | } | 
|  | 455 |  | 
|  | 456 | if (insn->vexSize == 2) { | 
|  | 457 | insn->vexPrefix[0] = byte; | 
|  | 458 | consumeByte(insn, &insn->vexPrefix[1]); | 
|  | 459 |  | 
| Craig Topper | 31854ba | 2011-10-03 07:51:09 +0000 | [diff] [blame] | 460 | if (insn->mode == MODE_64BIT) { | 
|  | 461 | insn->rexPrefix = 0x40 | 
|  | 462 | | (rFromVEX2of2(insn->vexPrefix[1]) << 2); | 
|  | 463 | } | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 464 |  | 
|  | 465 | switch (ppFromVEX2of2(insn->vexPrefix[1])) | 
|  | 466 | { | 
|  | 467 | default: | 
|  | 468 | break; | 
|  | 469 | case VEX_PREFIX_66: | 
|  | 470 | hasOpSize = TRUE; | 
|  | 471 | break; | 
|  | 472 | } | 
|  | 473 |  | 
|  | 474 | dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]); | 
|  | 475 | } | 
|  | 476 | } | 
|  | 477 | else { | 
|  | 478 | if (insn->mode == MODE_64BIT) { | 
|  | 479 | if ((byte & 0xf0) == 0x40) { | 
|  | 480 | uint8_t opcodeByte; | 
|  | 481 |  | 
|  | 482 | if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { | 
|  | 483 | dbgprintf(insn, "Redundant REX prefix"); | 
|  | 484 | return -1; | 
|  | 485 | } | 
|  | 486 |  | 
|  | 487 | insn->rexPrefix = byte; | 
|  | 488 | insn->necessaryPrefixLocation = insn->readerCursor - 2; | 
|  | 489 |  | 
|  | 490 | dbgprintf(insn, "Found REX prefix 0x%hhx", byte); | 
|  | 491 | } else { | 
|  | 492 | unconsumeByte(insn); | 
|  | 493 | insn->necessaryPrefixLocation = insn->readerCursor - 1; | 
|  | 494 | } | 
|  | 495 | } else { | 
|  | 496 | unconsumeByte(insn); | 
|  | 497 | insn->necessaryPrefixLocation = insn->readerCursor - 1; | 
|  | 498 | } | 
|  | 499 | } | 
|  | 500 |  | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 501 | if (insn->mode == MODE_16BIT) { | 
|  | 502 | insn->registerSize       = (hasOpSize ? 4 : 2); | 
|  | 503 | insn->addressSize        = (hasAdSize ? 4 : 2); | 
|  | 504 | insn->displacementSize   = (hasAdSize ? 4 : 2); | 
|  | 505 | insn->immediateSize      = (hasOpSize ? 4 : 2); | 
|  | 506 | } else if (insn->mode == MODE_32BIT) { | 
|  | 507 | insn->registerSize       = (hasOpSize ? 2 : 4); | 
|  | 508 | insn->addressSize        = (hasAdSize ? 2 : 4); | 
|  | 509 | insn->displacementSize   = (hasAdSize ? 2 : 4); | 
| Sean Callanan | 9f6c622 | 2010-10-22 01:24:11 +0000 | [diff] [blame] | 510 | insn->immediateSize      = (hasOpSize ? 2 : 4); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 511 | } else if (insn->mode == MODE_64BIT) { | 
|  | 512 | if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { | 
|  | 513 | insn->registerSize       = 8; | 
|  | 514 | insn->addressSize        = (hasAdSize ? 4 : 8); | 
|  | 515 | insn->displacementSize   = 4; | 
|  | 516 | insn->immediateSize      = 4; | 
|  | 517 | } else if (insn->rexPrefix) { | 
|  | 518 | insn->registerSize       = (hasOpSize ? 2 : 4); | 
|  | 519 | insn->addressSize        = (hasAdSize ? 4 : 8); | 
|  | 520 | insn->displacementSize   = (hasOpSize ? 2 : 4); | 
|  | 521 | insn->immediateSize      = (hasOpSize ? 2 : 4); | 
|  | 522 | } else { | 
|  | 523 | insn->registerSize       = (hasOpSize ? 2 : 4); | 
|  | 524 | insn->addressSize        = (hasAdSize ? 4 : 8); | 
|  | 525 | insn->displacementSize   = (hasOpSize ? 2 : 4); | 
|  | 526 | insn->immediateSize      = (hasOpSize ? 2 : 4); | 
|  | 527 | } | 
|  | 528 | } | 
|  | 529 |  | 
|  | 530 | return 0; | 
|  | 531 | } | 
|  | 532 |  | 
|  | 533 | /* | 
|  | 534 | * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of | 
|  | 535 | *   extended or escape opcodes). | 
|  | 536 | * | 
|  | 537 | * @param insn  - The instruction whose opcode is to be read. | 
|  | 538 | * @return      - 0 if the opcode could be read successfully; nonzero otherwise. | 
|  | 539 | */ | 
|  | 540 | static int readOpcode(struct InternalInstruction* insn) { | 
|  | 541 | /* Determine the length of the primary opcode */ | 
|  | 542 |  | 
|  | 543 | uint8_t current; | 
|  | 544 |  | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 545 | dbgprintf(insn, "readOpcode()"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 546 |  | 
|  | 547 | insn->opcodeType = ONEBYTE; | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 548 |  | 
|  | 549 | if (insn->vexSize == 3) | 
|  | 550 | { | 
|  | 551 | switch (mmmmmFromVEX2of3(insn->vexPrefix[1])) | 
|  | 552 | { | 
|  | 553 | default: | 
|  | 554 | dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1])); | 
|  | 555 | return -1; | 
|  | 556 | case 0: | 
|  | 557 | break; | 
|  | 558 | case VEX_LOB_0F: | 
|  | 559 | insn->twoByteEscape = 0x0f; | 
|  | 560 | insn->opcodeType = TWOBYTE; | 
|  | 561 | return consumeByte(insn, &insn->opcode); | 
|  | 562 | case VEX_LOB_0F38: | 
|  | 563 | insn->twoByteEscape = 0x0f; | 
|  | 564 | insn->threeByteEscape = 0x38; | 
|  | 565 | insn->opcodeType = THREEBYTE_38; | 
|  | 566 | return consumeByte(insn, &insn->opcode); | 
|  | 567 | case VEX_LOB_0F3A: | 
|  | 568 | insn->twoByteEscape = 0x0f; | 
|  | 569 | insn->threeByteEscape = 0x3a; | 
|  | 570 | insn->opcodeType = THREEBYTE_3A; | 
|  | 571 | return consumeByte(insn, &insn->opcode); | 
|  | 572 | } | 
|  | 573 | } | 
|  | 574 | else if (insn->vexSize == 2) | 
|  | 575 | { | 
|  | 576 | insn->twoByteEscape = 0x0f; | 
|  | 577 | insn->opcodeType = TWOBYTE; | 
|  | 578 | return consumeByte(insn, &insn->opcode); | 
|  | 579 | } | 
|  | 580 |  | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 581 | if (consumeByte(insn, ¤t)) | 
|  | 582 | return -1; | 
|  | 583 |  | 
|  | 584 | if (current == 0x0f) { | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 585 | dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 586 |  | 
|  | 587 | insn->twoByteEscape = current; | 
|  | 588 |  | 
|  | 589 | if (consumeByte(insn, ¤t)) | 
|  | 590 | return -1; | 
|  | 591 |  | 
|  | 592 | if (current == 0x38) { | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 593 | dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 594 |  | 
|  | 595 | insn->threeByteEscape = current; | 
|  | 596 |  | 
|  | 597 | if (consumeByte(insn, ¤t)) | 
|  | 598 | return -1; | 
|  | 599 |  | 
|  | 600 | insn->opcodeType = THREEBYTE_38; | 
|  | 601 | } else if (current == 0x3a) { | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 602 | dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 603 |  | 
|  | 604 | insn->threeByteEscape = current; | 
|  | 605 |  | 
|  | 606 | if (consumeByte(insn, ¤t)) | 
|  | 607 | return -1; | 
|  | 608 |  | 
|  | 609 | insn->opcodeType = THREEBYTE_3A; | 
| Joerg Sonnenberger | fc4789d | 2011-04-04 16:58:13 +0000 | [diff] [blame] | 610 | } else if (current == 0xa6) { | 
|  | 611 | dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); | 
|  | 612 |  | 
|  | 613 | insn->threeByteEscape = current; | 
|  | 614 |  | 
|  | 615 | if (consumeByte(insn, ¤t)) | 
|  | 616 | return -1; | 
|  | 617 |  | 
|  | 618 | insn->opcodeType = THREEBYTE_A6; | 
|  | 619 | } else if (current == 0xa7) { | 
|  | 620 | dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); | 
|  | 621 |  | 
|  | 622 | insn->threeByteEscape = current; | 
|  | 623 |  | 
|  | 624 | if (consumeByte(insn, ¤t)) | 
|  | 625 | return -1; | 
|  | 626 |  | 
|  | 627 | insn->opcodeType = THREEBYTE_A7; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 628 | } else { | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 629 | dbgprintf(insn, "Didn't find a three-byte escape prefix"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 630 |  | 
|  | 631 | insn->opcodeType = TWOBYTE; | 
|  | 632 | } | 
|  | 633 | } | 
|  | 634 |  | 
|  | 635 | /* | 
|  | 636 | * At this point we have consumed the full opcode. | 
|  | 637 | * Anything we consume from here on must be unconsumed. | 
|  | 638 | */ | 
|  | 639 |  | 
|  | 640 | insn->opcode = current; | 
|  | 641 |  | 
|  | 642 | return 0; | 
|  | 643 | } | 
|  | 644 |  | 
|  | 645 | static int readModRM(struct InternalInstruction* insn); | 
|  | 646 |  | 
|  | 647 | /* | 
|  | 648 | * getIDWithAttrMask - Determines the ID of an instruction, consuming | 
|  | 649 | *   the ModR/M byte as appropriate for extended and escape opcodes, | 
|  | 650 | *   and using a supplied attribute mask. | 
|  | 651 | * | 
|  | 652 | * @param instructionID - A pointer whose target is filled in with the ID of the | 
|  | 653 | *                        instruction. | 
|  | 654 | * @param insn          - The instruction whose ID is to be determined. | 
|  | 655 | * @param attrMask      - The attribute mask to search. | 
|  | 656 | * @return              - 0 if the ModR/M could be read when needed or was not | 
|  | 657 | *                        needed; nonzero otherwise. | 
|  | 658 | */ | 
|  | 659 | static int getIDWithAttrMask(uint16_t* instructionID, | 
|  | 660 | struct InternalInstruction* insn, | 
|  | 661 | uint8_t attrMask) { | 
|  | 662 | BOOL hasModRMExtension; | 
|  | 663 |  | 
|  | 664 | uint8_t instructionClass; | 
|  | 665 |  | 
|  | 666 | instructionClass = contextForAttrs(attrMask); | 
|  | 667 |  | 
|  | 668 | hasModRMExtension = modRMRequired(insn->opcodeType, | 
|  | 669 | instructionClass, | 
|  | 670 | insn->opcode); | 
|  | 671 |  | 
|  | 672 | if (hasModRMExtension) { | 
| Rafael Espindola | 9f9a106 | 2011-01-06 16:48:42 +0000 | [diff] [blame] | 673 | if (readModRM(insn)) | 
|  | 674 | return -1; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 675 |  | 
|  | 676 | *instructionID = decode(insn->opcodeType, | 
|  | 677 | instructionClass, | 
|  | 678 | insn->opcode, | 
|  | 679 | insn->modRM); | 
|  | 680 | } else { | 
|  | 681 | *instructionID = decode(insn->opcodeType, | 
|  | 682 | instructionClass, | 
|  | 683 | insn->opcode, | 
|  | 684 | 0); | 
|  | 685 | } | 
|  | 686 |  | 
|  | 687 | return 0; | 
|  | 688 | } | 
|  | 689 |  | 
|  | 690 | /* | 
|  | 691 | * is16BitEquivalent - Determines whether two instruction names refer to | 
|  | 692 | * equivalent instructions but one is 16-bit whereas the other is not. | 
|  | 693 | * | 
|  | 694 | * @param orig  - The instruction that is not 16-bit | 
|  | 695 | * @param equiv - The instruction that is 16-bit | 
|  | 696 | */ | 
|  | 697 | static BOOL is16BitEquvalent(const char* orig, const char* equiv) { | 
|  | 698 | off_t i; | 
|  | 699 |  | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 700 | for (i = 0;; i++) { | 
|  | 701 | if (orig[i] == '\0' && equiv[i] == '\0') | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 702 | return TRUE; | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 703 | if (orig[i] == '\0' || equiv[i] == '\0') | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 704 | return FALSE; | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 705 | if (orig[i] != equiv[i]) { | 
|  | 706 | if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 707 | continue; | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 708 | if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 709 | continue; | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 710 | if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 711 | continue; | 
|  | 712 | return FALSE; | 
|  | 713 | } | 
|  | 714 | } | 
|  | 715 | } | 
|  | 716 |  | 
|  | 717 | /* | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 718 | * getID - Determines the ID of an instruction, consuming the ModR/M byte as | 
|  | 719 | *   appropriate for extended and escape opcodes.  Determines the attributes and | 
|  | 720 | *   context for the instruction before doing so. | 
|  | 721 | * | 
|  | 722 | * @param insn  - The instruction whose ID is to be determined. | 
|  | 723 | * @return      - 0 if the ModR/M could be read when needed or was not needed; | 
|  | 724 | *                nonzero otherwise. | 
|  | 725 | */ | 
| Roman Divacky | 6792380 | 2012-09-05 21:17:34 +0000 | [diff] [blame] | 726 | static int getID(struct InternalInstruction* insn, const void *miiArg) { | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 727 | uint8_t attrMask; | 
|  | 728 | uint16_t instructionID; | 
|  | 729 |  | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 730 | dbgprintf(insn, "getID()"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 731 |  | 
|  | 732 | attrMask = ATTR_NONE; | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 733 |  | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 734 | if (insn->mode == MODE_64BIT) | 
|  | 735 | attrMask |= ATTR_64BIT; | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 736 |  | 
|  | 737 | if (insn->vexSize) { | 
|  | 738 | attrMask |= ATTR_VEX; | 
|  | 739 |  | 
|  | 740 | if (insn->vexSize == 3) { | 
|  | 741 | switch (ppFromVEX3of3(insn->vexPrefix[2])) { | 
|  | 742 | case VEX_PREFIX_66: | 
|  | 743 | attrMask |= ATTR_OPSIZE; | 
|  | 744 | break; | 
|  | 745 | case VEX_PREFIX_F3: | 
|  | 746 | attrMask |= ATTR_XS; | 
|  | 747 | break; | 
|  | 748 | case VEX_PREFIX_F2: | 
|  | 749 | attrMask |= ATTR_XD; | 
|  | 750 | break; | 
|  | 751 | } | 
|  | 752 |  | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 753 | if (lFromVEX3of3(insn->vexPrefix[2])) | 
|  | 754 | attrMask |= ATTR_VEXL; | 
|  | 755 | } | 
|  | 756 | else if (insn->vexSize == 2) { | 
|  | 757 | switch (ppFromVEX2of2(insn->vexPrefix[1])) { | 
|  | 758 | case VEX_PREFIX_66: | 
|  | 759 | attrMask |= ATTR_OPSIZE; | 
|  | 760 | break; | 
|  | 761 | case VEX_PREFIX_F3: | 
|  | 762 | attrMask |= ATTR_XS; | 
|  | 763 | break; | 
|  | 764 | case VEX_PREFIX_F2: | 
|  | 765 | attrMask |= ATTR_XD; | 
|  | 766 | break; | 
|  | 767 | } | 
|  | 768 |  | 
|  | 769 | if (lFromVEX2of2(insn->vexPrefix[1])) | 
|  | 770 | attrMask |= ATTR_VEXL; | 
|  | 771 | } | 
|  | 772 | else { | 
|  | 773 | return -1; | 
|  | 774 | } | 
|  | 775 | } | 
|  | 776 | else { | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 777 | if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) | 
|  | 778 | attrMask |= ATTR_OPSIZE; | 
| Craig Topper | 6491c80 | 2012-02-27 01:54:29 +0000 | [diff] [blame] | 779 | else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) | 
|  | 780 | attrMask |= ATTR_ADSIZE; | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 781 | else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) | 
|  | 782 | attrMask |= ATTR_XS; | 
|  | 783 | else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) | 
|  | 784 | attrMask |= ATTR_XD; | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 785 | } | 
|  | 786 |  | 
| Craig Topper | f18c896 | 2011-10-04 06:30:42 +0000 | [diff] [blame] | 787 | if (insn->rexPrefix & 0x08) | 
|  | 788 | attrMask |= ATTR_REXW; | 
| Craig Topper | f01f1b5 | 2011-11-06 23:04:08 +0000 | [diff] [blame] | 789 |  | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 790 | if (getIDWithAttrMask(&instructionID, insn, attrMask)) | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 791 | return -1; | 
| Craig Topper | f01f1b5 | 2011-11-06 23:04:08 +0000 | [diff] [blame] | 792 |  | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 793 | /* The following clauses compensate for limitations of the tables. */ | 
| Craig Topper | f01f1b5 | 2011-11-06 23:04:08 +0000 | [diff] [blame] | 794 |  | 
|  | 795 | if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) && | 
|  | 796 | !(attrMask & ATTR_OPSIZE)) { | 
| Craig Topper | f18c896 | 2011-10-04 06:30:42 +0000 | [diff] [blame] | 797 | /* | 
|  | 798 | * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit | 
|  | 799 | * has precedence since there are no L-bit with W-bit entries in the tables. | 
|  | 800 | * So if the L-bit isn't significant we should use the W-bit instead. | 
| Craig Topper | f01f1b5 | 2011-11-06 23:04:08 +0000 | [diff] [blame] | 801 | * We only need to do this if the instruction doesn't specify OpSize since | 
|  | 802 | * there is a VEX_L_W_OPSIZE table. | 
| Craig Topper | f18c896 | 2011-10-04 06:30:42 +0000 | [diff] [blame] | 803 | */ | 
|  | 804 |  | 
|  | 805 | const struct InstructionSpecifier *spec; | 
|  | 806 | uint16_t instructionIDWithWBit; | 
|  | 807 | const struct InstructionSpecifier *specWithWBit; | 
|  | 808 |  | 
|  | 809 | spec = specifierForUID(instructionID); | 
|  | 810 |  | 
|  | 811 | if (getIDWithAttrMask(&instructionIDWithWBit, | 
|  | 812 | insn, | 
|  | 813 | (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) { | 
|  | 814 | insn->instructionID = instructionID; | 
|  | 815 | insn->spec = spec; | 
|  | 816 | return 0; | 
|  | 817 | } | 
|  | 818 |  | 
|  | 819 | specWithWBit = specifierForUID(instructionIDWithWBit); | 
|  | 820 |  | 
|  | 821 | if (instructionID != instructionIDWithWBit) { | 
|  | 822 | insn->instructionID = instructionIDWithWBit; | 
|  | 823 | insn->spec = specWithWBit; | 
|  | 824 | } else { | 
|  | 825 | insn->instructionID = instructionID; | 
|  | 826 | insn->spec = spec; | 
|  | 827 | } | 
|  | 828 | return 0; | 
|  | 829 | } | 
|  | 830 |  | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 831 | if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { | 
|  | 832 | /* | 
|  | 833 | * The instruction tables make no distinction between instructions that | 
|  | 834 | * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a | 
|  | 835 | * particular spot (i.e., many MMX operations).  In general we're | 
|  | 836 | * conservative, but in the specific case where OpSize is present but not | 
|  | 837 | * in the right place we check if there's a 16-bit operation. | 
|  | 838 | */ | 
|  | 839 |  | 
| Benjamin Kramer | de0a4fb | 2010-10-23 09:10:44 +0000 | [diff] [blame] | 840 | const struct InstructionSpecifier *spec; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 841 | uint16_t instructionIDWithOpsize; | 
| Benjamin Kramer | 915e3d9 | 2012-02-11 16:01:02 +0000 | [diff] [blame] | 842 | const char *specName, *specWithOpSizeName; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 843 |  | 
|  | 844 | spec = specifierForUID(instructionID); | 
|  | 845 |  | 
|  | 846 | if (getIDWithAttrMask(&instructionIDWithOpsize, | 
|  | 847 | insn, | 
|  | 848 | attrMask | ATTR_OPSIZE)) { | 
|  | 849 | /* | 
|  | 850 | * ModRM required with OpSize but not present; give up and return version | 
|  | 851 | * without OpSize set | 
|  | 852 | */ | 
|  | 853 |  | 
|  | 854 | insn->instructionID = instructionID; | 
|  | 855 | insn->spec = spec; | 
|  | 856 | return 0; | 
|  | 857 | } | 
|  | 858 |  | 
| Benjamin Kramer | 915e3d9 | 2012-02-11 16:01:02 +0000 | [diff] [blame] | 859 | specName = x86DisassemblerGetInstrName(instructionID, miiArg); | 
|  | 860 | specWithOpSizeName = | 
| Benjamin Kramer | 478e8de | 2012-02-11 14:50:54 +0000 | [diff] [blame] | 861 | x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); | 
|  | 862 |  | 
| Benjamin Kramer | 915e3d9 | 2012-02-11 16:01:02 +0000 | [diff] [blame] | 863 | if (is16BitEquvalent(specName, specWithOpSizeName)) { | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 864 | insn->instructionID = instructionIDWithOpsize; | 
| Benjamin Kramer | 915e3d9 | 2012-02-11 16:01:02 +0000 | [diff] [blame] | 865 | insn->spec = specifierForUID(instructionIDWithOpsize); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 866 | } else { | 
|  | 867 | insn->instructionID = instructionID; | 
|  | 868 | insn->spec = spec; | 
|  | 869 | } | 
|  | 870 | return 0; | 
|  | 871 | } | 
| Craig Topper | 21c3365 | 2011-10-02 16:56:09 +0000 | [diff] [blame] | 872 |  | 
|  | 873 | if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && | 
|  | 874 | insn->rexPrefix & 0x01) { | 
|  | 875 | /* | 
|  | 876 | * NOOP shouldn't decode as NOOP if REX.b is set. Instead | 
|  | 877 | * it should decode as XCHG %r8, %eax. | 
|  | 878 | */ | 
|  | 879 |  | 
|  | 880 | const struct InstructionSpecifier *spec; | 
|  | 881 | uint16_t instructionIDWithNewOpcode; | 
|  | 882 | const struct InstructionSpecifier *specWithNewOpcode; | 
|  | 883 |  | 
|  | 884 | spec = specifierForUID(instructionID); | 
|  | 885 |  | 
| Craig Topper | b58a966 | 2011-10-05 03:29:32 +0000 | [diff] [blame] | 886 | /* Borrow opcode from one of the other XCHGar opcodes */ | 
| Craig Topper | 21c3365 | 2011-10-02 16:56:09 +0000 | [diff] [blame] | 887 | insn->opcode = 0x91; | 
|  | 888 |  | 
|  | 889 | if (getIDWithAttrMask(&instructionIDWithNewOpcode, | 
|  | 890 | insn, | 
|  | 891 | attrMask)) { | 
|  | 892 | insn->opcode = 0x90; | 
|  | 893 |  | 
|  | 894 | insn->instructionID = instructionID; | 
|  | 895 | insn->spec = spec; | 
|  | 896 | return 0; | 
|  | 897 | } | 
|  | 898 |  | 
|  | 899 | specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); | 
|  | 900 |  | 
| Craig Topper | b58a966 | 2011-10-05 03:29:32 +0000 | [diff] [blame] | 901 | /* Change back */ | 
| Craig Topper | 21c3365 | 2011-10-02 16:56:09 +0000 | [diff] [blame] | 902 | insn->opcode = 0x90; | 
|  | 903 |  | 
|  | 904 | insn->instructionID = instructionIDWithNewOpcode; | 
|  | 905 | insn->spec = specWithNewOpcode; | 
|  | 906 |  | 
|  | 907 | return 0; | 
|  | 908 | } | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 909 |  | 
|  | 910 | insn->instructionID = instructionID; | 
|  | 911 | insn->spec = specifierForUID(insn->instructionID); | 
|  | 912 |  | 
|  | 913 | return 0; | 
|  | 914 | } | 
|  | 915 |  | 
|  | 916 | /* | 
|  | 917 | * readSIB - Consumes the SIB byte to determine addressing information for an | 
|  | 918 | *   instruction. | 
|  | 919 | * | 
|  | 920 | * @param insn  - The instruction whose SIB byte is to be read. | 
|  | 921 | * @return      - 0 if the SIB byte was successfully read; nonzero otherwise. | 
|  | 922 | */ | 
|  | 923 | static int readSIB(struct InternalInstruction* insn) { | 
| Daniel Dunbar | 8b532de | 2009-12-22 01:41:37 +0000 | [diff] [blame] | 924 | SIBIndex sibIndexBase = 0; | 
|  | 925 | SIBBase sibBaseBase = 0; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 926 | uint8_t index, base; | 
|  | 927 |  | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 928 | dbgprintf(insn, "readSIB()"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 929 |  | 
|  | 930 | if (insn->consumedSIB) | 
|  | 931 | return 0; | 
|  | 932 |  | 
|  | 933 | insn->consumedSIB = TRUE; | 
|  | 934 |  | 
|  | 935 | switch (insn->addressSize) { | 
|  | 936 | case 2: | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 937 | dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 938 | return -1; | 
|  | 939 | break; | 
|  | 940 | case 4: | 
|  | 941 | sibIndexBase = SIB_INDEX_EAX; | 
|  | 942 | sibBaseBase = SIB_BASE_EAX; | 
|  | 943 | break; | 
|  | 944 | case 8: | 
|  | 945 | sibIndexBase = SIB_INDEX_RAX; | 
|  | 946 | sibBaseBase = SIB_BASE_RAX; | 
|  | 947 | break; | 
|  | 948 | } | 
|  | 949 |  | 
|  | 950 | if (consumeByte(insn, &insn->sib)) | 
|  | 951 | return -1; | 
|  | 952 |  | 
|  | 953 | index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); | 
|  | 954 |  | 
|  | 955 | switch (index) { | 
|  | 956 | case 0x4: | 
|  | 957 | insn->sibIndex = SIB_INDEX_NONE; | 
|  | 958 | break; | 
|  | 959 | default: | 
| Benjamin Kramer | 25bddae | 2011-02-27 18:13:53 +0000 | [diff] [blame] | 960 | insn->sibIndex = (SIBIndex)(sibIndexBase + index); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 961 | if (insn->sibIndex == SIB_INDEX_sib || | 
|  | 962 | insn->sibIndex == SIB_INDEX_sib64) | 
|  | 963 | insn->sibIndex = SIB_INDEX_NONE; | 
|  | 964 | break; | 
|  | 965 | } | 
|  | 966 |  | 
|  | 967 | switch (scaleFromSIB(insn->sib)) { | 
|  | 968 | case 0: | 
|  | 969 | insn->sibScale = 1; | 
|  | 970 | break; | 
|  | 971 | case 1: | 
|  | 972 | insn->sibScale = 2; | 
|  | 973 | break; | 
|  | 974 | case 2: | 
|  | 975 | insn->sibScale = 4; | 
|  | 976 | break; | 
|  | 977 | case 3: | 
|  | 978 | insn->sibScale = 8; | 
|  | 979 | break; | 
|  | 980 | } | 
|  | 981 |  | 
|  | 982 | base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); | 
|  | 983 |  | 
|  | 984 | switch (base) { | 
|  | 985 | case 0x5: | 
|  | 986 | switch (modFromModRM(insn->modRM)) { | 
|  | 987 | case 0x0: | 
|  | 988 | insn->eaDisplacement = EA_DISP_32; | 
|  | 989 | insn->sibBase = SIB_BASE_NONE; | 
|  | 990 | break; | 
|  | 991 | case 0x1: | 
|  | 992 | insn->eaDisplacement = EA_DISP_8; | 
|  | 993 | insn->sibBase = (insn->addressSize == 4 ? | 
|  | 994 | SIB_BASE_EBP : SIB_BASE_RBP); | 
|  | 995 | break; | 
|  | 996 | case 0x2: | 
|  | 997 | insn->eaDisplacement = EA_DISP_32; | 
|  | 998 | insn->sibBase = (insn->addressSize == 4 ? | 
|  | 999 | SIB_BASE_EBP : SIB_BASE_RBP); | 
|  | 1000 | break; | 
|  | 1001 | case 0x3: | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1002 | debug("Cannot have Mod = 0b11 and a SIB byte"); | 
|  | 1003 | return -1; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1004 | } | 
|  | 1005 | break; | 
|  | 1006 | default: | 
| Benjamin Kramer | 25bddae | 2011-02-27 18:13:53 +0000 | [diff] [blame] | 1007 | insn->sibBase = (SIBBase)(sibBaseBase + base); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1008 | break; | 
|  | 1009 | } | 
|  | 1010 |  | 
|  | 1011 | return 0; | 
|  | 1012 | } | 
|  | 1013 |  | 
|  | 1014 | /* | 
|  | 1015 | * readDisplacement - Consumes the displacement of an instruction. | 
|  | 1016 | * | 
|  | 1017 | * @param insn  - The instruction whose displacement is to be read. | 
|  | 1018 | * @return      - 0 if the displacement byte was successfully read; nonzero | 
|  | 1019 | *                otherwise. | 
|  | 1020 | */ | 
|  | 1021 | static int readDisplacement(struct InternalInstruction* insn) { | 
|  | 1022 | int8_t d8; | 
|  | 1023 | int16_t d16; | 
|  | 1024 | int32_t d32; | 
|  | 1025 |  | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 1026 | dbgprintf(insn, "readDisplacement()"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1027 |  | 
|  | 1028 | if (insn->consumedDisplacement) | 
|  | 1029 | return 0; | 
|  | 1030 |  | 
|  | 1031 | insn->consumedDisplacement = TRUE; | 
| Kevin Enderby | 6fbcd8d | 2012-02-23 18:18:17 +0000 | [diff] [blame] | 1032 | insn->displacementOffset = insn->readerCursor - insn->startLocation; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1033 |  | 
|  | 1034 | switch (insn->eaDisplacement) { | 
|  | 1035 | case EA_DISP_NONE: | 
|  | 1036 | insn->consumedDisplacement = FALSE; | 
|  | 1037 | break; | 
|  | 1038 | case EA_DISP_8: | 
|  | 1039 | if (consumeInt8(insn, &d8)) | 
|  | 1040 | return -1; | 
|  | 1041 | insn->displacement = d8; | 
|  | 1042 | break; | 
|  | 1043 | case EA_DISP_16: | 
|  | 1044 | if (consumeInt16(insn, &d16)) | 
|  | 1045 | return -1; | 
|  | 1046 | insn->displacement = d16; | 
|  | 1047 | break; | 
|  | 1048 | case EA_DISP_32: | 
|  | 1049 | if (consumeInt32(insn, &d32)) | 
|  | 1050 | return -1; | 
|  | 1051 | insn->displacement = d32; | 
|  | 1052 | break; | 
|  | 1053 | } | 
|  | 1054 |  | 
|  | 1055 | insn->consumedDisplacement = TRUE; | 
|  | 1056 | return 0; | 
|  | 1057 | } | 
|  | 1058 |  | 
|  | 1059 | /* | 
|  | 1060 | * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and | 
|  | 1061 | *   displacement) for an instruction and interprets it. | 
|  | 1062 | * | 
|  | 1063 | * @param insn  - The instruction whose addressing information is to be read. | 
|  | 1064 | * @return      - 0 if the information was successfully read; nonzero otherwise. | 
|  | 1065 | */ | 
|  | 1066 | static int readModRM(struct InternalInstruction* insn) { | 
|  | 1067 | uint8_t mod, rm, reg; | 
|  | 1068 |  | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 1069 | dbgprintf(insn, "readModRM()"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1070 |  | 
|  | 1071 | if (insn->consumedModRM) | 
|  | 1072 | return 0; | 
|  | 1073 |  | 
| Rafael Espindola | 9f9a106 | 2011-01-06 16:48:42 +0000 | [diff] [blame] | 1074 | if (consumeByte(insn, &insn->modRM)) | 
|  | 1075 | return -1; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1076 | insn->consumedModRM = TRUE; | 
|  | 1077 |  | 
|  | 1078 | mod     = modFromModRM(insn->modRM); | 
|  | 1079 | rm      = rmFromModRM(insn->modRM); | 
|  | 1080 | reg     = regFromModRM(insn->modRM); | 
|  | 1081 |  | 
|  | 1082 | /* | 
|  | 1083 | * This goes by insn->registerSize to pick the correct register, which messes | 
|  | 1084 | * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in | 
|  | 1085 | * fixupReg(). | 
|  | 1086 | */ | 
|  | 1087 | switch (insn->registerSize) { | 
|  | 1088 | case 2: | 
| Sean Callanan | 2f9443f | 2009-12-22 02:07:42 +0000 | [diff] [blame] | 1089 | insn->regBase = MODRM_REG_AX; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1090 | insn->eaRegBase = EA_REG_AX; | 
|  | 1091 | break; | 
|  | 1092 | case 4: | 
| Sean Callanan | 2f9443f | 2009-12-22 02:07:42 +0000 | [diff] [blame] | 1093 | insn->regBase = MODRM_REG_EAX; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1094 | insn->eaRegBase = EA_REG_EAX; | 
|  | 1095 | break; | 
|  | 1096 | case 8: | 
| Sean Callanan | 2f9443f | 2009-12-22 02:07:42 +0000 | [diff] [blame] | 1097 | insn->regBase = MODRM_REG_RAX; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1098 | insn->eaRegBase = EA_REG_RAX; | 
|  | 1099 | break; | 
|  | 1100 | } | 
|  | 1101 |  | 
|  | 1102 | reg |= rFromREX(insn->rexPrefix) << 3; | 
|  | 1103 | rm  |= bFromREX(insn->rexPrefix) << 3; | 
|  | 1104 |  | 
|  | 1105 | insn->reg = (Reg)(insn->regBase + reg); | 
|  | 1106 |  | 
|  | 1107 | switch (insn->addressSize) { | 
|  | 1108 | case 2: | 
|  | 1109 | insn->eaBaseBase = EA_BASE_BX_SI; | 
|  | 1110 |  | 
|  | 1111 | switch (mod) { | 
|  | 1112 | case 0x0: | 
|  | 1113 | if (rm == 0x6) { | 
|  | 1114 | insn->eaBase = EA_BASE_NONE; | 
|  | 1115 | insn->eaDisplacement = EA_DISP_16; | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1116 | if (readDisplacement(insn)) | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1117 | return -1; | 
|  | 1118 | } else { | 
|  | 1119 | insn->eaBase = (EABase)(insn->eaBaseBase + rm); | 
|  | 1120 | insn->eaDisplacement = EA_DISP_NONE; | 
|  | 1121 | } | 
|  | 1122 | break; | 
|  | 1123 | case 0x1: | 
|  | 1124 | insn->eaBase = (EABase)(insn->eaBaseBase + rm); | 
|  | 1125 | insn->eaDisplacement = EA_DISP_8; | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1126 | if (readDisplacement(insn)) | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1127 | return -1; | 
|  | 1128 | break; | 
|  | 1129 | case 0x2: | 
|  | 1130 | insn->eaBase = (EABase)(insn->eaBaseBase + rm); | 
|  | 1131 | insn->eaDisplacement = EA_DISP_16; | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1132 | if (readDisplacement(insn)) | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1133 | return -1; | 
|  | 1134 | break; | 
|  | 1135 | case 0x3: | 
|  | 1136 | insn->eaBase = (EABase)(insn->eaRegBase + rm); | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1137 | if (readDisplacement(insn)) | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1138 | return -1; | 
|  | 1139 | break; | 
|  | 1140 | } | 
|  | 1141 | break; | 
|  | 1142 | case 4: | 
|  | 1143 | case 8: | 
|  | 1144 | insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); | 
|  | 1145 |  | 
|  | 1146 | switch (mod) { | 
|  | 1147 | case 0x0: | 
|  | 1148 | insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ | 
|  | 1149 | switch (rm) { | 
|  | 1150 | case 0x4: | 
|  | 1151 | case 0xc:   /* in case REXW.b is set */ | 
|  | 1152 | insn->eaBase = (insn->addressSize == 4 ? | 
|  | 1153 | EA_BASE_sib : EA_BASE_sib64); | 
|  | 1154 | readSIB(insn); | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1155 | if (readDisplacement(insn)) | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1156 | return -1; | 
|  | 1157 | break; | 
|  | 1158 | case 0x5: | 
|  | 1159 | insn->eaBase = EA_BASE_NONE; | 
|  | 1160 | insn->eaDisplacement = EA_DISP_32; | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1161 | if (readDisplacement(insn)) | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1162 | return -1; | 
|  | 1163 | break; | 
|  | 1164 | default: | 
|  | 1165 | insn->eaBase = (EABase)(insn->eaBaseBase + rm); | 
|  | 1166 | break; | 
|  | 1167 | } | 
|  | 1168 | break; | 
|  | 1169 | case 0x1: | 
|  | 1170 | case 0x2: | 
|  | 1171 | insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32); | 
|  | 1172 | switch (rm) { | 
|  | 1173 | case 0x4: | 
|  | 1174 | case 0xc:   /* in case REXW.b is set */ | 
|  | 1175 | insn->eaBase = EA_BASE_sib; | 
|  | 1176 | readSIB(insn); | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1177 | if (readDisplacement(insn)) | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1178 | return -1; | 
|  | 1179 | break; | 
|  | 1180 | default: | 
|  | 1181 | insn->eaBase = (EABase)(insn->eaBaseBase + rm); | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1182 | if (readDisplacement(insn)) | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1183 | return -1; | 
|  | 1184 | break; | 
|  | 1185 | } | 
|  | 1186 | break; | 
|  | 1187 | case 0x3: | 
|  | 1188 | insn->eaDisplacement = EA_DISP_NONE; | 
|  | 1189 | insn->eaBase = (EABase)(insn->eaRegBase + rm); | 
|  | 1190 | break; | 
|  | 1191 | } | 
|  | 1192 | break; | 
|  | 1193 | } /* switch (insn->addressSize) */ | 
|  | 1194 |  | 
|  | 1195 | return 0; | 
|  | 1196 | } | 
|  | 1197 |  | 
/*
 * GENERIC_FIXUP_FUNC - Defines a static function that turns a register number
 *   into a register enumerator according to the operand type.
 *
 * @param name    - The name of the function to define.
 * @param base    - Expression added to the index for TYPE_Rv operands.  It is
 *                  evaluated inside the generated function, so it may refer
 *                  to insn.
 * @param prefix  - Token pasted in front of the register suffixes (_AL, _AX,
 *                  _EAX, _RAX, _XMM0, ...) to name the enumerators.
 *
 * The generated function takes (insn, type, index, valid) and returns the
 * enumerator for register number index of the given type.  *valid is set to 1
 * on entry and cleared to 0 when the type is unhandled (the function then
 * returns 0) or when index is out of range for MM (> 7), segment (> 5),
 * debug (> 7) or control (> 8) registers; in the out-of-range cases the sum
 * of the first enumerator and index is still returned.
 */
#define GENERIC_FIXUP_FUNC(name, base, prefix)            \
  static uint8_t name(struct InternalInstruction *insn,   \
                      OperandType type,                   \
                      uint8_t index,                      \
                      uint8_t *valid) {                   \
    *valid = 1;                                           \
    switch (type) {                                       \
    default:                                              \
      debug("Unhandled register type");                   \
      *valid = 0;                                         \
      return 0;                                           \
    case TYPE_Rv:                                         \
      return (base) + index;                              \
    case TYPE_R8:                                         \
      if (insn->rexPrefix &&                              \
         index >= 4 && index <= 7) {                      \
        return prefix##_SPL + (index - 4);                \
      } else {                                            \
        return prefix##_AL + index;                       \
      }                                                   \
    case TYPE_R16:                                        \
      return prefix##_AX + index;                         \
    case TYPE_R32:                                        \
      return prefix##_EAX + index;                        \
    case TYPE_R64:                                        \
      return prefix##_RAX + index;                        \
    case TYPE_XMM256:                                     \
      return prefix##_YMM0 + index;                       \
    case TYPE_XMM128:                                     \
    case TYPE_XMM64:                                      \
    case TYPE_XMM32:                                      \
    case TYPE_XMM:                                        \
      return prefix##_XMM0 + index;                       \
    case TYPE_MM64:                                       \
    case TYPE_MM32:                                       \
    case TYPE_MM:                                         \
      if (index > 7)                                      \
        *valid = 0;                                       \
      return prefix##_MM0 + index;                        \
    case TYPE_SEGMENTREG:                                 \
      if (index > 5)                                      \
        *valid = 0;                                       \
      return prefix##_ES + index;                         \
    case TYPE_DEBUGREG:                                   \
      if (index > 7)                                      \
        *valid = 0;                                       \
      return prefix##_DR0 + index;                        \
    case TYPE_CONTROLREG:                                 \
      if (index > 8)                                      \
        *valid = 0;                                       \
      return prefix##_CR0 + index;                        \
    }                                                     \
  }
|  | 1251 |  | 
/*
 * fixup*Value - Consults an operand type to determine the meaning of the
 *   reg or R/M field.  If the operand is an XMM operand, for example, an
 *   operand would be XMM0 instead of AX, which readModRM() would otherwise
 *   misinterpret it as.
 *
 * @param insn  - The instruction containing the operand.
 * @param type  - The operand type.
 * @param index - The existing value of the field as reported by readModRM().
 * @param valid - The address of a uint8_t.  The target is set to 1 if the
 *                field is valid for the register class; 0 if not.
 * @return      - The proper value.
 *
 * Two instantiations of GENERIC_FIXUP_FUNC follow:
 *   fixupRegValue - instantiated with insn->regBase and the MODRM_REG
 *                   enumerator prefix; fixupReg() calls it for ENCODING_REG
 *                   and ENCODING_VVVV operands.
 *   fixupRMValue  - instantiated with insn->eaRegBase and the EA_REG
 *                   enumerator prefix; fixupReg() calls it for ENCODING_RM
 *                   operands.
 */
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    MODRM_REG)
GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG)
|  | 1267 |  | 
|  | 1268 | /* | 
|  | 1269 | * fixupReg - Consults an operand specifier to determine which of the | 
|  | 1270 | *   fixup*Value functions to use in correcting readModRM()'ss interpretation. | 
|  | 1271 | * | 
|  | 1272 | * @param insn  - See fixup*Value(). | 
|  | 1273 | * @param op    - The operand specifier. | 
|  | 1274 | * @return      - 0 if fixup was successful; -1 if the register returned was | 
|  | 1275 | *                invalid for its class. | 
|  | 1276 | */ | 
|  | 1277 | static int fixupReg(struct InternalInstruction *insn, | 
| Benjamin Kramer | de0a4fb | 2010-10-23 09:10:44 +0000 | [diff] [blame] | 1278 | const struct OperandSpecifier *op) { | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1279 | uint8_t valid; | 
|  | 1280 |  | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 1281 | dbgprintf(insn, "fixupReg()"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1282 |  | 
|  | 1283 | switch ((OperandEncoding)op->encoding) { | 
|  | 1284 | default: | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1285 | debug("Expected a REG or R/M encoding in fixupReg"); | 
|  | 1286 | return -1; | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 1287 | case ENCODING_VVVV: | 
|  | 1288 | insn->vvvv = (Reg)fixupRegValue(insn, | 
|  | 1289 | (OperandType)op->type, | 
|  | 1290 | insn->vvvv, | 
|  | 1291 | &valid); | 
|  | 1292 | if (!valid) | 
|  | 1293 | return -1; | 
|  | 1294 | break; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1295 | case ENCODING_REG: | 
|  | 1296 | insn->reg = (Reg)fixupRegValue(insn, | 
|  | 1297 | (OperandType)op->type, | 
|  | 1298 | insn->reg - insn->regBase, | 
|  | 1299 | &valid); | 
|  | 1300 | if (!valid) | 
|  | 1301 | return -1; | 
|  | 1302 | break; | 
|  | 1303 | case ENCODING_RM: | 
|  | 1304 | if (insn->eaBase >= insn->eaRegBase) { | 
|  | 1305 | insn->eaBase = (EABase)fixupRMValue(insn, | 
|  | 1306 | (OperandType)op->type, | 
|  | 1307 | insn->eaBase - insn->eaRegBase, | 
|  | 1308 | &valid); | 
|  | 1309 | if (!valid) | 
|  | 1310 | return -1; | 
|  | 1311 | } | 
|  | 1312 | break; | 
|  | 1313 | } | 
|  | 1314 |  | 
|  | 1315 | return 0; | 
|  | 1316 | } | 
|  | 1317 |  | 
|  | 1318 | /* | 
|  | 1319 | * readOpcodeModifier - Reads an operand from the opcode field of an | 
|  | 1320 | *   instruction.  Handles AddRegFrm instructions. | 
|  | 1321 | * | 
|  | 1322 | * @param insn    - The instruction whose opcode field is to be read. | 
|  | 1323 | * @param inModRM - Indicates that the opcode field is to be read from the | 
|  | 1324 | *                  ModR/M extension; useful for escape opcodes | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1325 | * @return        - 0 on success; nonzero otherwise. | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1326 | */ | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1327 | static int readOpcodeModifier(struct InternalInstruction* insn) { | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 1328 | dbgprintf(insn, "readOpcodeModifier()"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1329 |  | 
|  | 1330 | if (insn->consumedOpcodeModifier) | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1331 | return 0; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1332 |  | 
|  | 1333 | insn->consumedOpcodeModifier = TRUE; | 
|  | 1334 |  | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1335 | switch (insn->spec->modifierType) { | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1336 | default: | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1337 | debug("Unknown modifier type."); | 
|  | 1338 | return -1; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1339 | case MODIFIER_NONE: | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1340 | debug("No modifier but an operand expects one."); | 
|  | 1341 | return -1; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1342 | case MODIFIER_OPCODE: | 
|  | 1343 | insn->opcodeModifier = insn->opcode - insn->spec->modifierBase; | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1344 | return 0; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1345 | case MODIFIER_MODRM: | 
|  | 1346 | insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1347 | return 0; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1348 | } | 
|  | 1349 | } | 
|  | 1350 |  | 
|  | 1351 | /* | 
|  | 1352 | * readOpcodeRegister - Reads an operand from the opcode field of an | 
|  | 1353 | *   instruction and interprets it appropriately given the operand width. | 
|  | 1354 | *   Handles AddRegFrm instructions. | 
|  | 1355 | * | 
|  | 1356 | * @param insn  - See readOpcodeModifier(). | 
|  | 1357 | * @param size  - The width (in bytes) of the register being specified. | 
|  | 1358 | *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means | 
|  | 1359 | *                RAX. | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1360 | * @return      - 0 on success; nonzero otherwise. | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1361 | */ | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1362 | static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 1363 | dbgprintf(insn, "readOpcodeRegister()"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1364 |  | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1365 | if (readOpcodeModifier(insn)) | 
|  | 1366 | return -1; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1367 |  | 
|  | 1368 | if (size == 0) | 
|  | 1369 | size = insn->registerSize; | 
|  | 1370 |  | 
|  | 1371 | switch (size) { | 
|  | 1372 | case 1: | 
| Sean Callanan | 2f9443f | 2009-12-22 02:07:42 +0000 | [diff] [blame] | 1373 | insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | 
|  | 1374 | | insn->opcodeModifier)); | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1375 | if (insn->rexPrefix && | 
|  | 1376 | insn->opcodeRegister >= MODRM_REG_AL + 0x4 && | 
|  | 1377 | insn->opcodeRegister < MODRM_REG_AL + 0x8) { | 
| Sean Callanan | 2f9443f | 2009-12-22 02:07:42 +0000 | [diff] [blame] | 1378 | insn->opcodeRegister = (Reg)(MODRM_REG_SPL | 
|  | 1379 | + (insn->opcodeRegister - MODRM_REG_AL - 4)); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1380 | } | 
|  | 1381 |  | 
|  | 1382 | break; | 
|  | 1383 | case 2: | 
| Sean Callanan | 2f9443f | 2009-12-22 02:07:42 +0000 | [diff] [blame] | 1384 | insn->opcodeRegister = (Reg)(MODRM_REG_AX | 
|  | 1385 | + ((bFromREX(insn->rexPrefix) << 3) | 
|  | 1386 | | insn->opcodeModifier)); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1387 | break; | 
|  | 1388 | case 4: | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1389 | insn->opcodeRegister = (Reg)(MODRM_REG_EAX | 
| Sean Callanan | 2f9443f | 2009-12-22 02:07:42 +0000 | [diff] [blame] | 1390 | + ((bFromREX(insn->rexPrefix) << 3) | 
|  | 1391 | | insn->opcodeModifier)); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1392 | break; | 
|  | 1393 | case 8: | 
| Sean Callanan | 2f9443f | 2009-12-22 02:07:42 +0000 | [diff] [blame] | 1394 | insn->opcodeRegister = (Reg)(MODRM_REG_RAX | 
|  | 1395 | + ((bFromREX(insn->rexPrefix) << 3) | 
|  | 1396 | | insn->opcodeModifier)); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1397 | break; | 
|  | 1398 | } | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1399 |  | 
|  | 1400 | return 0; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1401 | } | 
|  | 1402 |  | 
|  | 1403 | /* | 
|  | 1404 | * readImmediate - Consumes an immediate operand from an instruction, given the | 
|  | 1405 | *   desired operand size. | 
|  | 1406 | * | 
|  | 1407 | * @param insn  - The instruction whose operand is to be read. | 
|  | 1408 | * @param size  - The width (in bytes) of the operand. | 
|  | 1409 | * @return      - 0 if the immediate was successfully consumed; nonzero | 
|  | 1410 | *                otherwise. | 
|  | 1411 | */ | 
|  | 1412 | static int readImmediate(struct InternalInstruction* insn, uint8_t size) { | 
|  | 1413 | uint8_t imm8; | 
|  | 1414 | uint16_t imm16; | 
|  | 1415 | uint32_t imm32; | 
|  | 1416 | uint64_t imm64; | 
|  | 1417 |  | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 1418 | dbgprintf(insn, "readImmediate()"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1419 |  | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1420 | if (insn->numImmediatesConsumed == 2) { | 
|  | 1421 | debug("Already consumed two immediates"); | 
|  | 1422 | return -1; | 
|  | 1423 | } | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1424 |  | 
|  | 1425 | if (size == 0) | 
|  | 1426 | size = insn->immediateSize; | 
|  | 1427 | else | 
|  | 1428 | insn->immediateSize = size; | 
| Kevin Enderby | 6fbcd8d | 2012-02-23 18:18:17 +0000 | [diff] [blame] | 1429 | insn->immediateOffset = insn->readerCursor - insn->startLocation; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1430 |  | 
|  | 1431 | switch (size) { | 
|  | 1432 | case 1: | 
|  | 1433 | if (consumeByte(insn, &imm8)) | 
|  | 1434 | return -1; | 
|  | 1435 | insn->immediates[insn->numImmediatesConsumed] = imm8; | 
|  | 1436 | break; | 
|  | 1437 | case 2: | 
|  | 1438 | if (consumeUInt16(insn, &imm16)) | 
|  | 1439 | return -1; | 
|  | 1440 | insn->immediates[insn->numImmediatesConsumed] = imm16; | 
|  | 1441 | break; | 
|  | 1442 | case 4: | 
|  | 1443 | if (consumeUInt32(insn, &imm32)) | 
|  | 1444 | return -1; | 
|  | 1445 | insn->immediates[insn->numImmediatesConsumed] = imm32; | 
|  | 1446 | break; | 
|  | 1447 | case 8: | 
|  | 1448 | if (consumeUInt64(insn, &imm64)) | 
|  | 1449 | return -1; | 
|  | 1450 | insn->immediates[insn->numImmediatesConsumed] = imm64; | 
|  | 1451 | break; | 
|  | 1452 | } | 
|  | 1453 |  | 
|  | 1454 | insn->numImmediatesConsumed++; | 
|  | 1455 |  | 
|  | 1456 | return 0; | 
|  | 1457 | } | 
|  | 1458 |  | 
|  | 1459 | /* | 
| Craig Topper | 8dd7bbc | 2011-09-13 07:37:44 +0000 | [diff] [blame] | 1460 | * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 1461 | * | 
|  | 1462 | * @param insn  - The instruction whose operand is to be read. | 
| Craig Topper | 8dd7bbc | 2011-09-13 07:37:44 +0000 | [diff] [blame] | 1463 | * @return      - 0 if the vvvv was successfully consumed; nonzero | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 1464 | *                otherwise. | 
|  | 1465 | */ | 
|  | 1466 | static int readVVVV(struct InternalInstruction* insn) { | 
|  | 1467 | dbgprintf(insn, "readVVVV()"); | 
|  | 1468 |  | 
|  | 1469 | if (insn->vexSize == 3) | 
|  | 1470 | insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]); | 
|  | 1471 | else if (insn->vexSize == 2) | 
|  | 1472 | insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]); | 
|  | 1473 | else | 
|  | 1474 | return -1; | 
|  | 1475 |  | 
| Craig Topper | 0d0be47 | 2011-10-03 08:14:29 +0000 | [diff] [blame] | 1476 | if (insn->mode != MODE_64BIT) | 
|  | 1477 | insn->vvvv &= 0x7; | 
|  | 1478 |  | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 1479 | return 0; | 
|  | 1480 | } | 
|  | 1481 |  | 
|  | 1482 | /* | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1483 | * readOperands - Consults the specifier for an instruction and consumes all | 
|  | 1484 | *   operands for that instruction, interpreting them as it goes. | 
|  | 1485 | * | 
|  | 1486 | * @param insn  - The instruction whose operands are to be read and interpreted. | 
|  | 1487 | * @return      - 0 if all operands could be read; nonzero otherwise. | 
|  | 1488 | */ | 
|  | 1489 | static int readOperands(struct InternalInstruction* insn) { | 
|  | 1490 | int index; | 
| Craig Topper | 8dd7bbc | 2011-09-13 07:37:44 +0000 | [diff] [blame] | 1491 | int hasVVVV, needVVVV; | 
| Craig Topper | 2ba766a | 2011-12-30 06:23:39 +0000 | [diff] [blame] | 1492 | int sawRegImm = 0; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1493 |  | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 1494 | dbgprintf(insn, "readOperands()"); | 
| Craig Topper | 8dd7bbc | 2011-09-13 07:37:44 +0000 | [diff] [blame] | 1495 |  | 
|  | 1496 | /* If non-zero vvvv specified, need to make sure one of the operands | 
|  | 1497 | uses it. */ | 
|  | 1498 | hasVVVV = !readVVVV(insn); | 
|  | 1499 | needVVVV = hasVVVV && (insn->vvvv != 0); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1500 |  | 
|  | 1501 | for (index = 0; index < X86_MAX_OPERANDS; ++index) { | 
| Craig Topper | b8aec08 | 2012-08-01 07:39:18 +0000 | [diff] [blame] | 1502 | switch (x86OperandSets[insn->spec->operands][index].encoding) { | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1503 | case ENCODING_NONE: | 
|  | 1504 | break; | 
|  | 1505 | case ENCODING_REG: | 
|  | 1506 | case ENCODING_RM: | 
|  | 1507 | if (readModRM(insn)) | 
|  | 1508 | return -1; | 
| Craig Topper | b8aec08 | 2012-08-01 07:39:18 +0000 | [diff] [blame] | 1509 | if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1510 | return -1; | 
|  | 1511 | break; | 
|  | 1512 | case ENCODING_CB: | 
|  | 1513 | case ENCODING_CW: | 
|  | 1514 | case ENCODING_CD: | 
|  | 1515 | case ENCODING_CP: | 
|  | 1516 | case ENCODING_CO: | 
|  | 1517 | case ENCODING_CT: | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 1518 | dbgprintf(insn, "We currently don't hande code-offset encodings"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1519 | return -1; | 
|  | 1520 | case ENCODING_IB: | 
| Craig Topper | 2ba766a | 2011-12-30 06:23:39 +0000 | [diff] [blame] | 1521 | if (sawRegImm) { | 
| Benjamin Kramer | 9c48f26 | 2012-01-04 22:06:45 +0000 | [diff] [blame] | 1522 | /* Saw a register immediate so don't read again and instead split the | 
|  | 1523 | previous immediate.  FIXME: This is a hack. */ | 
| Benjamin Kramer | 47aecca | 2012-01-01 17:55:36 +0000 | [diff] [blame] | 1524 | insn->immediates[insn->numImmediatesConsumed] = | 
|  | 1525 | insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; | 
|  | 1526 | ++insn->numImmediatesConsumed; | 
| Craig Topper | 2ba766a | 2011-12-30 06:23:39 +0000 | [diff] [blame] | 1527 | break; | 
|  | 1528 | } | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1529 | if (readImmediate(insn, 1)) | 
|  | 1530 | return -1; | 
| Craig Topper | b8aec08 | 2012-08-01 07:39:18 +0000 | [diff] [blame] | 1531 | if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 && | 
| Sean Callanan | 1efe661 | 2010-04-07 21:42:19 +0000 | [diff] [blame] | 1532 | insn->immediates[insn->numImmediatesConsumed - 1] > 7) | 
|  | 1533 | return -1; | 
| Craig Topper | b8aec08 | 2012-08-01 07:39:18 +0000 | [diff] [blame] | 1534 | if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 && | 
| Craig Topper | 7629d63 | 2012-04-03 05:20:24 +0000 | [diff] [blame] | 1535 | insn->immediates[insn->numImmediatesConsumed - 1] > 31) | 
|  | 1536 | return -1; | 
| Craig Topper | b8aec08 | 2012-08-01 07:39:18 +0000 | [diff] [blame] | 1537 | if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 || | 
|  | 1538 | x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256) | 
| Craig Topper | 2ba766a | 2011-12-30 06:23:39 +0000 | [diff] [blame] | 1539 | sawRegImm = 1; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1540 | break; | 
|  | 1541 | case ENCODING_IW: | 
|  | 1542 | if (readImmediate(insn, 2)) | 
|  | 1543 | return -1; | 
|  | 1544 | break; | 
|  | 1545 | case ENCODING_ID: | 
|  | 1546 | if (readImmediate(insn, 4)) | 
|  | 1547 | return -1; | 
|  | 1548 | break; | 
|  | 1549 | case ENCODING_IO: | 
|  | 1550 | if (readImmediate(insn, 8)) | 
|  | 1551 | return -1; | 
|  | 1552 | break; | 
|  | 1553 | case ENCODING_Iv: | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1554 | if (readImmediate(insn, insn->immediateSize)) | 
|  | 1555 | return -1; | 
| Chris Lattner | d4758fc | 2010-04-16 21:15:15 +0000 | [diff] [blame] | 1556 | break; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1557 | case ENCODING_Ia: | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1558 | if (readImmediate(insn, insn->addressSize)) | 
|  | 1559 | return -1; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1560 | break; | 
|  | 1561 | case ENCODING_RB: | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1562 | if (readOpcodeRegister(insn, 1)) | 
|  | 1563 | return -1; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1564 | break; | 
|  | 1565 | case ENCODING_RW: | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1566 | if (readOpcodeRegister(insn, 2)) | 
|  | 1567 | return -1; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1568 | break; | 
|  | 1569 | case ENCODING_RD: | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1570 | if (readOpcodeRegister(insn, 4)) | 
|  | 1571 | return -1; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1572 | break; | 
|  | 1573 | case ENCODING_RO: | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1574 | if (readOpcodeRegister(insn, 8)) | 
|  | 1575 | return -1; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1576 | break; | 
|  | 1577 | case ENCODING_Rv: | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1578 | if (readOpcodeRegister(insn, 0)) | 
|  | 1579 | return -1; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1580 | break; | 
|  | 1581 | case ENCODING_I: | 
| Sean Callanan | 010b373 | 2010-04-02 21:23:51 +0000 | [diff] [blame] | 1582 | if (readOpcodeModifier(insn)) | 
|  | 1583 | return -1; | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 1584 | break; | 
|  | 1585 | case ENCODING_VVVV: | 
| Craig Topper | 8dd7bbc | 2011-09-13 07:37:44 +0000 | [diff] [blame] | 1586 | needVVVV = 0; /* Mark that we have found a VVVV operand. */ | 
|  | 1587 | if (!hasVVVV) | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 1588 | return -1; | 
| Craig Topper | b8aec08 | 2012-08-01 07:39:18 +0000 | [diff] [blame] | 1589 | if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) | 
| Sean Callanan | c3fd523 | 2011-03-15 01:23:15 +0000 | [diff] [blame] | 1590 | return -1; | 
|  | 1591 | break; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1592 | case ENCODING_DUP: | 
|  | 1593 | break; | 
|  | 1594 | default: | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 1595 | dbgprintf(insn, "Encountered an operand with an unknown encoding."); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1596 | return -1; | 
|  | 1597 | } | 
|  | 1598 | } | 
| Craig Topper | 8dd7bbc | 2011-09-13 07:37:44 +0000 | [diff] [blame] | 1599 |  | 
|  | 1600 | /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ | 
|  | 1601 | if (needVVVV) return -1; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1602 |  | 
|  | 1603 | return 0; | 
|  | 1604 | } | 
|  | 1605 |  | 
|  | 1606 | /* | 
|  | 1607 | * decodeInstruction - Reads and interprets a full instruction provided by the | 
|  | 1608 | *   user. | 
|  | 1609 | * | 
|  | 1610 | * @param insn      - A pointer to the instruction to be populated.  Must be | 
|  | 1611 | *                    pre-allocated. | 
|  | 1612 | * @param reader    - The function to be used to read the instruction's bytes. | 
|  | 1613 | * @param readerArg - A generic argument to be passed to the reader to store | 
|  | 1614 | *                    any internal state. | 
|  | 1615 | * @param logger    - If non-NULL, the function to be used to write log messages | 
|  | 1616 | *                    and warnings. | 
|  | 1617 | * @param loggerArg - A generic argument to be passed to the logger to store | 
|  | 1618 | *                    any internal state. | 
|  | 1619 | * @param startLoc  - The address (in the reader's address space) of the first | 
|  | 1620 | *                    byte in the instruction. | 
|  | 1621 | * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to | 
|  | 1622 | *                    decode the instruction in. | 
|  | 1623 | * @return          - 0 if the instruction's memory could be read; nonzero if | 
|  | 1624 | *                    not. | 
|  | 1625 | */ | 
|  | 1626 | int decodeInstruction(struct InternalInstruction* insn, | 
|  | 1627 | byteReader_t reader, | 
| Roman Divacky | 6792380 | 2012-09-05 21:17:34 +0000 | [diff] [blame] | 1628 | const void* readerArg, | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1629 | dlog_t logger, | 
|  | 1630 | void* loggerArg, | 
| Roman Divacky | 6792380 | 2012-09-05 21:17:34 +0000 | [diff] [blame] | 1631 | const void* miiArg, | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1632 | uint64_t startLoc, | 
|  | 1633 | DisassemblerMode mode) { | 
| Daniel Dunbar | c745a62 | 2009-12-19 03:31:50 +0000 | [diff] [blame] | 1634 | memset(insn, 0, sizeof(struct InternalInstruction)); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1635 |  | 
|  | 1636 | insn->reader = reader; | 
|  | 1637 | insn->readerArg = readerArg; | 
|  | 1638 | insn->dlog = logger; | 
|  | 1639 | insn->dlogArg = loggerArg; | 
|  | 1640 | insn->startLocation = startLoc; | 
|  | 1641 | insn->readerCursor = startLoc; | 
|  | 1642 | insn->mode = mode; | 
|  | 1643 | insn->numImmediatesConsumed = 0; | 
|  | 1644 |  | 
|  | 1645 | if (readPrefixes(insn)       || | 
|  | 1646 | readOpcode(insn)         || | 
| Benjamin Kramer | 478e8de | 2012-02-11 14:50:54 +0000 | [diff] [blame] | 1647 | getID(insn, miiArg)      || | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1648 | insn->instructionID == 0 || | 
|  | 1649 | readOperands(insn)) | 
|  | 1650 | return -1; | 
| Craig Topper | b8aec08 | 2012-08-01 07:39:18 +0000 | [diff] [blame] | 1651 |  | 
|  | 1652 | insn->operands = &x86OperandSets[insn->spec->operands][0]; | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1653 |  | 
|  | 1654 | insn->length = insn->readerCursor - insn->startLocation; | 
|  | 1655 |  | 
| Benjamin Kramer | 4f67227 | 2010-03-18 12:18:36 +0000 | [diff] [blame] | 1656 | dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", | 
|  | 1657 | startLoc, insn->readerCursor, insn->length); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1658 |  | 
|  | 1659 | if (insn->length > 15) | 
| Nuno Lopes | 3ed6d60 | 2009-12-19 12:07:00 +0000 | [diff] [blame] | 1660 | dbgprintf(insn, "Instruction exceeds 15-byte limit"); | 
| Sean Callanan | 04cc307 | 2009-12-19 02:59:52 +0000 | [diff] [blame] | 1661 |  | 
|  | 1662 | return 0; | 
|  | 1663 | } |