blob: 70315ed572b40c3f37e954ea98da2e40859a8dc6 [file] [log] [blame]
Sean Callanan8ed9f512009-12-19 02:59:52 +00001/*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==*
2 *
3 * The LLVM Compiler Infrastructure
4 *
5 * This file is distributed under the University of Illinois Open Source
6 * License. See LICENSE.TXT for details.
7 *
8 *===----------------------------------------------------------------------===*
9 *
10 * This file is part of the X86 Disassembler.
11 * It contains common definitions used by both the disassembler and the table
12 * generator.
13 * Documentation for the disassembler can be found in X86Disassembler.h.
14 *
15 *===----------------------------------------------------------------------===*/
16
17/*
18 * This header file provides those definitions that need to be shared between
19 * the decoder and the table generator in a C-friendly manner.
20 */
21
22#ifndef X86DISASSEMBLERDECODERCOMMON_H
23#define X86DISASSEMBLERDECODERCOMMON_H
24
Michael J. Spencer1f6efa32010-11-29 18:16:10 +000025#include "llvm/Support/DataTypes.h"
Sean Callanan8ed9f512009-12-19 02:59:52 +000026
/*
 * Names of the generated decode tables.  Each *_SYM macro expands to the
 * identifier of a table and the matching *_STR macro is the same name as a
 * string literal; the two must be kept in sync by hand.
 */
#define INSTRUCTIONS_SYM  x86DisassemblerInstrSpecifiers
#define CONTEXTS_SYM      x86DisassemblerContexts
#define ONEBYTE_SYM       x86DisassemblerOneByteOpcodes
#define TWOBYTE_SYM       x86DisassemblerTwoByteOpcodes
#define THREEBYTE38_SYM   x86DisassemblerThreeByte38Opcodes
#define THREEBYTE3A_SYM   x86DisassemblerThreeByte3AOpcodes
#define THREEBYTEA6_SYM   x86DisassemblerThreeByteA6Opcodes
#define THREEBYTEA7_SYM   x86DisassemblerThreeByteA7Opcodes

#define INSTRUCTIONS_STR  "x86DisassemblerInstrSpecifiers"
#define CONTEXTS_STR      "x86DisassemblerContexts"
#define ONEBYTE_STR       "x86DisassemblerOneByteOpcodes"
#define TWOBYTE_STR       "x86DisassemblerTwoByteOpcodes"
#define THREEBYTE38_STR   "x86DisassemblerThreeByte38Opcodes"
#define THREEBYTE3A_STR   "x86DisassemblerThreeByte3AOpcodes"
#define THREEBYTEA6_STR   "x86DisassemblerThreeByteA6Opcodes"
#define THREEBYTEA7_STR   "x86DisassemblerThreeByteA7Opcodes"
Sean Callanan8ed9f512009-12-19 02:59:52 +000044
/*
 * Attributes of an instruction that must be known before the opcode can be
 * processed correctly.  Most of these indicate the presence of particular
 * prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
 *
 * Every attribute other than ATTR_NONE occupies its own bit.
 */
#define ATTRIBUTE_BITS          \
  ENUM_ENTRY(ATTR_NONE,   0x00) \
  ENUM_ENTRY(ATTR_64BIT,  0x01) \
  ENUM_ENTRY(ATTR_XS,     0x02) \
  ENUM_ENTRY(ATTR_XD,     0x04) \
  ENUM_ENTRY(ATTR_REXW,   0x08) \
  ENUM_ENTRY(ATTR_OPSIZE, 0x10) \
  ENUM_ENTRY(ATTR_VEX,    0x20) \
  ENUM_ENTRY(ATTR_VEXL,   0x40)

/*
 * ATTR_max is given no explicit value, so it is one more than the last
 * attribute (0x41).  It is neither a bit count nor a mask of all attributes.
 */
#define ENUM_ENTRY(n, v) n = v,
enum attributeBits {
  ATTRIBUTE_BITS
  ATTR_max
};
#undef ENUM_ENTRY
66
/*
 * Combinations of the above attributes that are relevant to instruction
 * decode.  Although other combinations are possible, they can be reduced to
 * these without affecting the ultimately decoded instruction.
 *
 * Each entry carries three fields: the class name, a rank, and a string
 * giving the rationale for that rank.  This header only uses the name; other
 * includers can redefine ENUM_ENTRY to pick up the rank or the rationale.
 */

/*           Class name           Rank  Rationale for rank assignment         */
#define INSTRUCTION_CONTEXTS                                                   \
  ENUM_ENTRY(IC,                    0,  "says nothing about the instruction")  \
  ENUM_ENTRY(IC_64BIT,              1,  "says the instruction applies in "     \
                                        "64-bit mode but no more")             \
  ENUM_ENTRY(IC_OPSIZE,             3,  "requires an OPSIZE prefix, so "       \
                                        "operands change width")               \
  ENUM_ENTRY(IC_XD,                 2,  "may say something about the opcode "  \
                                        "but not the operands")                \
  ENUM_ENTRY(IC_XS,                 2,  "may say something about the opcode "  \
                                        "but not the operands")                \
  ENUM_ENTRY(IC_64BIT_REXW,         4,  "requires a REX.W prefix, so operands "\
                                        "change width; overrides IC_OPSIZE")   \
  ENUM_ENTRY(IC_64BIT_OPSIZE,       3,  "Just as meaningful as IC_OPSIZE")     \
  ENUM_ENTRY(IC_64BIT_XD,           5,  "XD instructions are SSE; REX.W is "   \
                                        "secondary")                           \
  ENUM_ENTRY(IC_64BIT_XS,           5,  "Just as meaningful as IC_64BIT_XD")   \
  ENUM_ENTRY(IC_64BIT_REXW_XS,      6,  "OPSIZE could mean a different "       \
                                        "opcode")                              \
  ENUM_ENTRY(IC_64BIT_REXW_XD,      6,  "Just as meaningful as "               \
                                        "IC_64BIT_REXW_XS")                    \
  ENUM_ENTRY(IC_64BIT_REXW_OPSIZE,  7,  "The Dynamic Duo!  Prefer over all "   \
                                        "else because this changes most "      \
                                        "operands' meaning")                   \
  ENUM_ENTRY(IC_VEX,                1,  "requires a VEX prefix")               \
  ENUM_ENTRY(IC_VEX_XS,             2,  "requires VEX and the XS prefix")      \
  ENUM_ENTRY(IC_VEX_XD,             2,  "requires VEX and the XD prefix")      \
  ENUM_ENTRY(IC_VEX_OPSIZE,         2,  "requires VEX and the OpSize prefix")  \
  ENUM_ENTRY(IC_VEX_W,              3,  "requires VEX and the W prefix")       \
  ENUM_ENTRY(IC_VEX_W_XS,           4,  "requires VEX, W, and XS prefix")      \
  ENUM_ENTRY(IC_VEX_W_XD,           4,  "requires VEX, W, and XD prefix")      \
  ENUM_ENTRY(IC_VEX_W_OPSIZE,       4,  "requires VEX, W, and OpSize")         \
  ENUM_ENTRY(IC_VEX_L,              3,  "requires VEX and the L prefix")       \
  ENUM_ENTRY(IC_VEX_L_XS,           4,  "requires VEX and the L and XS prefix")\
  ENUM_ENTRY(IC_VEX_L_XD,           4,  "requires VEX and the L and XD prefix")\
  ENUM_ENTRY(IC_VEX_L_OPSIZE,       4,  "requires VEX, L, and OpSize")

/*
 * InstructionContext - One enumerator per entry above, numbered from 0 in
 * listing order; IC_max is the number of contexts.
 */
#define ENUM_ENTRY(n, r, d) n,
typedef enum {
  INSTRUCTION_CONTEXTS
  IC_max
} InstructionContext;
#undef ENUM_ENTRY
117
/*
 * Opcode types, which determine which decode table to use, both in the Intel
 * manual and also for the decoder.  The numeric values are spelled out
 * explicitly rather than left to the compiler.
 */
typedef enum {
  ONEBYTE       = 0,  /* see ONEBYTE_SYM     */
  TWOBYTE       = 1,  /* see TWOBYTE_SYM     */
  THREEBYTE_38  = 2,  /* see THREEBYTE38_SYM */
  THREEBYTE_3A  = 3,  /* see THREEBYTE3A_SYM */
  THREEBYTE_A6  = 4,  /* see THREEBYTEA6_SYM */
  THREEBYTE_A7  = 5   /* see THREEBYTEA7_SYM */
} OpcodeType;
130
/*
 * The following structs are used for the hierarchical decode table.  After
 * determining the instruction's class (i.e., which IC_* constant applies to
 * it), the decoder reads the opcode.  Some instructions require specific
 * values of the ModR/M byte, so the ModR/M byte indexes into the final table.
 *
 * If a ModR/M byte is not required, "required" is left unset, and the values
 * for each instructionID are identical.
 */

/* InstrUID - 16-bit unsigned instruction identifier. */
typedef uint16_t InstrUID;
142
/*
 * ModRMDecisionType - describes the type of ModR/M decision, allowing the
 * consumer to determine the number of entries in it.
 *
 * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
 *                  instruction is the same.
 * MODRM_SPLITRM  - If the ModR/M byte is between 0x00 and 0xbf, the opcode
 *                  corresponds to one instruction; otherwise, it corresponds to
 *                  a different instruction.
 * MODRM_FULL     - Potentially, each value of the ModR/M byte could correspond
 *                  to a different instruction.
 *
 * MODRM_max is the number of decision types.
 */

#define MODRMTYPES            \
  ENUM_ENTRY(MODRM_ONEENTRY)  \
  ENUM_ENTRY(MODRM_SPLITRM)   \
  ENUM_ENTRY(MODRM_FULL)

#define ENUM_ENTRY(n) n,
typedef enum {
  MODRMTYPES
  MODRM_max
} ModRMDecisionType;
#undef ENUM_ENTRY
167
168/*
169 * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
170 * instruction each possible value of the ModR/M byte corresponds to. Once
171 * this information is known, we have narrowed down to a single instruction.
172 */
173struct ModRMDecision {
174 uint8_t modrm_type;
175
176 /* The macro below must be defined wherever this file is included. */
177 INSTRUCTION_IDS
178};
179
180/*
181 * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
182 * given a particular opcode.
183 */
184struct OpcodeDecision {
185 struct ModRMDecision modRMDecisions[256];
186};
187
188/*
189 * ContextDecision - Specifies which opcode->instruction tables to look at given
190 * a particular context (set of attributes). Since there are many possible
191 * contexts, the decoder first uses CONTEXTS_SYM to determine which context
192 * applies given a specific set of attributes. Hence there are only IC_max
193 * entries in this table, rather than 2^(ATTR_max).
194 */
195struct ContextDecision {
196 struct OpcodeDecision opcodeDecisions[IC_max];
197};
198
/*
 * Physical encodings of instruction operands.
 *
 * Each entry pairs an encoding name with a descriptive string.  A description
 * that gives only a size (e.g. "2-byte") continues the description of the
 * nearest fuller entry above it.
 */

#define ENCODINGS                                                              \
  ENUM_ENTRY(ENCODING_NONE,   "")                                              \
  ENUM_ENTRY(ENCODING_REG,    "Register operand in ModR/M byte.")              \
  ENUM_ENTRY(ENCODING_RM,     "R/M operand in ModR/M byte.")                   \
  ENUM_ENTRY(ENCODING_VVVV,   "Register operand in VEX.vvvv byte.")            \
  ENUM_ENTRY(ENCODING_CB,     "1-byte code offset (possible new CS value)")    \
  ENUM_ENTRY(ENCODING_CW,     "2-byte")                                        \
  ENUM_ENTRY(ENCODING_CD,     "4-byte")                                        \
  ENUM_ENTRY(ENCODING_CP,     "6-byte")                                        \
  ENUM_ENTRY(ENCODING_CO,     "8-byte")                                        \
  ENUM_ENTRY(ENCODING_CT,     "10-byte")                                       \
  ENUM_ENTRY(ENCODING_IB,     "1-byte immediate")                              \
  ENUM_ENTRY(ENCODING_IW,     "2-byte")                                        \
  ENUM_ENTRY(ENCODING_ID,     "4-byte")                                        \
  ENUM_ENTRY(ENCODING_IO,     "8-byte")                                        \
  ENUM_ENTRY(ENCODING_RB,     "(AL..DIL, R8L..R15L) Register code added to "   \
                              "the opcode byte")                               \
  ENUM_ENTRY(ENCODING_RW,     "(AX..DI, R8W..R15W)")                           \
  ENUM_ENTRY(ENCODING_RD,     "(EAX..EDI, R8D..R15D)")                         \
  ENUM_ENTRY(ENCODING_RO,     "(RAX..RDI, R8..R15)")                           \
  ENUM_ENTRY(ENCODING_I,      "Position on floating-point stack added to the " \
                              "opcode byte")                                   \
                                                                               \
  ENUM_ENTRY(ENCODING_Iv,     "Immediate of operand size")                     \
  ENUM_ENTRY(ENCODING_Ia,     "Immediate of address size")                     \
  ENUM_ENTRY(ENCODING_Rv,     "Register code of operand size added to the "    \
                              "opcode byte")                                   \
  ENUM_ENTRY(ENCODING_DUP,    "Duplicate of another operand; ID is encoded "   \
                              "in type")

/*
 * OperandEncoding - One enumerator per entry above, numbered from 0 in
 * listing order; ENCODING_max is the number of encodings.
 */
#define ENUM_ENTRY(n, d) n,
typedef enum {
  ENCODINGS
  ENCODING_max
} OperandEncoding;
#undef ENUM_ENTRY
239
/*
 * Semantic interpretations of instruction operands.
 *
 * Each entry pairs a type name with a descriptive string.  A description that
 * gives only a size (e.g. "2-byte") continues the description of the nearest
 * fuller entry above it.  TYPE_DUP0..TYPE_DUP4 are listed consecutively, so
 * the duplicated operand's index is (type - TYPE_DUP0).
 */

#define TYPES                                                                  \
  ENUM_ENTRY(TYPE_NONE,       "")                                              \
  ENUM_ENTRY(TYPE_REL8,       "1-byte immediate address")                      \
  ENUM_ENTRY(TYPE_REL16,      "2-byte")                                        \
  ENUM_ENTRY(TYPE_REL32,      "4-byte")                                        \
  ENUM_ENTRY(TYPE_REL64,      "8-byte")                                        \
  ENUM_ENTRY(TYPE_PTR1616,    "2+2-byte segment+offset address")               \
  ENUM_ENTRY(TYPE_PTR1632,    "2+4-byte")                                      \
  ENUM_ENTRY(TYPE_PTR1664,    "2+8-byte")                                      \
  ENUM_ENTRY(TYPE_R8,         "1-byte register operand")                       \
  ENUM_ENTRY(TYPE_R16,        "2-byte")                                        \
  ENUM_ENTRY(TYPE_R32,        "4-byte")                                        \
  ENUM_ENTRY(TYPE_R64,        "8-byte")                                        \
  ENUM_ENTRY(TYPE_IMM8,       "1-byte immediate operand")                      \
  ENUM_ENTRY(TYPE_IMM16,      "2-byte")                                        \
  ENUM_ENTRY(TYPE_IMM32,      "4-byte")                                        \
  ENUM_ENTRY(TYPE_IMM64,      "8-byte")                                        \
  ENUM_ENTRY(TYPE_IMM3,       "1-byte immediate operand between 0 and 7")      \
  ENUM_ENTRY(TYPE_RM8,        "1-byte register or memory operand")             \
  ENUM_ENTRY(TYPE_RM16,       "2-byte")                                        \
  ENUM_ENTRY(TYPE_RM32,       "4-byte")                                        \
  ENUM_ENTRY(TYPE_RM64,       "8-byte")                                        \
  ENUM_ENTRY(TYPE_M,          "Memory operand")                                \
  ENUM_ENTRY(TYPE_M8,         "1-byte")                                        \
  ENUM_ENTRY(TYPE_M16,        "2-byte")                                        \
  ENUM_ENTRY(TYPE_M32,        "4-byte")                                        \
  ENUM_ENTRY(TYPE_M64,        "8-byte")                                        \
  ENUM_ENTRY(TYPE_LEA,        "Effective address")                             \
  ENUM_ENTRY(TYPE_M128,       "16-byte (SSE/SSE2)")                            \
  ENUM_ENTRY(TYPE_M256,       "32-byte (AVX)")                                 \
  ENUM_ENTRY(TYPE_M1616,      "2+2-byte segment+offset address")               \
  ENUM_ENTRY(TYPE_M1632,      "2+4-byte")                                      \
  ENUM_ENTRY(TYPE_M1664,      "2+8-byte")                                      \
  ENUM_ENTRY(TYPE_M16_32,     "2+4-byte two-part memory operand (LIDT, LGDT)") \
  ENUM_ENTRY(TYPE_M16_16,     "2+2-byte (BOUND)")                              \
  ENUM_ENTRY(TYPE_M32_32,     "4+4-byte (BOUND)")                              \
  ENUM_ENTRY(TYPE_M16_64,     "2+8-byte (LIDT, LGDT)")                         \
  ENUM_ENTRY(TYPE_MOFFS8,     "1-byte memory offset (relative to segment "     \
                              "base)")                                         \
  ENUM_ENTRY(TYPE_MOFFS16,    "2-byte")                                        \
  ENUM_ENTRY(TYPE_MOFFS32,    "4-byte")                                        \
  ENUM_ENTRY(TYPE_MOFFS64,    "8-byte")                                        \
  ENUM_ENTRY(TYPE_SREG,       "Byte with single bit set: 0 = ES, 1 = CS, "     \
                              "2 = SS, 3 = DS, 4 = FS, 5 = GS")                \
  ENUM_ENTRY(TYPE_M32FP,      "32-bit IEEE754 memory floating-point operand")  \
  ENUM_ENTRY(TYPE_M64FP,      "64-bit")                                        \
  ENUM_ENTRY(TYPE_M80FP,      "80-bit extended")                               \
  ENUM_ENTRY(TYPE_M16INT,     "2-byte memory integer operand for use in "      \
                              "floating-point instructions")                   \
  ENUM_ENTRY(TYPE_M32INT,     "4-byte")                                        \
  ENUM_ENTRY(TYPE_M64INT,     "8-byte")                                        \
  ENUM_ENTRY(TYPE_ST,         "Position on the floating-point stack")          \
  ENUM_ENTRY(TYPE_MM,         "MMX register operand")                          \
  ENUM_ENTRY(TYPE_MM32,       "4-byte MMX register or memory operand")         \
  ENUM_ENTRY(TYPE_MM64,       "8-byte")                                        \
  ENUM_ENTRY(TYPE_XMM,        "XMM register operand")                          \
  ENUM_ENTRY(TYPE_XMM32,      "4-byte XMM register or memory operand")         \
  ENUM_ENTRY(TYPE_XMM64,      "8-byte")                                        \
  ENUM_ENTRY(TYPE_XMM128,     "16-byte")                                       \
  ENUM_ENTRY(TYPE_XMM256,     "32-byte")                                       \
  ENUM_ENTRY(TYPE_XMM0,       "Implicit use of XMM0")                          \
  ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand")                      \
  ENUM_ENTRY(TYPE_DEBUGREG,   "Debug register operand")                        \
  ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand")                      \
                                                                               \
  ENUM_ENTRY(TYPE_Mv,         "Memory operand of operand size")                \
  ENUM_ENTRY(TYPE_Rv,         "Register operand of operand size")              \
  ENUM_ENTRY(TYPE_IMMv,       "Immediate operand of operand size")             \
  ENUM_ENTRY(TYPE_RELv,       "Immediate address of operand size")             \
  ENUM_ENTRY(TYPE_DUP0,       "Duplicate of operand 0")                        \
  ENUM_ENTRY(TYPE_DUP1,       "operand 1")                                     \
  ENUM_ENTRY(TYPE_DUP2,       "operand 2")                                     \
  ENUM_ENTRY(TYPE_DUP3,       "operand 3")                                     \
  ENUM_ENTRY(TYPE_DUP4,       "operand 4")                                     \
  ENUM_ENTRY(TYPE_M512,       "512-byte FPU/MMX/XMM/MXCSR state")

/*
 * OperandType - One enumerator per entry above, numbered from 0 in listing
 * order; TYPE_max is the number of operand types.
 */
#define ENUM_ENTRY(n, d) n,
typedef enum {
  TYPES
  TYPE_max
} OperandType;
#undef ENUM_ENTRY
326
327/*
328 * OperandSpecifier - The specification for how to extract and interpret one
329 * operand.
330 */
331struct OperandSpecifier {
332 OperandEncoding encoding;
333 OperandType type;
334};
335
/*
 * Indicates where the opcode modifier (if any) is to be found.  Extended
 * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
 *
 * MODIFIER_max is the number of modifier types.
 */

#define MODIFIER_TYPES        \
  ENUM_ENTRY(MODIFIER_NONE)   \
  ENUM_ENTRY(MODIFIER_OPCODE) \
  ENUM_ENTRY(MODIFIER_MODRM)

#define ENUM_ENTRY(n) n,
typedef enum {
  MODIFIER_TYPES
  MODIFIER_max
} ModifierType;
#undef ENUM_ENTRY
352
353#define X86_MAX_OPERANDS 5
354
355/*
356 * The specification for how to extract and interpret a full instruction and
357 * its operands.
358 */
359struct InstructionSpecifier {
360 ModifierType modifierType;
361 uint8_t modifierBase;
362 struct OperandSpecifier operands[X86_MAX_OPERANDS];
363
364 /* The macro below must be defined wherever this file is included. */
365 INSTRUCTION_SPECIFIER_FIELDS
366};
367
/*
 * Decoding mode for the Intel disassembler.  16-bit, 32-bit, and 64-bit mode
 * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
 * respectively.
 *
 * The enumerators carry no explicit values, so they are numbered 0, 1, 2 in
 * the order listed.
 */
typedef enum {
  MODE_16BIT,
  MODE_32BIT,
  MODE_64BIT
} DisassemblerMode;
378
379#endif