blob: 46a74ab4e5291a609e4664bdd50c2892a43538e9 [file] [log] [blame]
//===-- X86DisassemblerDecoder.h - Disassembler decoder ---------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is part of the X86 Disassembler.
11// It contains the public interface of the instruction decoder.
12// Documentation for the disassembler can be found in X86Disassembler.h.
13//
14//===----------------------------------------------------------------------===//
Sean Callanan04cc3072009-12-19 02:59:52 +000015
Benjamin Kramera7c40ef2014-08-13 16:26:38 +000016#ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H
17#define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H
Sean Callanan04cc3072009-12-19 02:59:52 +000018
Sean Callanan04cc3072009-12-19 02:59:52 +000019#include "X86DisassemblerDecoderCommon.h"
Patrik Hagglund31998382014-04-28 12:12:27 +000020#include "llvm/ADT/ArrayRef.h"
Craig Topperfb39f972012-07-31 04:58:05 +000021
Richard Smith89ee75d2014-04-20 21:07:34 +000022namespace llvm {
23namespace X86Disassembler {
24
// Field extractors for the ModR/M, SIB and REX bytes of an Intel instruction.
// Every macro masks out one field and shifts it down so it starts at bit 0.
// Masks are spelled as (field-width << position) to make the layout visible.

// ModR/M byte: mod = bits 7:6, reg = bits 5:3, r/m = bits 2:0.
#define modFromModRM(modRM) (((modRM) & (0x3 << 6)) >> 6)
#define regFromModRM(modRM) (((modRM) & (0x7 << 3)) >> 3)
#define rmFromModRM(modRM)  ((modRM) & 0x7)

// SIB byte: scale = bits 7:6, index = bits 5:3, base = bits 2:0.
#define scaleFromSIB(sib)   (((sib) & (0x3 << 6)) >> 6)
#define indexFromSIB(sib)   (((sib) & (0x7 << 3)) >> 3)
#define baseFromSIB(sib)    ((sib) & 0x7)

// REX prefix: W = bit 3, R = bit 2, X = bit 1, B = bit 0.
#define wFromREX(rex)       (((rex) & (0x1 << 3)) >> 3)
#define rFromREX(rex)       (((rex) & (0x1 << 2)) >> 2)
#define xFromREX(rex)       (((rex) & (0x1 << 1)) >> 1)
#define bFromREX(rex)       ((rex) & 0x1)
Craig Topperfb39f972012-07-31 04:58:05 +000036
// Field extractors for bytes 2, 3 and 4 of the four-byte EVEX prefix.
// Each macro masks out one field and shifts it down to bit 0. The macros that
// apply ~ return the bitwise complement of the stored bit(s).
// Every use of the argument is parenthesised so that compound expressions
// (e.g. `a | b`) are complemented as a whole.

// EVEX byte 2 of 4: R = bit 7, X = bit 6, B = bit 5, R' = bit 4 (all
// complemented), mm = bits 1:0.
#define rFromEVEX2of4(evex)     (((~(evex)) & 0x80) >> 7)
#define xFromEVEX2of4(evex)     (((~(evex)) & 0x40) >> 6)
#define bFromEVEX2of4(evex)     (((~(evex)) & 0x20) >> 5)
#define r2FromEVEX2of4(evex)    (((~(evex)) & 0x10) >> 4)
#define mmFromEVEX2of4(evex)    ((evex) & 0x3)

// EVEX byte 3 of 4: W = bit 7, vvvv = bits 6:3 (complemented), pp = bits 1:0.
#define wFromEVEX3of4(evex)     (((evex) & 0x80) >> 7)
#define vvvvFromEVEX3of4(evex)  (((~(evex)) & 0x78) >> 3)
#define ppFromEVEX3of4(evex)    ((evex) & 0x3)

// EVEX byte 4 of 4: z = bit 7, L' = bit 6, L = bit 5, b = bit 4,
// V' = bit 3 (complemented), aaa = bits 2:0.
#define zFromEVEX4of4(evex)     (((evex) & 0x80) >> 7)
#define l2FromEVEX4of4(evex)    (((evex) & 0x40) >> 6)
#define lFromEVEX4of4(evex)     (((evex) & 0x20) >> 5)
#define bFromEVEX4of4(evex)     (((evex) & 0x10) >> 4)
#define v2FromEVEX4of4(evex)    (((~(evex)) & 0x8) >> 3)
#define aaaFromEVEX4of4(evex)   ((evex) & 0x7)
51
// Field extractors for the VEX prefix. Each macro masks out one field and
// shifts it down to bit 0; the macros that apply ~ return the bitwise
// complement of the stored bit(s). Masks are written as (width << position).

// Three-byte VEX, byte 2: R = bit 7, X = bit 6, B = bit 5 (all complemented),
// m-mmmm = bits 4:0.
#define rFromVEX2of3(vex)     (((~(vex)) & (0x1 << 7)) >> 7)
#define xFromVEX2of3(vex)     (((~(vex)) & (0x1 << 6)) >> 6)
#define bFromVEX2of3(vex)     (((~(vex)) & (0x1 << 5)) >> 5)
#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)

// Three-byte VEX, byte 3: W = bit 7, vvvv = bits 6:3 (complemented),
// L = bit 2, pp = bits 1:0.
#define wFromVEX3of3(vex)     (((vex) & (0x1 << 7)) >> 7)
#define vvvvFromVEX3of3(vex)  (((~(vex)) & (0xf << 3)) >> 3)
#define lFromVEX3of3(vex)     (((vex) & (0x1 << 2)) >> 2)
#define ppFromVEX3of3(vex)    ((vex) & 0x3)

// Two-byte VEX, byte 2: R = bit 7 and vvvv = bits 6:3 (both complemented),
// L = bit 2, pp = bits 1:0.
#define rFromVEX2of2(vex)     (((~(vex)) & (0x1 << 7)) >> 7)
#define vvvvFromVEX2of2(vex)  (((~(vex)) & (0xf << 3)) >> 3)
#define lFromVEX2of2(vex)     (((vex) & (0x1 << 2)) >> 2)
#define ppFromVEX2of2(vex)    ((vex) & 0x3)
Sean Callanan04cc3072009-12-19 02:59:52 +000065
// Field extractors for the three-byte XOP prefix. Each macro masks out one
// field and shifts it down to bit 0; the macros that apply ~ return the
// bitwise complement of the stored bit(s).

// XOP byte 2: R = bit 7, X = bit 6, B = bit 5 (all complemented),
// mmmmm = bits 4:0.
#define rFromXOP2of3(xop)     (((~(xop)) & (0x1 << 7)) >> 7)
#define xFromXOP2of3(xop)     (((~(xop)) & (0x1 << 6)) >> 6)
#define bFromXOP2of3(xop)     (((~(xop)) & (0x1 << 5)) >> 5)
#define mmmmmFromXOP2of3(xop) ((xop) & 0x1f)

// XOP byte 3: W = bit 7, vvvv = bits 6:3 (complemented), L = bit 2,
// pp = bits 1:0.
#define wFromXOP3of3(xop)     (((xop) & (0x1 << 7)) >> 7)
#define vvvvFromXOP3of3(xop)  (((~(xop)) & (0xf << 3)) >> 3)
#define lFromXOP3of3(xop)     (((xop) & (0x1 << 2)) >> 2)
#define ppFromXOP3of3(xop)    ((xop) & 0x3)
74
// Register tables used by the decoder. Each table is a list of ENTRY(name)
// invocations; the user defines ENTRY before expanding a table and undefines
// it afterwards. The order of the entries determines the generated values.

// 8-bit registers.
#define REGS_8BIT                                                             \
  ENTRY(AL)   ENTRY(CL)   ENTRY(DL)   ENTRY(BL)                               \
  ENTRY(AH)   ENTRY(CH)   ENTRY(DH)   ENTRY(BH)                               \
  ENTRY(R8B)  ENTRY(R9B)  ENTRY(R10B) ENTRY(R11B)                             \
  ENTRY(R12B) ENTRY(R13B) ENTRY(R14B) ENTRY(R15B)                             \
  ENTRY(SPL)  ENTRY(BPL)  ENTRY(SIL)  ENTRY(DIL)
97
// Effective-address bases available with 16-bit addressing, as a list of
// ENTRY(name) invocations. The first four entries are register pairs.
#define EA_BASES_16BIT                                                        \
  ENTRY(BX_SI) ENTRY(BX_DI) ENTRY(BP_SI) ENTRY(BP_DI)                         \
  ENTRY(SI)    ENTRY(DI)    ENTRY(BP)    ENTRY(BX)                            \
  ENTRY(R8W)   ENTRY(R9W)   ENTRY(R10W)  ENTRY(R11W)                          \
  ENTRY(R12W)  ENTRY(R13W)  ENTRY(R14W)  ENTRY(R15W)
115
// 16-bit registers, as a list of ENTRY(name) invocations.
#define REGS_16BIT                                                            \
  ENTRY(AX)   ENTRY(CX)   ENTRY(DX)   ENTRY(BX)                               \
  ENTRY(SP)   ENTRY(BP)   ENTRY(SI)   ENTRY(DI)                               \
  ENTRY(R8W)  ENTRY(R9W)  ENTRY(R10W) ENTRY(R11W)                             \
  ENTRY(R12W) ENTRY(R13W) ENTRY(R14W) ENTRY(R15W)
133
// Effective-address bases available with 32-bit addressing, as a list of
// ENTRY(name) invocations. The fifth entry is the special `sib` marker
// rather than a register name.
#define EA_BASES_32BIT                                                        \
  ENTRY(EAX)  ENTRY(ECX)  ENTRY(EDX)  ENTRY(EBX)                              \
  ENTRY(sib)  ENTRY(EBP)  ENTRY(ESI)  ENTRY(EDI)                              \
  ENTRY(R8D)  ENTRY(R9D)  ENTRY(R10D) ENTRY(R11D)                             \
  ENTRY(R12D) ENTRY(R13D) ENTRY(R14D) ENTRY(R15D)
151
// 32-bit registers, as a list of ENTRY(name) invocations.
#define REGS_32BIT                                                            \
  ENTRY(EAX)  ENTRY(ECX)  ENTRY(EDX)  ENTRY(EBX)                              \
  ENTRY(ESP)  ENTRY(EBP)  ENTRY(ESI)  ENTRY(EDI)                              \
  ENTRY(R8D)  ENTRY(R9D)  ENTRY(R10D) ENTRY(R11D)                             \
  ENTRY(R12D) ENTRY(R13D) ENTRY(R14D) ENTRY(R15D)
169
// Effective-address bases available with 64-bit addressing, as a list of
// ENTRY(name) invocations. The fifth entry is the special `sib64` marker
// rather than a register name.
#define EA_BASES_64BIT                                                        \
  ENTRY(RAX)   ENTRY(RCX) ENTRY(RDX) ENTRY(RBX)                               \
  ENTRY(sib64) ENTRY(RBP) ENTRY(RSI) ENTRY(RDI)                               \
  ENTRY(R8)    ENTRY(R9)  ENTRY(R10) ENTRY(R11)                               \
  ENTRY(R12)   ENTRY(R13) ENTRY(R14) ENTRY(R15)
187
// 64-bit registers, as a list of ENTRY(name) invocations.
#define REGS_64BIT                                                            \
  ENTRY(RAX) ENTRY(RCX) ENTRY(RDX) ENTRY(RBX)                                 \
  ENTRY(RSP) ENTRY(RBP) ENTRY(RSI) ENTRY(RDI)                                 \
  ENTRY(R8)  ENTRY(R9)  ENTRY(R10) ENTRY(R11)                                 \
  ENTRY(R12) ENTRY(R13) ENTRY(R14) ENTRY(R15)
205
// MMX registers MM0..MM7, as a list of ENTRY(name) invocations.
#define REGS_MMX                                                              \
  ENTRY(MM0) ENTRY(MM1) ENTRY(MM2) ENTRY(MM3)                                 \
  ENTRY(MM4) ENTRY(MM5) ENTRY(MM6) ENTRY(MM7)
215
// XMM registers XMM0..XMM31, as a list of ENTRY(name) invocations.
#define REGS_XMM                                                              \
  ENTRY(XMM0)  ENTRY(XMM1)  ENTRY(XMM2)  ENTRY(XMM3)                          \
  ENTRY(XMM4)  ENTRY(XMM5)  ENTRY(XMM6)  ENTRY(XMM7)                          \
  ENTRY(XMM8)  ENTRY(XMM9)  ENTRY(XMM10) ENTRY(XMM11)                         \
  ENTRY(XMM12) ENTRY(XMM13) ENTRY(XMM14) ENTRY(XMM15)                         \
  ENTRY(XMM16) ENTRY(XMM17) ENTRY(XMM18) ENTRY(XMM19)                         \
  ENTRY(XMM20) ENTRY(XMM21) ENTRY(XMM22) ENTRY(XMM23)                         \
  ENTRY(XMM24) ENTRY(XMM25) ENTRY(XMM26) ENTRY(XMM27)                         \
  ENTRY(XMM28) ENTRY(XMM29) ENTRY(XMM30) ENTRY(XMM31)
Sean Callananc3fd5232011-03-15 01:23:15 +0000249
// YMM registers YMM0..YMM31, as a list of ENTRY(name) invocations.
#define REGS_YMM                                                              \
  ENTRY(YMM0)  ENTRY(YMM1)  ENTRY(YMM2)  ENTRY(YMM3)                          \
  ENTRY(YMM4)  ENTRY(YMM5)  ENTRY(YMM6)  ENTRY(YMM7)                          \
  ENTRY(YMM8)  ENTRY(YMM9)  ENTRY(YMM10) ENTRY(YMM11)                         \
  ENTRY(YMM12) ENTRY(YMM13) ENTRY(YMM14) ENTRY(YMM15)                         \
  ENTRY(YMM16) ENTRY(YMM17) ENTRY(YMM18) ENTRY(YMM19)                         \
  ENTRY(YMM20) ENTRY(YMM21) ENTRY(YMM22) ENTRY(YMM23)                         \
  ENTRY(YMM24) ENTRY(YMM25) ENTRY(YMM26) ENTRY(YMM27)                         \
  ENTRY(YMM28) ENTRY(YMM29) ENTRY(YMM30) ENTRY(YMM31)
283
// ZMM registers ZMM0..ZMM31, as a list of ENTRY(name) invocations.
#define REGS_ZMM                                                              \
  ENTRY(ZMM0)  ENTRY(ZMM1)  ENTRY(ZMM2)  ENTRY(ZMM3)                          \
  ENTRY(ZMM4)  ENTRY(ZMM5)  ENTRY(ZMM6)  ENTRY(ZMM7)                          \
  ENTRY(ZMM8)  ENTRY(ZMM9)  ENTRY(ZMM10) ENTRY(ZMM11)                         \
  ENTRY(ZMM12) ENTRY(ZMM13) ENTRY(ZMM14) ENTRY(ZMM15)                         \
  ENTRY(ZMM16) ENTRY(ZMM17) ENTRY(ZMM18) ENTRY(ZMM19)                         \
  ENTRY(ZMM20) ENTRY(ZMM21) ENTRY(ZMM22) ENTRY(ZMM23)                         \
  ENTRY(ZMM24) ENTRY(ZMM25) ENTRY(ZMM26) ENTRY(ZMM27)                         \
  ENTRY(ZMM28) ENTRY(ZMM29) ENTRY(ZMM30) ENTRY(ZMM31)
Craig Topperfb39f972012-07-31 04:58:05 +0000317
// Mask registers K0..K7, as a list of ENTRY(name) invocations.
#define REGS_MASKS                                                            \
  ENTRY(K0) ENTRY(K1) ENTRY(K2) ENTRY(K3)                                     \
  ENTRY(K4) ENTRY(K5) ENTRY(K6) ENTRY(K7)
327
// Segment registers, as a list of ENTRY(name) invocations.
#define REGS_SEGMENT                                                          \
  ENTRY(ES) ENTRY(CS) ENTRY(SS)                                               \
  ENTRY(DS) ENTRY(FS) ENTRY(GS)
Craig Topperfb39f972012-07-31 04:58:05 +0000335
// Debug registers DR0..DR7, as a list of ENTRY(name) invocations.
#define REGS_DEBUG                                                            \
  ENTRY(DR0) ENTRY(DR1) ENTRY(DR2) ENTRY(DR3)                                 \
  ENTRY(DR4) ENTRY(DR5) ENTRY(DR6) ENTRY(DR7)
345
// Control registers CR0..CR15, as a list of ENTRY(name) invocations.
#define REGS_CONTROL                                                          \
  ENTRY(CR0)  ENTRY(CR1)  ENTRY(CR2)  ENTRY(CR3)                              \
  ENTRY(CR4)  ENTRY(CR5)  ENTRY(CR6)  ENTRY(CR7)                              \
  ENTRY(CR8)  ENTRY(CR9)  ENTRY(CR10) ENTRY(CR11)                             \
  ENTRY(CR12) ENTRY(CR13) ENTRY(CR14) ENTRY(CR15)
Craig Topperfb39f972012-07-31 04:58:05 +0000363
// Every effective-address base: the 16-bit list, then the 32-bit list, then
// the 64-bit list.
#define ALL_EA_BASES EA_BASES_16BIT EA_BASES_32BIT EA_BASES_64BIT
Craig Topperfb39f972012-07-31 04:58:05 +0000368
// Every SIB base: the 32-bit registers followed by the 64-bit registers.
#define ALL_SIB_BASES REGS_32BIT REGS_64BIT
372
// Every register table in sequence, followed by a final RIP entry.
#define ALL_REGS                                                              \
  REGS_8BIT REGS_16BIT REGS_32BIT REGS_64BIT                                  \
  REGS_MMX REGS_XMM REGS_YMM REGS_ZMM                                         \
  REGS_MASKS REGS_SEGMENT REGS_DEBUG REGS_CONTROL                             \
  ENTRY(RIP)
387
Richard Smith6a6967e2014-04-20 22:10:16 +0000388/// \brief All possible values of the base field for effective-address
389/// computations, a.k.a. the Mod and R/M fields of the ModR/M byte.
390/// We distinguish between bases (EA_BASE_*) and registers that just happen
391/// to be referred to when Mod == 0b11 (EA_REG_*).
392enum EABase {
Sean Callanan04cc3072009-12-19 02:59:52 +0000393 EA_BASE_NONE,
394#define ENTRY(x) EA_BASE_##x,
395 ALL_EA_BASES
396#undef ENTRY
397#define ENTRY(x) EA_REG_##x,
398 ALL_REGS
399#undef ENTRY
400 EA_max
Richard Smith6a6967e2014-04-20 22:10:16 +0000401};
Craig Topperfb39f972012-07-31 04:58:05 +0000402
Richard Smith6a6967e2014-04-20 22:10:16 +0000403/// \brief All possible values of the SIB index field.
404/// borrows entries from ALL_EA_BASES with the special case that
405/// sib is synonymous with NONE.
406/// Vector SIB: index can be XMM or YMM.
407enum SIBIndex {
Sean Callanan04cc3072009-12-19 02:59:52 +0000408 SIB_INDEX_NONE,
409#define ENTRY(x) SIB_INDEX_##x,
410 ALL_EA_BASES
Manman Rena0982042012-06-26 19:47:59 +0000411 REGS_XMM
412 REGS_YMM
Elena Demikhovsky003e7d72013-07-28 08:28:38 +0000413 REGS_ZMM
Sean Callanan04cc3072009-12-19 02:59:52 +0000414#undef ENTRY
415 SIB_INDEX_max
Richard Smith6a6967e2014-04-20 22:10:16 +0000416};
Craig Topperfb39f972012-07-31 04:58:05 +0000417
Richard Smith6a6967e2014-04-20 22:10:16 +0000418/// \brief All possible values of the SIB base field.
419enum SIBBase {
Sean Callanan04cc3072009-12-19 02:59:52 +0000420 SIB_BASE_NONE,
421#define ENTRY(x) SIB_BASE_##x,
422 ALL_SIB_BASES
423#undef ENTRY
424 SIB_BASE_max
Richard Smith6a6967e2014-04-20 22:10:16 +0000425};
Sean Callanan04cc3072009-12-19 02:59:52 +0000426
/// \brief The displacement kinds an effective-address computation can use.
enum EADisplacement {
  EA_DISP_NONE, // no displacement
  EA_DISP_8,
  EA_DISP_16,
  EA_DISP_32
};
434
Richard Smith6a6967e2014-04-20 22:10:16 +0000435/// \brief All possible values of the reg field in the ModR/M byte.
436enum Reg {
Sean Callanan2f9443f2009-12-22 02:07:42 +0000437#define ENTRY(x) MODRM_REG_##x,
Sean Callanan04cc3072009-12-19 02:59:52 +0000438 ALL_REGS
439#undef ENTRY
Sean Callanan2f9443f2009-12-22 02:07:42 +0000440 MODRM_REG_max
Richard Smith6a6967e2014-04-20 22:10:16 +0000441};
Craig Topperfb39f972012-07-31 04:58:05 +0000442
/// \brief The segment overrides an instruction can carry.
enum SegmentOverride {
  SEG_OVERRIDE_NONE = 0, // no override
  SEG_OVERRIDE_CS,
  SEG_OVERRIDE_SS,
  SEG_OVERRIDE_DS,
  SEG_OVERRIDE_ES,
  SEG_OVERRIDE_FS,
  SEG_OVERRIDE_GS,
  SEG_OVERRIDE_max       // number of enumerators above
};
Craig Topperfb39f972012-07-31 04:58:05 +0000454
/// \brief The values the VEX.m-mmmm field can take.
enum VEXLeadingOpcodeByte {
  VEX_LOB_0F   = 1,
  VEX_LOB_0F38 = 2,
  VEX_LOB_0F3A = 3
};
Sean Callananc3fd5232011-03-15 01:23:15 +0000461
/// \brief The XOP map-select values recognised by the decoder.
enum XOPMapSelect {
  XOP_MAP_SELECT_8 = 8,
  XOP_MAP_SELECT_9 = 9,
  XOP_MAP_SELECT_A = 10
};
Craig Topper9e3e38a2013-10-03 05:17:48 +0000467
/// \brief The values the VEX.pp / EVEX.pp field can take.
enum VEXPrefixCode {
  VEX_PREFIX_NONE = 0,
  VEX_PREFIX_66   = 1,
  VEX_PREFIX_F3   = 2,
  VEX_PREFIX_F2   = 3
};
Sean Callanan04cc3072009-12-19 02:59:52 +0000475
/// \brief Which vector-extension prefix, if any, an instruction carries.
enum VectorExtensionType {
  TYPE_NO_VEX_XOP = 0,
  TYPE_VEX_2B     = 1,
  TYPE_VEX_3B     = 2,
  TYPE_EVEX       = 3,
  TYPE_XOP        = 4
};
Craig Topper9e3e38a2013-10-03 05:17:48 +0000483
/// \brief Signature of the byte reader the consumer must hand to the decoder.
/// A reader fetches one byte from the instruction's address space.
/// \param arg     A baton the consumer can tie to whatever internal state it
///                needs.
/// \param byte    Points at the single byte of memory that should receive the
///                value found at \p address.
/// \param address The address, in the instruction's address space, to read.
/// \return -1 if the byte cannot be read for any reason; 0 otherwise.
using byteReader_t = int (*)(const void *arg, uint8_t *byte, uint64_t address);
Sean Callanan04cc3072009-12-19 02:59:52 +0000494
/// \brief Signature of the optional logging callback through which the
/// consumer receives debugging output from the decoder.
/// \param arg A baton the consumer can tie to whatever internal state it
///            needs.
/// \param log The message text. The string will be reused once the logger
///            returns.
using dlog_t = void (*)(void *arg, const char *log);
Sean Callanan04cc3072009-12-19 02:59:52 +0000502
/// Describes how a complete instruction and its operands are to be extracted
/// and interpreted.
struct InstructionSpecifier {
  uint16_t operands;
};
508
Richard Smith6a6967e2014-04-20 22:10:16 +0000509/// The x86 internal instruction, which is produced by the decoder.
Sean Callanan04cc3072009-12-19 02:59:52 +0000510struct InternalInstruction {
Richard Smith6a6967e2014-04-20 22:10:16 +0000511 // Reader interface (C)
Sean Callanan04cc3072009-12-19 02:59:52 +0000512 byteReader_t reader;
Richard Smith6a6967e2014-04-20 22:10:16 +0000513 // Opaque value passed to the reader
Roman Divacky67923802012-09-05 21:17:34 +0000514 const void* readerArg;
Richard Smith6a6967e2014-04-20 22:10:16 +0000515 // The address of the next byte to read via the reader
Sean Callanan04cc3072009-12-19 02:59:52 +0000516 uint64_t readerCursor;
517
Richard Smith6a6967e2014-04-20 22:10:16 +0000518 // Logger interface (C)
Sean Callanan04cc3072009-12-19 02:59:52 +0000519 dlog_t dlog;
Richard Smith6a6967e2014-04-20 22:10:16 +0000520 // Opaque value passed to the logger
Sean Callanan04cc3072009-12-19 02:59:52 +0000521 void* dlogArg;
522
Richard Smith6a6967e2014-04-20 22:10:16 +0000523 // General instruction information
Craig Topperfb39f972012-07-31 04:58:05 +0000524
Richard Smith6a6967e2014-04-20 22:10:16 +0000525 // The mode to disassemble for (64-bit, protected, real)
Sean Callanan04cc3072009-12-19 02:59:52 +0000526 DisassemblerMode mode;
Richard Smith6a6967e2014-04-20 22:10:16 +0000527 // The start of the instruction, usable with the reader
Sean Callanan04cc3072009-12-19 02:59:52 +0000528 uint64_t startLocation;
Richard Smith6a6967e2014-04-20 22:10:16 +0000529 // The length of the instruction, in bytes
Sean Callanan04cc3072009-12-19 02:59:52 +0000530 size_t length;
Craig Topperfb39f972012-07-31 04:58:05 +0000531
Richard Smith6a6967e2014-04-20 22:10:16 +0000532 // Prefix state
Craig Topperfb39f972012-07-31 04:58:05 +0000533
Richard Smith6a6967e2014-04-20 22:10:16 +0000534 // 1 if the prefix byte corresponding to the entry is present; 0 if not
Sean Callanan04cc3072009-12-19 02:59:52 +0000535 uint8_t prefixPresent[0x100];
Richard Smith6a6967e2014-04-20 22:10:16 +0000536 // contains the location (for use with the reader) of the prefix byte
Sean Callanan04cc3072009-12-19 02:59:52 +0000537 uint64_t prefixLocations[0x100];
Richard Smith6a6967e2014-04-20 22:10:16 +0000538 // The value of the vector extension prefix(EVEX/VEX/XOP), if present
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000539 uint8_t vectorExtensionPrefix[4];
Richard Smith6a6967e2014-04-20 22:10:16 +0000540 // The type of the vector extension prefix
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000541 VectorExtensionType vectorExtensionType;
Richard Smith6a6967e2014-04-20 22:10:16 +0000542 // The value of the REX prefix, if present
Sean Callanan04cc3072009-12-19 02:59:52 +0000543 uint8_t rexPrefix;
Richard Smith6a6967e2014-04-20 22:10:16 +0000544 // The location where a mandatory prefix would have to be (i.e., right before
545 // the opcode, or right before the REX prefix if one is present).
Sean Callanan04cc3072009-12-19 02:59:52 +0000546 uint64_t necessaryPrefixLocation;
Richard Smith6a6967e2014-04-20 22:10:16 +0000547 // The segment override type
Sean Callanan04cc3072009-12-19 02:59:52 +0000548 SegmentOverride segmentOverride;
Richard Smith6a6967e2014-04-20 22:10:16 +0000549 // 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease
Richard Smith5d5061032014-04-20 22:15:37 +0000550 bool xAcquireRelease;
Craig Topperfb39f972012-07-31 04:58:05 +0000551
Richard Smith6a6967e2014-04-20 22:10:16 +0000552 // Sizes of various critical pieces of data, in bytes
Sean Callanan04cc3072009-12-19 02:59:52 +0000553 uint8_t registerSize;
554 uint8_t addressSize;
555 uint8_t displacementSize;
556 uint8_t immediateSize;
Kevin Enderby6fbcd8d2012-02-23 18:18:17 +0000557
Richard Smith6a6967e2014-04-20 22:10:16 +0000558 // Offsets from the start of the instruction to the pieces of data, which is
559 // needed to find relocation entries for adding symbolic operands.
Kevin Enderby6fbcd8d2012-02-23 18:18:17 +0000560 uint8_t displacementOffset;
561 uint8_t immediateOffset;
Craig Topperfb39f972012-07-31 04:58:05 +0000562
Richard Smith6a6967e2014-04-20 22:10:16 +0000563 // opcode state
Craig Topperfb39f972012-07-31 04:58:05 +0000564
Richard Smith6a6967e2014-04-20 22:10:16 +0000565 // The last byte of the opcode, not counting any ModR/M extension
Sean Callanan04cc3072009-12-19 02:59:52 +0000566 uint8_t opcode;
Richard Smith6a6967e2014-04-20 22:10:16 +0000567 // The ModR/M byte of the instruction, if it is an opcode extension
Sean Callanan04cc3072009-12-19 02:59:52 +0000568 uint8_t modRMExtension;
Craig Topperfb39f972012-07-31 04:58:05 +0000569
Richard Smith6a6967e2014-04-20 22:10:16 +0000570 // decode state
Craig Topperfb39f972012-07-31 04:58:05 +0000571
Richard Smith6a6967e2014-04-20 22:10:16 +0000572 // The type of opcode, used for indexing into the array of decode tables
Sean Callanan04cc3072009-12-19 02:59:52 +0000573 OpcodeType opcodeType;
Richard Smith6a6967e2014-04-20 22:10:16 +0000574 // The instruction ID, extracted from the decode table
Sean Callanan04cc3072009-12-19 02:59:52 +0000575 uint16_t instructionID;
Richard Smith6a6967e2014-04-20 22:10:16 +0000576 // The specifier for the instruction, from the instruction info table
Richard Smith3c3410f2014-04-20 21:56:02 +0000577 const InstructionSpecifier *spec;
Craig Topperfb39f972012-07-31 04:58:05 +0000578
Richard Smith6a6967e2014-04-20 22:10:16 +0000579 // state for additional bytes, consumed during operand decode. Pattern:
580 // consumed___ indicates that the byte was already consumed and does not
581 // need to be consumed again.
Sean Callananc3fd5232011-03-15 01:23:15 +0000582
Richard Smith6a6967e2014-04-20 22:10:16 +0000583 // The VEX.vvvv field, which contains a third register operand for some AVX
584 // instructions.
Sean Callananc3fd5232011-03-15 01:23:15 +0000585 Reg vvvv;
Craig Topperfb39f972012-07-31 04:58:05 +0000586
Richard Smith6a6967e2014-04-20 22:10:16 +0000587 // The writemask for AVX-512 instructions which is contained in EVEX.aaa
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000588 Reg writemask;
589
Richard Smith6a6967e2014-04-20 22:10:16 +0000590 // The ModR/M byte, which contains most register operands and some portion of
591 // all memory operands.
Richard Smith5d5061032014-04-20 22:15:37 +0000592 bool consumedModRM;
Sean Callanan04cc3072009-12-19 02:59:52 +0000593 uint8_t modRM;
Craig Topperfb39f972012-07-31 04:58:05 +0000594
Richard Smith6a6967e2014-04-20 22:10:16 +0000595 // The SIB byte, used for more complex 32- or 64-bit memory operands
Richard Smith5d5061032014-04-20 22:15:37 +0000596 bool consumedSIB;
Sean Callanan04cc3072009-12-19 02:59:52 +0000597 uint8_t sib;
598
Richard Smith6a6967e2014-04-20 22:10:16 +0000599 // The displacement, used for memory operands
Richard Smith5d5061032014-04-20 22:15:37 +0000600 bool consumedDisplacement;
Sean Callanan04cc3072009-12-19 02:59:52 +0000601 int32_t displacement;
Craig Topperfb39f972012-07-31 04:58:05 +0000602
Richard Smith6a6967e2014-04-20 22:10:16 +0000603 // Immediates. There can be two in some cases
Sean Callanan04cc3072009-12-19 02:59:52 +0000604 uint8_t numImmediatesConsumed;
605 uint8_t numImmediatesTranslated;
606 uint64_t immediates[2];
Craig Topperfb39f972012-07-31 04:58:05 +0000607
Richard Smith6a6967e2014-04-20 22:10:16 +0000608 // A register or immediate operand encoded into the opcode
Sean Callanan04cc3072009-12-19 02:59:52 +0000609 Reg opcodeRegister;
Craig Topperfb39f972012-07-31 04:58:05 +0000610
Richard Smith6a6967e2014-04-20 22:10:16 +0000611 // Portions of the ModR/M byte
Craig Topperfb39f972012-07-31 04:58:05 +0000612
Richard Smith6a6967e2014-04-20 22:10:16 +0000613 // These fields determine the allowable values for the ModR/M fields, which
614 // depend on operand and address widths.
Sean Callanan04cc3072009-12-19 02:59:52 +0000615 EABase eaBaseBase;
616 EABase eaRegBase;
617 Reg regBase;
618
Richard Smith6a6967e2014-04-20 22:10:16 +0000619 // The Mod and R/M fields can encode a base for an effective address, or a
620 // register. These are separated into two fields here.
Sean Callanan04cc3072009-12-19 02:59:52 +0000621 EABase eaBase;
622 EADisplacement eaDisplacement;
Richard Smith6a6967e2014-04-20 22:10:16 +0000623 // The reg field always encodes a register
Sean Callanan04cc3072009-12-19 02:59:52 +0000624 Reg reg;
Craig Topperfb39f972012-07-31 04:58:05 +0000625
Richard Smith6a6967e2014-04-20 22:10:16 +0000626 // SIB state
Sean Callanan04cc3072009-12-19 02:59:52 +0000627 SIBIndex sibIndex;
628 uint8_t sibScale;
629 SIBBase sibBase;
Craig Topperb8aec082012-08-01 07:39:18 +0000630
Patrik Hagglund31998382014-04-28 12:12:27 +0000631 ArrayRef<OperandSpecifier> operands;
Sean Callanan04cc3072009-12-19 02:59:52 +0000632};
633
Richard Smith6a6967e2014-04-20 22:10:16 +0000634/// \brief Decode one instruction and store the decoding results in
635/// a buffer provided by the consumer.
636/// \param insn The buffer to store the instruction in. Allocated by the
637/// consumer.
638/// \param reader The byteReader_t for the bytes to be read.
639/// \param readerArg An argument to pass to the reader for storing context
640/// specific to the consumer. May be NULL.
641/// \param logger The dlog_t to be used in printing status messages from the
642/// disassembler. May be NULL.
643/// \param loggerArg An argument to pass to the logger for storing context
644/// specific to the logger. May be NULL.
645/// \param startLoc The address (in the reader's address space) of the first
646/// byte in the instruction.
647/// \param mode The mode (16-bit, 32-bit, 64-bit) to decode in.
648/// \return Nonzero if there was an error during decode, 0 otherwise.
Richard Smith3c3410f2014-04-20 21:56:02 +0000649int decodeInstruction(InternalInstruction *insn,
Sean Callanan04cc3072009-12-19 02:59:52 +0000650 byteReader_t reader,
Richard Smith3c3410f2014-04-20 21:56:02 +0000651 const void *readerArg,
Sean Callanan04cc3072009-12-19 02:59:52 +0000652 dlog_t logger,
Richard Smith3c3410f2014-04-20 21:56:02 +0000653 void *loggerArg,
654 const void *miiArg,
Sean Callanan04cc3072009-12-19 02:59:52 +0000655 uint64_t startLoc,
656 DisassemblerMode mode);
657
/// \brief Emit a message on debugs().
/// \param file Name of the file that is printing the debug message.
/// \param line Line number at which the debug message was printed.
/// \param s    The message text.
void Debug(const char *file, unsigned line, const char *s);
Sean Callanan010b3732010-04-02 21:23:51 +0000663
/// Look up the name for \p Opcode.
/// \param mii Opaque pointer.
///            NOTE(review): presumably the instruction-info table the name is
///            read from - confirm at the definition.
const char *GetInstrName(unsigned Opcode, const void *mii);
Benjamin Kramer478e8de2012-02-11 14:50:54 +0000665
Richard Smith89ee75d2014-04-20 21:07:34 +0000666} // namespace X86Disassembler
667} // namespace llvm
Craig Topperfb39f972012-07-31 04:58:05 +0000668
Sean Callanan04cc3072009-12-19 02:59:52 +0000669#endif