blob: 8c45402ab5e1caee8ce6683f687351677e543a29 [file] [log] [blame]
Richard Smith89ee75d2014-04-20 21:07:34 +00001//===-- X86DisassemblerDecoderInternal.h - Disassembler decoder -*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is part of the X86 Disassembler.
11// It contains the public interface of the instruction decoder.
12// Documentation for the disassembler can be found in X86Disassembler.h.
13//
14//===----------------------------------------------------------------------===//
Sean Callanan04cc3072009-12-19 02:59:52 +000015
16#ifndef X86DISASSEMBLERDECODER_H
17#define X86DISASSEMBLERDECODER_H
18
Sean Callanan04cc3072009-12-19 02:59:52 +000019#include "X86DisassemblerDecoderCommon.h"
Patrik Hagglund31998382014-04-28 12:12:27 +000020#include "llvm/ADT/ArrayRef.h"
Craig Topperfb39f972012-07-31 04:58:05 +000021
Richard Smith89ee75d2014-04-20 21:07:34 +000022namespace llvm {
23namespace X86Disassembler {
24
// Bit-field accessors for the ModR/M, SIB and REX bytes of an Intel
// instruction. Each macro moves the requested field down to bit 0 and masks
// away everything above it; the argument is evaluated exactly once.
#define modFromModRM(modRM) (((modRM) >> 6) & 0x3)
#define regFromModRM(modRM) (((modRM) >> 3) & 0x7)
#define rmFromModRM(modRM)  ((modRM) & 0x7)
#define scaleFromSIB(sib)   (((sib) >> 6) & 0x3)
#define indexFromSIB(sib)   (((sib) >> 3) & 0x7)
#define baseFromSIB(sib)    ((sib) & 0x7)
#define wFromREX(rex)       (((rex) >> 3) & 0x1)
#define rFromREX(rex)       (((rex) >> 2) & 0x1)
#define xFromREX(rex)       (((rex) >> 1) & 0x1)
#define bFromREX(rex)       ((rex) & 0x1)
Craig Topperfb39f972012-07-31 04:58:05 +000036
// Bit-field accessors for the EVEX prefix. The "NofM" suffix names which of
// the prefix's four bytes the argument is expected to be.
//
// The r, x, b, r2, vvvv and v2 accessors complement the byte before masking,
// so they return the logical inverse of the bits as stored. The remaining
// accessors return the stored bits unchanged.
//
// Every use of the argument is parenthesised so that a compound expression
// (e.g. `a | b`) is complemented/masked as a whole.
#define rFromEVEX2of4(evex) (((~(evex)) & 0x80) >> 7)
#define xFromEVEX2of4(evex) (((~(evex)) & 0x40) >> 6)
#define bFromEVEX2of4(evex) (((~(evex)) & 0x20) >> 5)
#define r2FromEVEX2of4(evex) (((~(evex)) & 0x10) >> 4)
#define mmFromEVEX2of4(evex) ((evex) & 0x3)
#define wFromEVEX3of4(evex) (((evex) & 0x80) >> 7)
#define vvvvFromEVEX3of4(evex) (((~(evex)) & 0x78) >> 3)
#define ppFromEVEX3of4(evex) ((evex) & 0x3)
#define zFromEVEX4of4(evex) (((evex) & 0x80) >> 7)
#define l2FromEVEX4of4(evex) (((evex) & 0x40) >> 6)
#define lFromEVEX4of4(evex) (((evex) & 0x20) >> 5)
#define bFromEVEX4of4(evex) (((evex) & 0x10) >> 4)
#define v2FromEVEX4of4(evex) (((~(evex)) & 0x8) >> 3)
#define aaaFromEVEX4of4(evex) ((evex) & 0x7)
51
// Bit-field accessors for the VEX prefix. "NofM" names byte N of an M-byte
// VEX prefix. The r, x, b and vvvv accessors return the logical inverse of
// the stored bits (the field is extracted and then flipped); the others
// return the stored bits unchanged.

// Three-byte form.
#define rFromVEX2of3(vex)     ((((vex) >> 7) & 0x1) ^ 0x1)
#define xFromVEX2of3(vex)     ((((vex) >> 6) & 0x1) ^ 0x1)
#define bFromVEX2of3(vex)     ((((vex) >> 5) & 0x1) ^ 0x1)
#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)
#define wFromVEX3of3(vex)     (((vex) >> 7) & 0x1)
#define vvvvFromVEX3of3(vex)  ((((vex) >> 3) & 0xf) ^ 0xf)
#define lFromVEX3of3(vex)     (((vex) >> 2) & 0x1)
#define ppFromVEX3of3(vex)    ((vex) & 0x3)

// Two-byte form.
#define rFromVEX2of2(vex)     ((((vex) >> 7) & 0x1) ^ 0x1)
#define vvvvFromVEX2of2(vex)  ((((vex) >> 3) & 0xf) ^ 0xf)
#define lFromVEX2of2(vex)     (((vex) >> 2) & 0x1)
#define ppFromVEX2of2(vex)    ((vex) & 0x3)
Sean Callanan04cc3072009-12-19 02:59:52 +000065
// Bit-field accessors for the three-byte XOP prefix ("NofM" names byte N of
// M). The r, x, b and vvvv accessors return the logical inverse of the stored
// bits; the others return the stored bits unchanged.
#define rFromXOP2of3(xop)     ((((xop) >> 7) & 0x1) ^ 0x1)
#define xFromXOP2of3(xop)     ((((xop) >> 6) & 0x1) ^ 0x1)
#define bFromXOP2of3(xop)     ((((xop) >> 5) & 0x1) ^ 0x1)
#define mmmmmFromXOP2of3(xop) ((xop) & 0x1f)
#define wFromXOP3of3(xop)     (((xop) >> 7) & 0x1)
#define vvvvFromXOP3of3(xop)  ((((xop) >> 3) & 0xf) ^ 0xf)
#define lFromXOP3of3(xop)     (((xop) >> 2) & 0x1)
#define ppFromXOP3of3(xop)    ((xop) & 0x3)
74
// Register lists used to build the decoder's enums. Each list is an X-macro:
// the user defines ENTRY(x) before expanding the list. When a list is expanded
// inside an enum, an entry's position determines the value of the enumerator
// generated from it, so the order of the entries is significant.

// 8-bit registers: AL..BH first, then R8B..R15B, then SPL/BPL/SIL/DIL.
#define REGS_8BIT \
  ENTRY(AL)   ENTRY(CL)   ENTRY(DL)   ENTRY(BL)   \
  ENTRY(AH)   ENTRY(CH)   ENTRY(DH)   ENTRY(BH)   \
  ENTRY(R8B)  ENTRY(R9B)  ENTRY(R10B) ENTRY(R11B) \
  ENTRY(R12B) ENTRY(R13B) ENTRY(R14B) ENTRY(R15B) \
  ENTRY(SPL)  ENTRY(BPL)  ENTRY(SIL)  ENTRY(DIL)
97
// 16-bit effective-address bases: the four register-pair forms, then
// SI/DI/BP/BX, then R8W..R15W.
#define EA_BASES_16BIT \
  ENTRY(BX_SI) ENTRY(BX_DI) ENTRY(BP_SI) ENTRY(BP_DI) \
  ENTRY(SI)    ENTRY(DI)    ENTRY(BP)    ENTRY(BX)    \
  ENTRY(R8W)   ENTRY(R9W)   ENTRY(R10W)  ENTRY(R11W)  \
  ENTRY(R12W)  ENTRY(R13W)  ENTRY(R14W)  ENTRY(R15W)

// 16-bit registers.
#define REGS_16BIT \
  ENTRY(AX)   ENTRY(CX)   ENTRY(DX)   ENTRY(BX)   \
  ENTRY(SP)   ENTRY(BP)   ENTRY(SI)   ENTRY(DI)   \
  ENTRY(R8W)  ENTRY(R9W)  ENTRY(R10W) ENTRY(R11W) \
  ENTRY(R12W) ENTRY(R13W) ENTRY(R14W) ENTRY(R15W)
133
// 32-bit effective-address bases. Same order as REGS_32BIT, except that the
// slot REGS_32BIT gives to ESP holds the placeholder entry `sib` here.
#define EA_BASES_32BIT \
  ENTRY(EAX)  ENTRY(ECX)  ENTRY(EDX)  ENTRY(EBX)  \
  ENTRY(sib)  ENTRY(EBP)  ENTRY(ESI)  ENTRY(EDI)  \
  ENTRY(R8D)  ENTRY(R9D)  ENTRY(R10D) ENTRY(R11D) \
  ENTRY(R12D) ENTRY(R13D) ENTRY(R14D) ENTRY(R15D)

// 32-bit registers.
#define REGS_32BIT \
  ENTRY(EAX)  ENTRY(ECX)  ENTRY(EDX)  ENTRY(EBX)  \
  ENTRY(ESP)  ENTRY(EBP)  ENTRY(ESI)  ENTRY(EDI)  \
  ENTRY(R8D)  ENTRY(R9D)  ENTRY(R10D) ENTRY(R11D) \
  ENTRY(R12D) ENTRY(R13D) ENTRY(R14D) ENTRY(R15D)
169
// 64-bit effective-address bases. Same order as REGS_64BIT, except that the
// slot REGS_64BIT gives to RSP holds the placeholder entry `sib64` here.
#define EA_BASES_64BIT \
  ENTRY(RAX)   ENTRY(RCX) ENTRY(RDX) ENTRY(RBX) \
  ENTRY(sib64) ENTRY(RBP) ENTRY(RSI) ENTRY(RDI) \
  ENTRY(R8)    ENTRY(R9)  ENTRY(R10) ENTRY(R11) \
  ENTRY(R12)   ENTRY(R13) ENTRY(R14) ENTRY(R15)

// 64-bit registers.
#define REGS_64BIT \
  ENTRY(RAX) ENTRY(RCX) ENTRY(RDX) ENTRY(RBX) \
  ENTRY(RSP) ENTRY(RBP) ENTRY(RSI) ENTRY(RDI) \
  ENTRY(R8)  ENTRY(R9)  ENTRY(R10) ENTRY(R11) \
  ENTRY(R12) ENTRY(R13) ENTRY(R14) ENTRY(R15)
205
// MMX registers MM0..MM7.
#define REGS_MMX \
  ENTRY(MM0) ENTRY(MM1) ENTRY(MM2) ENTRY(MM3) \
  ENTRY(MM4) ENTRY(MM5) ENTRY(MM6) ENTRY(MM7)
215
// XMM registers XMM0..XMM31.
#define REGS_XMM \
  ENTRY(XMM0)  ENTRY(XMM1)  ENTRY(XMM2)  ENTRY(XMM3)  \
  ENTRY(XMM4)  ENTRY(XMM5)  ENTRY(XMM6)  ENTRY(XMM7)  \
  ENTRY(XMM8)  ENTRY(XMM9)  ENTRY(XMM10) ENTRY(XMM11) \
  ENTRY(XMM12) ENTRY(XMM13) ENTRY(XMM14) ENTRY(XMM15) \
  ENTRY(XMM16) ENTRY(XMM17) ENTRY(XMM18) ENTRY(XMM19) \
  ENTRY(XMM20) ENTRY(XMM21) ENTRY(XMM22) ENTRY(XMM23) \
  ENTRY(XMM24) ENTRY(XMM25) ENTRY(XMM26) ENTRY(XMM27) \
  ENTRY(XMM28) ENTRY(XMM29) ENTRY(XMM30) ENTRY(XMM31)
Sean Callananc3fd5232011-03-15 01:23:15 +0000249
// YMM registers YMM0..YMM31.
#define REGS_YMM \
  ENTRY(YMM0)  ENTRY(YMM1)  ENTRY(YMM2)  ENTRY(YMM3)  \
  ENTRY(YMM4)  ENTRY(YMM5)  ENTRY(YMM6)  ENTRY(YMM7)  \
  ENTRY(YMM8)  ENTRY(YMM9)  ENTRY(YMM10) ENTRY(YMM11) \
  ENTRY(YMM12) ENTRY(YMM13) ENTRY(YMM14) ENTRY(YMM15) \
  ENTRY(YMM16) ENTRY(YMM17) ENTRY(YMM18) ENTRY(YMM19) \
  ENTRY(YMM20) ENTRY(YMM21) ENTRY(YMM22) ENTRY(YMM23) \
  ENTRY(YMM24) ENTRY(YMM25) ENTRY(YMM26) ENTRY(YMM27) \
  ENTRY(YMM28) ENTRY(YMM29) ENTRY(YMM30) ENTRY(YMM31)
283
// ZMM registers ZMM0..ZMM31.
#define REGS_ZMM \
  ENTRY(ZMM0)  ENTRY(ZMM1)  ENTRY(ZMM2)  ENTRY(ZMM3)  \
  ENTRY(ZMM4)  ENTRY(ZMM5)  ENTRY(ZMM6)  ENTRY(ZMM7)  \
  ENTRY(ZMM8)  ENTRY(ZMM9)  ENTRY(ZMM10) ENTRY(ZMM11) \
  ENTRY(ZMM12) ENTRY(ZMM13) ENTRY(ZMM14) ENTRY(ZMM15) \
  ENTRY(ZMM16) ENTRY(ZMM17) ENTRY(ZMM18) ENTRY(ZMM19) \
  ENTRY(ZMM20) ENTRY(ZMM21) ENTRY(ZMM22) ENTRY(ZMM23) \
  ENTRY(ZMM24) ENTRY(ZMM25) ENTRY(ZMM26) ENTRY(ZMM27) \
  ENTRY(ZMM28) ENTRY(ZMM29) ENTRY(ZMM30) ENTRY(ZMM31)
Craig Topperfb39f972012-07-31 04:58:05 +0000317
// Mask registers K0..K7.
#define REGS_MASKS \
  ENTRY(K0) ENTRY(K1) ENTRY(K2) ENTRY(K3) \
  ENTRY(K4) ENTRY(K5) ENTRY(K6) ENTRY(K7)
327
// Segment registers, in the order ES, CS, SS, DS, FS, GS.
#define REGS_SEGMENT \
  ENTRY(ES) ENTRY(CS) ENTRY(SS) \
  ENTRY(DS) ENTRY(FS) ENTRY(GS)

// Debug registers DR0..DR7.
#define REGS_DEBUG \
  ENTRY(DR0) ENTRY(DR1) ENTRY(DR2) ENTRY(DR3) \
  ENTRY(DR4) ENTRY(DR5) ENTRY(DR6) ENTRY(DR7)

// Control registers CR0..CR8 (nine entries).
#define REGS_CONTROL \
  ENTRY(CR0) ENTRY(CR1) ENTRY(CR2) ENTRY(CR3) \
  ENTRY(CR4) ENTRY(CR5) ENTRY(CR6) ENTRY(CR7) \
  ENTRY(CR8)
Craig Topperfb39f972012-07-31 04:58:05 +0000356
// Aggregate lists, built by concatenating the per-class lists in a fixed
// order.

// Every effective-address base: 16-bit, then 32-bit, then 64-bit.
#define ALL_EA_BASES \
  EA_BASES_16BIT EA_BASES_32BIT EA_BASES_64BIT

// Every SIB base: the 32-bit registers followed by the 64-bit registers.
#define ALL_SIB_BASES \
  REGS_32BIT REGS_64BIT

// Every register known to the decoder; RIP is appended as the final entry.
#define ALL_REGS \
  REGS_8BIT REGS_16BIT REGS_32BIT REGS_64BIT \
  REGS_MMX REGS_XMM REGS_YMM REGS_ZMM \
  REGS_MASKS REGS_SEGMENT REGS_DEBUG REGS_CONTROL \
  ENTRY(RIP)
380
Richard Smith6a6967e2014-04-20 22:10:16 +0000381/// \brief All possible values of the base field for effective-address
382/// computations, a.k.a. the Mod and R/M fields of the ModR/M byte.
383/// We distinguish between bases (EA_BASE_*) and registers that just happen
384/// to be referred to when Mod == 0b11 (EA_REG_*).
385enum EABase {
Sean Callanan04cc3072009-12-19 02:59:52 +0000386 EA_BASE_NONE,
387#define ENTRY(x) EA_BASE_##x,
388 ALL_EA_BASES
389#undef ENTRY
390#define ENTRY(x) EA_REG_##x,
391 ALL_REGS
392#undef ENTRY
393 EA_max
Richard Smith6a6967e2014-04-20 22:10:16 +0000394};
Craig Topperfb39f972012-07-31 04:58:05 +0000395
Richard Smith6a6967e2014-04-20 22:10:16 +0000396/// \brief All possible values of the SIB index field.
397/// borrows entries from ALL_EA_BASES with the special case that
398/// sib is synonymous with NONE.
399/// Vector SIB: index can be XMM or YMM.
400enum SIBIndex {
Sean Callanan04cc3072009-12-19 02:59:52 +0000401 SIB_INDEX_NONE,
402#define ENTRY(x) SIB_INDEX_##x,
403 ALL_EA_BASES
Manman Rena0982042012-06-26 19:47:59 +0000404 REGS_XMM
405 REGS_YMM
Elena Demikhovsky003e7d72013-07-28 08:28:38 +0000406 REGS_ZMM
Sean Callanan04cc3072009-12-19 02:59:52 +0000407#undef ENTRY
408 SIB_INDEX_max
Richard Smith6a6967e2014-04-20 22:10:16 +0000409};
Craig Topperfb39f972012-07-31 04:58:05 +0000410
Richard Smith6a6967e2014-04-20 22:10:16 +0000411/// \brief All possible values of the SIB base field.
412enum SIBBase {
Sean Callanan04cc3072009-12-19 02:59:52 +0000413 SIB_BASE_NONE,
414#define ENTRY(x) SIB_BASE_##x,
415 ALL_SIB_BASES
416#undef ENTRY
417 SIB_BASE_max
Richard Smith6a6967e2014-04-20 22:10:16 +0000418};
Sean Callanan04cc3072009-12-19 02:59:52 +0000419
/// \brief Possible displacement types for effective-address computations.
///
/// Declared as a named enum, matching the other enums in this header, rather
/// than as a typedef of an anonymous enum. The type name and the enumerator
/// values are unchanged.
enum EADisplacement {
  EA_DISP_NONE, ///< No displacement.
  EA_DISP_8,    ///< 8-bit displacement.
  EA_DISP_16,   ///< 16-bit displacement.
  EA_DISP_32    ///< 32-bit displacement.
};
427
Richard Smith6a6967e2014-04-20 22:10:16 +0000428/// \brief All possible values of the reg field in the ModR/M byte.
429enum Reg {
Sean Callanan2f9443f2009-12-22 02:07:42 +0000430#define ENTRY(x) MODRM_REG_##x,
Sean Callanan04cc3072009-12-19 02:59:52 +0000431 ALL_REGS
432#undef ENTRY
Sean Callanan2f9443f2009-12-22 02:07:42 +0000433 MODRM_REG_max
Richard Smith6a6967e2014-04-20 22:10:16 +0000434};
Craig Topperfb39f972012-07-31 04:58:05 +0000435
/// \brief Every segment override the decoder can record.
///
/// SEG_OVERRIDE_NONE is the zero value; SEG_OVERRIDE_max is one past the last
/// real override.
enum SegmentOverride {
  SEG_OVERRIDE_NONE = 0,
  SEG_OVERRIDE_CS,
  SEG_OVERRIDE_SS,
  SEG_OVERRIDE_DS,
  SEG_OVERRIDE_ES,
  SEG_OVERRIDE_FS,
  SEG_OVERRIDE_GS,
  SEG_OVERRIDE_max
};
Craig Topperfb39f972012-07-31 04:58:05 +0000447
/// \brief Values the VEX.m-mmmm field can hold.
enum VEXLeadingOpcodeByte {
  VEX_LOB_0F   = 0x01,
  VEX_LOB_0F38 = 0x02,
  VEX_LOB_0F3A = 0x03
};
Sean Callananc3fd5232011-03-15 01:23:15 +0000454
/// \brief XOP map-select values. Each enumerator's value equals the map
/// number in its name (8, 9 and 0xA).
enum XOPMapSelect {
  XOP_MAP_SELECT_8 = 0x08,
  XOP_MAP_SELECT_9 = 0x09,
  XOP_MAP_SELECT_A = 0x0A
};
Craig Topper9e3e38a2013-10-03 05:17:48 +0000460
/// \brief Values the VEX.pp / EVEX.pp field can hold.
enum VEXPrefixCode {
  VEX_PREFIX_NONE = 0x00,
  VEX_PREFIX_66   = 0x01,
  VEX_PREFIX_F3   = 0x02,
  VEX_PREFIX_F2   = 0x03
};
Sean Callanan04cc3072009-12-19 02:59:52 +0000468
/// \brief Kind of vector-extension prefix: none, two-byte VEX, three-byte VEX,
/// EVEX or XOP.
enum VectorExtensionType {
  TYPE_NO_VEX_XOP = 0x00,
  TYPE_VEX_2B     = 0x01,
  TYPE_VEX_3B     = 0x02,
  TYPE_EVEX       = 0x03,
  TYPE_XOP        = 0x04
};
Craig Topper9e3e38a2013-10-03 05:17:48 +0000476
/// \brief Signature of the byte reader the consumer must hand to the decoder.
/// It reads a single byte from the instruction's address space.
/// \param arg     A baton the consumer can associate with whatever internal
///                state it needs.
/// \param byte    Points at the single byte of memory that should be set to
///                the value found at \p address.
/// \param address The address, in the instruction's address space, to read
///                from.
/// \return -1 if the byte cannot be read for any reason; 0 otherwise.
using byteReader_t = int (*)(const void *arg, uint8_t *byte, uint64_t address);
Sean Callanan04cc3072009-12-19 02:59:52 +0000487
/// \brief Signature of the optional logging function through which the
/// consumer receives debugging output from the decoder.
/// \param arg A baton the consumer can associate with whatever internal state
///            it needs.
/// \param log The message text. The string will be reused after the logger
///            returns.
using dlog_t = void (*)(void *arg, const char *log);
Sean Callanan04cc3072009-12-19 02:59:52 +0000495
/// Describes how a complete instruction and its operands are to be extracted
/// and interpreted.
struct InstructionSpecifier {
  // NOTE(review): presumably identifies the instruction's operand set in a
  // table defined elsewhere -- confirm against the decoder tables.
  uint16_t operands;
};
501
Richard Smith6a6967e2014-04-20 22:10:16 +0000502/// The x86 internal instruction, which is produced by the decoder.
Sean Callanan04cc3072009-12-19 02:59:52 +0000503struct InternalInstruction {
Richard Smith6a6967e2014-04-20 22:10:16 +0000504 // Reader interface (C)
Sean Callanan04cc3072009-12-19 02:59:52 +0000505 byteReader_t reader;
Richard Smith6a6967e2014-04-20 22:10:16 +0000506 // Opaque value passed to the reader
Roman Divacky67923802012-09-05 21:17:34 +0000507 const void* readerArg;
Richard Smith6a6967e2014-04-20 22:10:16 +0000508 // The address of the next byte to read via the reader
Sean Callanan04cc3072009-12-19 02:59:52 +0000509 uint64_t readerCursor;
510
Richard Smith6a6967e2014-04-20 22:10:16 +0000511 // Logger interface (C)
Sean Callanan04cc3072009-12-19 02:59:52 +0000512 dlog_t dlog;
Richard Smith6a6967e2014-04-20 22:10:16 +0000513 // Opaque value passed to the logger
Sean Callanan04cc3072009-12-19 02:59:52 +0000514 void* dlogArg;
515
Richard Smith6a6967e2014-04-20 22:10:16 +0000516 // General instruction information
Craig Topperfb39f972012-07-31 04:58:05 +0000517
Richard Smith6a6967e2014-04-20 22:10:16 +0000518 // The mode to disassemble for (64-bit, protected, real)
Sean Callanan04cc3072009-12-19 02:59:52 +0000519 DisassemblerMode mode;
Richard Smith6a6967e2014-04-20 22:10:16 +0000520 // The start of the instruction, usable with the reader
Sean Callanan04cc3072009-12-19 02:59:52 +0000521 uint64_t startLocation;
Richard Smith6a6967e2014-04-20 22:10:16 +0000522 // The length of the instruction, in bytes
Sean Callanan04cc3072009-12-19 02:59:52 +0000523 size_t length;
Craig Topperfb39f972012-07-31 04:58:05 +0000524
Richard Smith6a6967e2014-04-20 22:10:16 +0000525 // Prefix state
Craig Topperfb39f972012-07-31 04:58:05 +0000526
Richard Smith6a6967e2014-04-20 22:10:16 +0000527 // 1 if the prefix byte corresponding to the entry is present; 0 if not
Sean Callanan04cc3072009-12-19 02:59:52 +0000528 uint8_t prefixPresent[0x100];
Richard Smith6a6967e2014-04-20 22:10:16 +0000529 // contains the location (for use with the reader) of the prefix byte
Sean Callanan04cc3072009-12-19 02:59:52 +0000530 uint64_t prefixLocations[0x100];
Richard Smith6a6967e2014-04-20 22:10:16 +0000531 // The value of the vector extension prefix(EVEX/VEX/XOP), if present
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000532 uint8_t vectorExtensionPrefix[4];
Richard Smith6a6967e2014-04-20 22:10:16 +0000533 // The type of the vector extension prefix
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000534 VectorExtensionType vectorExtensionType;
Richard Smith6a6967e2014-04-20 22:10:16 +0000535 // The value of the REX prefix, if present
Sean Callanan04cc3072009-12-19 02:59:52 +0000536 uint8_t rexPrefix;
Richard Smith6a6967e2014-04-20 22:10:16 +0000537 // The location where a mandatory prefix would have to be (i.e., right before
538 // the opcode, or right before the REX prefix if one is present).
Sean Callanan04cc3072009-12-19 02:59:52 +0000539 uint64_t necessaryPrefixLocation;
Richard Smith6a6967e2014-04-20 22:10:16 +0000540 // The segment override type
Sean Callanan04cc3072009-12-19 02:59:52 +0000541 SegmentOverride segmentOverride;
Richard Smith6a6967e2014-04-20 22:10:16 +0000542 // 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease
Richard Smith5d5061032014-04-20 22:15:37 +0000543 bool xAcquireRelease;
Craig Topperfb39f972012-07-31 04:58:05 +0000544
Richard Smith6a6967e2014-04-20 22:10:16 +0000545 // Sizes of various critical pieces of data, in bytes
Sean Callanan04cc3072009-12-19 02:59:52 +0000546 uint8_t registerSize;
547 uint8_t addressSize;
548 uint8_t displacementSize;
549 uint8_t immediateSize;
Kevin Enderby6fbcd8d2012-02-23 18:18:17 +0000550
Richard Smith6a6967e2014-04-20 22:10:16 +0000551 // Offsets from the start of the instruction to the pieces of data, which is
552 // needed to find relocation entries for adding symbolic operands.
Kevin Enderby6fbcd8d2012-02-23 18:18:17 +0000553 uint8_t displacementOffset;
554 uint8_t immediateOffset;
Craig Topperfb39f972012-07-31 04:58:05 +0000555
Richard Smith6a6967e2014-04-20 22:10:16 +0000556 // opcode state
Craig Topperfb39f972012-07-31 04:58:05 +0000557
Richard Smith6a6967e2014-04-20 22:10:16 +0000558 // The last byte of the opcode, not counting any ModR/M extension
Sean Callanan04cc3072009-12-19 02:59:52 +0000559 uint8_t opcode;
Richard Smith6a6967e2014-04-20 22:10:16 +0000560 // The ModR/M byte of the instruction, if it is an opcode extension
Sean Callanan04cc3072009-12-19 02:59:52 +0000561 uint8_t modRMExtension;
Craig Topperfb39f972012-07-31 04:58:05 +0000562
Richard Smith6a6967e2014-04-20 22:10:16 +0000563 // decode state
Craig Topperfb39f972012-07-31 04:58:05 +0000564
Richard Smith6a6967e2014-04-20 22:10:16 +0000565 // The type of opcode, used for indexing into the array of decode tables
Sean Callanan04cc3072009-12-19 02:59:52 +0000566 OpcodeType opcodeType;
Richard Smith6a6967e2014-04-20 22:10:16 +0000567 // The instruction ID, extracted from the decode table
Sean Callanan04cc3072009-12-19 02:59:52 +0000568 uint16_t instructionID;
Richard Smith6a6967e2014-04-20 22:10:16 +0000569 // The specifier for the instruction, from the instruction info table
Richard Smith3c3410f2014-04-20 21:56:02 +0000570 const InstructionSpecifier *spec;
Craig Topperfb39f972012-07-31 04:58:05 +0000571
Richard Smith6a6967e2014-04-20 22:10:16 +0000572 // state for additional bytes, consumed during operand decode. Pattern:
573 // consumed___ indicates that the byte was already consumed and does not
574 // need to be consumed again.
Sean Callananc3fd5232011-03-15 01:23:15 +0000575
Richard Smith6a6967e2014-04-20 22:10:16 +0000576 // The VEX.vvvv field, which contains a third register operand for some AVX
577 // instructions.
Sean Callananc3fd5232011-03-15 01:23:15 +0000578 Reg vvvv;
Craig Topperfb39f972012-07-31 04:58:05 +0000579
Richard Smith6a6967e2014-04-20 22:10:16 +0000580 // The writemask for AVX-512 instructions which is contained in EVEX.aaa
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000581 Reg writemask;
582
Richard Smith6a6967e2014-04-20 22:10:16 +0000583 // The ModR/M byte, which contains most register operands and some portion of
584 // all memory operands.
Richard Smith5d5061032014-04-20 22:15:37 +0000585 bool consumedModRM;
Sean Callanan04cc3072009-12-19 02:59:52 +0000586 uint8_t modRM;
Craig Topperfb39f972012-07-31 04:58:05 +0000587
Richard Smith6a6967e2014-04-20 22:10:16 +0000588 // The SIB byte, used for more complex 32- or 64-bit memory operands
Richard Smith5d5061032014-04-20 22:15:37 +0000589 bool consumedSIB;
Sean Callanan04cc3072009-12-19 02:59:52 +0000590 uint8_t sib;
591
Richard Smith6a6967e2014-04-20 22:10:16 +0000592 // The displacement, used for memory operands
Richard Smith5d5061032014-04-20 22:15:37 +0000593 bool consumedDisplacement;
Sean Callanan04cc3072009-12-19 02:59:52 +0000594 int32_t displacement;
Craig Topperfb39f972012-07-31 04:58:05 +0000595
Richard Smith6a6967e2014-04-20 22:10:16 +0000596 // Immediates. There can be two in some cases
Sean Callanan04cc3072009-12-19 02:59:52 +0000597 uint8_t numImmediatesConsumed;
598 uint8_t numImmediatesTranslated;
599 uint64_t immediates[2];
Craig Topperfb39f972012-07-31 04:58:05 +0000600
Richard Smith6a6967e2014-04-20 22:10:16 +0000601 // A register or immediate operand encoded into the opcode
Sean Callanan04cc3072009-12-19 02:59:52 +0000602 Reg opcodeRegister;
Craig Topperfb39f972012-07-31 04:58:05 +0000603
Richard Smith6a6967e2014-04-20 22:10:16 +0000604 // Portions of the ModR/M byte
Craig Topperfb39f972012-07-31 04:58:05 +0000605
Richard Smith6a6967e2014-04-20 22:10:16 +0000606 // These fields determine the allowable values for the ModR/M fields, which
607 // depend on operand and address widths.
Sean Callanan04cc3072009-12-19 02:59:52 +0000608 EABase eaBaseBase;
609 EABase eaRegBase;
610 Reg regBase;
611
Richard Smith6a6967e2014-04-20 22:10:16 +0000612 // The Mod and R/M fields can encode a base for an effective address, or a
613 // register. These are separated into two fields here.
Sean Callanan04cc3072009-12-19 02:59:52 +0000614 EABase eaBase;
615 EADisplacement eaDisplacement;
Richard Smith6a6967e2014-04-20 22:10:16 +0000616 // The reg field always encodes a register
Sean Callanan04cc3072009-12-19 02:59:52 +0000617 Reg reg;
Craig Topperfb39f972012-07-31 04:58:05 +0000618
Richard Smith6a6967e2014-04-20 22:10:16 +0000619 // SIB state
Sean Callanan04cc3072009-12-19 02:59:52 +0000620 SIBIndex sibIndex;
621 uint8_t sibScale;
622 SIBBase sibBase;
Craig Topperb8aec082012-08-01 07:39:18 +0000623
Patrik Hagglund31998382014-04-28 12:12:27 +0000624 ArrayRef<OperandSpecifier> operands;
Sean Callanan04cc3072009-12-19 02:59:52 +0000625};
626
Richard Smith6a6967e2014-04-20 22:10:16 +0000627/// \brief Decode one instruction and store the decoding results in
628/// a buffer provided by the consumer.
629/// \param insn The buffer to store the instruction in. Allocated by the
630/// consumer.
631/// \param reader The byteReader_t for the bytes to be read.
632/// \param readerArg An argument to pass to the reader for storing context
633/// specific to the consumer. May be NULL.
634/// \param logger The dlog_t to be used in printing status messages from the
635/// disassembler. May be NULL.
636/// \param loggerArg An argument to pass to the logger for storing context
637/// specific to the logger. May be NULL.
638/// \param startLoc The address (in the reader's address space) of the first
639/// byte in the instruction.
640/// \param mode The mode (16-bit, 32-bit, 64-bit) to decode in.
641/// \return Nonzero if there was an error during decode, 0 otherwise.
Richard Smith3c3410f2014-04-20 21:56:02 +0000642int decodeInstruction(InternalInstruction *insn,
Sean Callanan04cc3072009-12-19 02:59:52 +0000643 byteReader_t reader,
Richard Smith3c3410f2014-04-20 21:56:02 +0000644 const void *readerArg,
Sean Callanan04cc3072009-12-19 02:59:52 +0000645 dlog_t logger,
Richard Smith3c3410f2014-04-20 21:56:02 +0000646 void *loggerArg,
647 const void *miiArg,
Sean Callanan04cc3072009-12-19 02:59:52 +0000648 uint64_t startLoc,
649 DisassemblerMode mode);
650
/// \brief Print a message to debugs().
/// \param file Name of the file that is printing the debug message.
/// \param line Line number that printed the debug message.
/// \param s    The message text.
void Debug(const char *file, unsigned int line, const char *s);
Sean Callanan010b3732010-04-02 21:23:51 +0000656
/// Returns a C string for the given opcode.
/// NOTE(review): presumably the instruction's name looked up through \p mii,
/// an opaque instruction-info object -- confirm against the definition.
/// \param Opcode The opcode to look up.
/// \param mii    Opaque pointer supplied by the caller.
const char *GetInstrName(unsigned int Opcode, const void *mii);
Benjamin Kramer478e8de2012-02-11 14:50:54 +0000658
Richard Smith89ee75d2014-04-20 21:07:34 +0000659} // namespace X86Disassembler
660} // namespace llvm
Craig Topperfb39f972012-07-31 04:58:05 +0000661
Sean Callanan04cc3072009-12-19 02:59:52 +0000662#endif