blob: b07fd0b17d352333331424f27915d2710391353f [file] [log] [blame]
//===-- X86DisassemblerDecoderInternal.h - Disassembler decoder -*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler.
// It contains the public interface of the instruction decoder.
// Documentation for the disassembler can be found in X86Disassembler.h.
//
//===----------------------------------------------------------------------===//

Benjamin Kramera7c40ef2014-08-13 16:26:38 +000016#ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H
17#define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H
Sean Callanan04cc3072009-12-19 02:59:52 +000018
Sean Callanan04cc3072009-12-19 02:59:52 +000019#include "X86DisassemblerDecoderCommon.h"
Patrik Hagglund31998382014-04-28 12:12:27 +000020#include "llvm/ADT/ArrayRef.h"
Craig Topperfb39f972012-07-31 04:58:05 +000021
Richard Smith89ee75d2014-04-20 21:07:34 +000022namespace llvm {
23namespace X86Disassembler {
24
// Accessor functions for various fields of an Intel instruction.
//
// ModR/M byte: bits 7-6 = mod, bits 5-3 = reg, bits 2-0 = r/m.
// SIB byte:    bits 7-6 = scale, bits 5-3 = index, bits 2-0 = base.
// REX prefix:  bit 3 = W, bit 2 = R, bit 1 = X, bit 0 = B.
//
// Each macro masks the requested field out of its argument and shifts it
// down so that the result starts at bit 0.
#define modFromModRM(modRM)  (((modRM) & 0xc0) >> 6)
#define regFromModRM(modRM)  (((modRM) & 0x38) >> 3)
#define rmFromModRM(modRM)   ((modRM) & 0x7)
#define scaleFromSIB(sib)    (((sib) & 0xc0) >> 6)
#define indexFromSIB(sib)    (((sib) & 0x38) >> 3)
#define baseFromSIB(sib)     ((sib) & 0x7)
#define wFromREX(rex)        (((rex) & 0x8) >> 3)
#define rFromREX(rex)        (((rex) & 0x4) >> 2)
#define xFromREX(rex)        (((rex) & 0x2) >> 1)
#define bFromREX(rex)        ((rex) & 0x1)

// Accessors for the EVEX prefix. The "NofM" suffix names which byte of the
// prefix the macro expects as its argument. Fields that are complemented with
// ~ before masking are stored inverted in the prefix byte; the macro returns
// the un-inverted value.
//
// Every use of the macro argument is parenthesised so that compound
// expressions such as `a | b` are complemented/masked as a whole.
#define rFromEVEX2of4(evex)     (((~(evex)) & 0x80) >> 7)
#define xFromEVEX2of4(evex)     (((~(evex)) & 0x40) >> 6)
#define bFromEVEX2of4(evex)     (((~(evex)) & 0x20) >> 5)
#define r2FromEVEX2of4(evex)    (((~(evex)) & 0x10) >> 4)
#define mmFromEVEX2of4(evex)    ((evex) & 0x3)
#define wFromEVEX3of4(evex)     (((evex) & 0x80) >> 7)
#define vvvvFromEVEX3of4(evex)  (((~(evex)) & 0x78) >> 3)
#define ppFromEVEX3of4(evex)    ((evex) & 0x3)
#define zFromEVEX4of4(evex)     (((evex) & 0x80) >> 7)
#define l2FromEVEX4of4(evex)    (((evex) & 0x40) >> 6)
#define lFromEVEX4of4(evex)     (((evex) & 0x20) >> 5)
#define bFromEVEX4of4(evex)     (((evex) & 0x10) >> 4)
#define v2FromEVEX4of4(evex)    (((~(evex)) & 0x8) >> 3)
#define aaaFromEVEX4of4(evex)   ((evex) & 0x7)

// Accessors for the VEX prefix. "NofM" names the byte of an M-byte VEX prefix
// that the macro expects as its argument. Fields complemented with ~ are
// stored inverted in the prefix byte; the macro returns the un-inverted value.

// Three-byte VEX prefix.
#define rFromVEX2of3(vex)       (((~(vex)) & 0x80) >> 7)
#define xFromVEX2of3(vex)       (((~(vex)) & 0x40) >> 6)
#define bFromVEX2of3(vex)       (((~(vex)) & 0x20) >> 5)
#define mmmmmFromVEX2of3(vex)   ((vex) & 0x1f)
#define wFromVEX3of3(vex)       (((vex) & 0x80) >> 7)
#define vvvvFromVEX3of3(vex)    (((~(vex)) & 0x78) >> 3)
#define lFromVEX3of3(vex)       (((vex) & 0x4) >> 2)
#define ppFromVEX3of3(vex)      ((vex) & 0x3)

// Two-byte VEX prefix.
#define rFromVEX2of2(vex)       (((~(vex)) & 0x80) >> 7)
#define vvvvFromVEX2of2(vex)    (((~(vex)) & 0x78) >> 3)
#define lFromVEX2of2(vex)       (((vex) & 0x4) >> 2)
#define ppFromVEX2of2(vex)      ((vex) & 0x3)

// Accessors for the three-byte XOP prefix. "NofM" names the byte of the
// prefix that the macro expects as its argument. Fields complemented with ~
// are stored inverted in the prefix byte; the macro returns the un-inverted
// value.
#define rFromXOP2of3(xop)       (((~(xop)) & 0x80) >> 7)
#define xFromXOP2of3(xop)       (((~(xop)) & 0x40) >> 6)
#define bFromXOP2of3(xop)       (((~(xop)) & 0x20) >> 5)
#define mmmmmFromXOP2of3(xop)   ((xop) & 0x1f)
#define wFromXOP3of3(xop)       (((xop) & 0x80) >> 7)
#define vvvvFromXOP3of3(xop)    (((~(xop)) & 0x78) >> 3)
#define lFromXOP3of3(xop)       (((xop) & 0x4) >> 2)
#define ppFromXOP3of3(xop)      ((xop) & 0x3)

// These enums represent Intel registers for use by the decoder.
//
// Each list below is an X-macro: it expands to one ENTRY(name) per item, in
// the order written, so ENTRY must be defined at the point of expansion.

// The 8-bit general-purpose registers.
#define REGS_8BIT     \
  ENTRY(AL)           \
  ENTRY(CL)           \
  ENTRY(DL)           \
  ENTRY(BL)           \
  ENTRY(AH)           \
  ENTRY(CH)           \
  ENTRY(DH)           \
  ENTRY(BH)           \
  ENTRY(R8B)          \
  ENTRY(R9B)          \
  ENTRY(R10B)         \
  ENTRY(R11B)         \
  ENTRY(R12B)         \
  ENTRY(R13B)         \
  ENTRY(R14B)         \
  ENTRY(R15B)         \
  ENTRY(SPL)          \
  ENTRY(BPL)          \
  ENTRY(SIL)          \
  ENTRY(DIL)

// X-macro list of the effective-address bases for 16-bit addressing: the
// four register-pair bases, then SI, DI, BP, BX, then R8W-R15W. Expands to
// one ENTRY(name) per item; ENTRY must be defined at the point of expansion.
#define EA_BASES_16BIT \
  ENTRY(BX_SI)         \
  ENTRY(BX_DI)         \
  ENTRY(BP_SI)         \
  ENTRY(BP_DI)         \
  ENTRY(SI)            \
  ENTRY(DI)            \
  ENTRY(BP)            \
  ENTRY(BX)            \
  ENTRY(R8W)           \
  ENTRY(R9W)           \
  ENTRY(R10W)          \
  ENTRY(R11W)          \
  ENTRY(R12W)          \
  ENTRY(R13W)          \
  ENTRY(R14W)          \
  ENTRY(R15W)

// X-macro list of the 16-bit general-purpose registers. Expands to one
// ENTRY(name) per item; ENTRY must be defined at the point of expansion.
#define REGS_16BIT    \
  ENTRY(AX)           \
  ENTRY(CX)           \
  ENTRY(DX)           \
  ENTRY(BX)           \
  ENTRY(SP)           \
  ENTRY(BP)           \
  ENTRY(SI)           \
  ENTRY(DI)           \
  ENTRY(R8W)          \
  ENTRY(R9W)          \
  ENTRY(R10W)         \
  ENTRY(R11W)         \
  ENTRY(R12W)         \
  ENTRY(R13W)         \
  ENTRY(R14W)         \
  ENTRY(R15W)

// X-macro list of the effective-address bases for 32-bit addressing. Same
// order as REGS_32BIT, except that the fifth slot holds the pseudo-entry
// `sib` where REGS_32BIT has ESP. Expands to one ENTRY(name) per item; ENTRY
// must be defined at the point of expansion.
#define EA_BASES_32BIT \
  ENTRY(EAX)           \
  ENTRY(ECX)           \
  ENTRY(EDX)           \
  ENTRY(EBX)           \
  ENTRY(sib)           \
  ENTRY(EBP)           \
  ENTRY(ESI)           \
  ENTRY(EDI)           \
  ENTRY(R8D)           \
  ENTRY(R9D)           \
  ENTRY(R10D)          \
  ENTRY(R11D)          \
  ENTRY(R12D)          \
  ENTRY(R13D)          \
  ENTRY(R14D)          \
  ENTRY(R15D)

// X-macro list of the 32-bit general-purpose registers. Expands to one
// ENTRY(name) per item; ENTRY must be defined at the point of expansion.
#define REGS_32BIT    \
  ENTRY(EAX)          \
  ENTRY(ECX)          \
  ENTRY(EDX)          \
  ENTRY(EBX)          \
  ENTRY(ESP)          \
  ENTRY(EBP)          \
  ENTRY(ESI)          \
  ENTRY(EDI)          \
  ENTRY(R8D)          \
  ENTRY(R9D)          \
  ENTRY(R10D)         \
  ENTRY(R11D)         \
  ENTRY(R12D)         \
  ENTRY(R13D)         \
  ENTRY(R14D)         \
  ENTRY(R15D)

// X-macro list of the effective-address bases for 64-bit addressing. Same
// order as REGS_64BIT, except that the fifth slot holds the pseudo-entry
// `sib64` where REGS_64BIT has RSP. Expands to one ENTRY(name) per item;
// ENTRY must be defined at the point of expansion.
#define EA_BASES_64BIT \
  ENTRY(RAX)           \
  ENTRY(RCX)           \
  ENTRY(RDX)           \
  ENTRY(RBX)           \
  ENTRY(sib64)         \
  ENTRY(RBP)           \
  ENTRY(RSI)           \
  ENTRY(RDI)           \
  ENTRY(R8)            \
  ENTRY(R9)            \
  ENTRY(R10)           \
  ENTRY(R11)           \
  ENTRY(R12)           \
  ENTRY(R13)           \
  ENTRY(R14)           \
  ENTRY(R15)

// X-macro list of the 64-bit general-purpose registers. Expands to one
// ENTRY(name) per item; ENTRY must be defined at the point of expansion.
#define REGS_64BIT    \
  ENTRY(RAX)          \
  ENTRY(RCX)          \
  ENTRY(RDX)          \
  ENTRY(RBX)          \
  ENTRY(RSP)          \
  ENTRY(RBP)          \
  ENTRY(RSI)          \
  ENTRY(RDI)          \
  ENTRY(R8)           \
  ENTRY(R9)           \
  ENTRY(R10)          \
  ENTRY(R11)          \
  ENTRY(R12)          \
  ENTRY(R13)          \
  ENTRY(R14)          \
  ENTRY(R15)

// X-macro list of the MMX registers MM0-MM7. Expands to one ENTRY(name) per
// item; ENTRY must be defined at the point of expansion.
#define REGS_MMX  \
  ENTRY(MM0)      \
  ENTRY(MM1)      \
  ENTRY(MM2)      \
  ENTRY(MM3)      \
  ENTRY(MM4)      \
  ENTRY(MM5)      \
  ENTRY(MM6)      \
  ENTRY(MM7)

// X-macro list of the XMM registers XMM0-XMM31. Expands to one ENTRY(name)
// per item; ENTRY must be defined at the point of expansion.
#define REGS_XMM  \
  ENTRY(XMM0)     \
  ENTRY(XMM1)     \
  ENTRY(XMM2)     \
  ENTRY(XMM3)     \
  ENTRY(XMM4)     \
  ENTRY(XMM5)     \
  ENTRY(XMM6)     \
  ENTRY(XMM7)     \
  ENTRY(XMM8)     \
  ENTRY(XMM9)     \
  ENTRY(XMM10)    \
  ENTRY(XMM11)    \
  ENTRY(XMM12)    \
  ENTRY(XMM13)    \
  ENTRY(XMM14)    \
  ENTRY(XMM15)    \
  ENTRY(XMM16)    \
  ENTRY(XMM17)    \
  ENTRY(XMM18)    \
  ENTRY(XMM19)    \
  ENTRY(XMM20)    \
  ENTRY(XMM21)    \
  ENTRY(XMM22)    \
  ENTRY(XMM23)    \
  ENTRY(XMM24)    \
  ENTRY(XMM25)    \
  ENTRY(XMM26)    \
  ENTRY(XMM27)    \
  ENTRY(XMM28)    \
  ENTRY(XMM29)    \
  ENTRY(XMM30)    \
  ENTRY(XMM31)

// X-macro list of the YMM registers YMM0-YMM31. Expands to one ENTRY(name)
// per item; ENTRY must be defined at the point of expansion.
#define REGS_YMM  \
  ENTRY(YMM0)     \
  ENTRY(YMM1)     \
  ENTRY(YMM2)     \
  ENTRY(YMM3)     \
  ENTRY(YMM4)     \
  ENTRY(YMM5)     \
  ENTRY(YMM6)     \
  ENTRY(YMM7)     \
  ENTRY(YMM8)     \
  ENTRY(YMM9)     \
  ENTRY(YMM10)    \
  ENTRY(YMM11)    \
  ENTRY(YMM12)    \
  ENTRY(YMM13)    \
  ENTRY(YMM14)    \
  ENTRY(YMM15)    \
  ENTRY(YMM16)    \
  ENTRY(YMM17)    \
  ENTRY(YMM18)    \
  ENTRY(YMM19)    \
  ENTRY(YMM20)    \
  ENTRY(YMM21)    \
  ENTRY(YMM22)    \
  ENTRY(YMM23)    \
  ENTRY(YMM24)    \
  ENTRY(YMM25)    \
  ENTRY(YMM26)    \
  ENTRY(YMM27)    \
  ENTRY(YMM28)    \
  ENTRY(YMM29)    \
  ENTRY(YMM30)    \
  ENTRY(YMM31)

// X-macro list of the ZMM registers ZMM0-ZMM31. Expands to one ENTRY(name)
// per item; ENTRY must be defined at the point of expansion.
#define REGS_ZMM  \
  ENTRY(ZMM0)     \
  ENTRY(ZMM1)     \
  ENTRY(ZMM2)     \
  ENTRY(ZMM3)     \
  ENTRY(ZMM4)     \
  ENTRY(ZMM5)     \
  ENTRY(ZMM6)     \
  ENTRY(ZMM7)     \
  ENTRY(ZMM8)     \
  ENTRY(ZMM9)     \
  ENTRY(ZMM10)    \
  ENTRY(ZMM11)    \
  ENTRY(ZMM12)    \
  ENTRY(ZMM13)    \
  ENTRY(ZMM14)    \
  ENTRY(ZMM15)    \
  ENTRY(ZMM16)    \
  ENTRY(ZMM17)    \
  ENTRY(ZMM18)    \
  ENTRY(ZMM19)    \
  ENTRY(ZMM20)    \
  ENTRY(ZMM21)    \
  ENTRY(ZMM22)    \
  ENTRY(ZMM23)    \
  ENTRY(ZMM24)    \
  ENTRY(ZMM25)    \
  ENTRY(ZMM26)    \
  ENTRY(ZMM27)    \
  ENTRY(ZMM28)    \
  ENTRY(ZMM29)    \
  ENTRY(ZMM30)    \
  ENTRY(ZMM31)

// X-macro list of the mask registers K0-K7. Expands to one ENTRY(name) per
// item; ENTRY must be defined at the point of expansion.
#define REGS_MASKS \
  ENTRY(K0)        \
  ENTRY(K1)        \
  ENTRY(K2)        \
  ENTRY(K3)        \
  ENTRY(K4)        \
  ENTRY(K5)        \
  ENTRY(K6)        \
  ENTRY(K7)

// X-macro list of the segment registers. Expands to one ENTRY(name) per
// item; ENTRY must be defined at the point of expansion.
#define REGS_SEGMENT \
  ENTRY(ES)          \
  ENTRY(CS)          \
  ENTRY(SS)          \
  ENTRY(DS)          \
  ENTRY(FS)          \
  ENTRY(GS)

// X-macro list of the debug registers DR0-DR15. Expands to one ENTRY(name)
// per item; ENTRY must be defined at the point of expansion.
#define REGS_DEBUG  \
  ENTRY(DR0)        \
  ENTRY(DR1)        \
  ENTRY(DR2)        \
  ENTRY(DR3)        \
  ENTRY(DR4)        \
  ENTRY(DR5)        \
  ENTRY(DR6)        \
  ENTRY(DR7)        \
  ENTRY(DR8)        \
  ENTRY(DR9)        \
  ENTRY(DR10)       \
  ENTRY(DR11)       \
  ENTRY(DR12)       \
  ENTRY(DR13)       \
  ENTRY(DR14)       \
  ENTRY(DR15)

// X-macro list of the control registers CR0-CR15. Expands to one ENTRY(name)
// per item; ENTRY must be defined at the point of expansion.
#define REGS_CONTROL  \
  ENTRY(CR0)          \
  ENTRY(CR1)          \
  ENTRY(CR2)          \
  ENTRY(CR3)          \
  ENTRY(CR4)          \
  ENTRY(CR5)          \
  ENTRY(CR6)          \
  ENTRY(CR7)          \
  ENTRY(CR8)          \
  ENTRY(CR9)          \
  ENTRY(CR10)         \
  ENTRY(CR11)         \
  ENTRY(CR12)         \
  ENTRY(CR13)         \
  ENTRY(CR14)         \
  ENTRY(CR15)

// X-macro list of the bound registers BND0-BND3. Expands to one ENTRY(name)
// per item; ENTRY must be defined at the point of expansion.
#define REGS_BOUND    \
  ENTRY(BND0)         \
  ENTRY(BND1)         \
  ENTRY(BND2)         \
  ENTRY(BND3)

// Every effective-address base: the 16-bit list, then the 32-bit list, then
// the 64-bit list. ENTRY must be defined at the point of expansion.
#define ALL_EA_BASES  \
  EA_BASES_16BIT      \
  EA_BASES_32BIT      \
  EA_BASES_64BIT

// Every register usable as a SIB base: the 32-bit registers followed by the
// 64-bit registers. ENTRY must be defined at the point of expansion.
#define ALL_SIB_BASES \
  REGS_32BIT          \
  REGS_64BIT

// Every register known to the decoder: each register-class list in turn,
// followed by RIP. ENTRY must be defined at the point of expansion.
#define ALL_REGS      \
  REGS_8BIT           \
  REGS_16BIT          \
  REGS_32BIT          \
  REGS_64BIT          \
  REGS_MMX            \
  REGS_XMM            \
  REGS_YMM            \
  REGS_ZMM            \
  REGS_MASKS          \
  REGS_SEGMENT        \
  REGS_DEBUG          \
  REGS_CONTROL        \
  REGS_BOUND          \
  ENTRY(RIP)

Richard Smith6a6967e2014-04-20 22:10:16 +0000403/// \brief All possible values of the base field for effective-address
404/// computations, a.k.a. the Mod and R/M fields of the ModR/M byte.
405/// We distinguish between bases (EA_BASE_*) and registers that just happen
406/// to be referred to when Mod == 0b11 (EA_REG_*).
407enum EABase {
Sean Callanan04cc3072009-12-19 02:59:52 +0000408 EA_BASE_NONE,
409#define ENTRY(x) EA_BASE_##x,
410 ALL_EA_BASES
411#undef ENTRY
412#define ENTRY(x) EA_REG_##x,
413 ALL_REGS
414#undef ENTRY
415 EA_max
Richard Smith6a6967e2014-04-20 22:10:16 +0000416};
Craig Topperfb39f972012-07-31 04:58:05 +0000417
Richard Smith6a6967e2014-04-20 22:10:16 +0000418/// \brief All possible values of the SIB index field.
419/// borrows entries from ALL_EA_BASES with the special case that
420/// sib is synonymous with NONE.
421/// Vector SIB: index can be XMM or YMM.
422enum SIBIndex {
Sean Callanan04cc3072009-12-19 02:59:52 +0000423 SIB_INDEX_NONE,
424#define ENTRY(x) SIB_INDEX_##x,
425 ALL_EA_BASES
Manman Rena0982042012-06-26 19:47:59 +0000426 REGS_XMM
427 REGS_YMM
Elena Demikhovsky003e7d72013-07-28 08:28:38 +0000428 REGS_ZMM
Sean Callanan04cc3072009-12-19 02:59:52 +0000429#undef ENTRY
430 SIB_INDEX_max
Richard Smith6a6967e2014-04-20 22:10:16 +0000431};
Craig Topperfb39f972012-07-31 04:58:05 +0000432
Richard Smith6a6967e2014-04-20 22:10:16 +0000433/// \brief All possible values of the SIB base field.
434enum SIBBase {
Sean Callanan04cc3072009-12-19 02:59:52 +0000435 SIB_BASE_NONE,
436#define ENTRY(x) SIB_BASE_##x,
437 ALL_SIB_BASES
438#undef ENTRY
439 SIB_BASE_max
Richard Smith6a6967e2014-04-20 22:10:16 +0000440};
Sean Callanan04cc3072009-12-19 02:59:52 +0000441
/// \brief Possible displacement types for effective-address computations.
enum EADisplacement {
  EA_DISP_NONE,
  EA_DISP_8,
  EA_DISP_16,
  EA_DISP_32
};

Richard Smith6a6967e2014-04-20 22:10:16 +0000450/// \brief All possible values of the reg field in the ModR/M byte.
451enum Reg {
Sean Callanan2f9443f2009-12-22 02:07:42 +0000452#define ENTRY(x) MODRM_REG_##x,
Sean Callanan04cc3072009-12-19 02:59:52 +0000453 ALL_REGS
454#undef ENTRY
Sean Callanan2f9443f2009-12-22 02:07:42 +0000455 MODRM_REG_max
Richard Smith6a6967e2014-04-20 22:10:16 +0000456};
Craig Topperfb39f972012-07-31 04:58:05 +0000457
/// \brief All possible segment overrides.
/// SEG_OVERRIDE_max is a sentinel that follows the last real override.
enum SegmentOverride {
  SEG_OVERRIDE_NONE,
  SEG_OVERRIDE_CS,
  SEG_OVERRIDE_SS,
  SEG_OVERRIDE_DS,
  SEG_OVERRIDE_ES,
  SEG_OVERRIDE_FS,
  SEG_OVERRIDE_GS,
  SEG_OVERRIDE_max
};

/// \brief Possible values for the VEX.m-mmmm field
enum VEXLeadingOpcodeByte {
  VEX_LOB_0F = 0x1,
  VEX_LOB_0F38 = 0x2,
  VEX_LOB_0F3A = 0x3
};

/// \brief Map-select values used with the XOP prefix.
/// NOTE(review): presumably compared against mmmmmFromXOP2of3() by the
/// decoder -- confirm against the implementation.
enum XOPMapSelect {
  XOP_MAP_SELECT_8 = 0x8,
  XOP_MAP_SELECT_9 = 0x9,
  XOP_MAP_SELECT_A = 0xA
};

/// \brief Possible values for the VEX.pp/EVEX.pp field
enum VEXPrefixCode {
  VEX_PREFIX_NONE = 0x0,
  VEX_PREFIX_66 = 0x1,
  VEX_PREFIX_F3 = 0x2,
  VEX_PREFIX_F2 = 0x3
};

/// \brief The kind of vector extension prefix (none, two-byte VEX, three-byte
/// VEX, EVEX or XOP) carried by an instruction.
enum VectorExtensionType {
  TYPE_NO_VEX_XOP = 0x0,
  TYPE_VEX_2B = 0x1,
  TYPE_VEX_3B = 0x2,
  TYPE_EVEX = 0x3,
  TYPE_XOP = 0x4
};

/// \brief Type for the byte reader that the consumer must provide to
/// the decoder. Reads a single byte from the instruction's address space.
/// \param arg     A baton that the consumer can associate with any internal
///                state that it needs.
/// \param byte    A pointer to a single byte in memory that should be set to
///                contain the value at address.
/// \param address The address in the instruction's address space that should
///                be read from.
/// \return        -1 if the byte cannot be read for any reason; 0 otherwise.
typedef int (*byteReader_t)(const void *arg, uint8_t *byte, uint64_t address);

/// \brief Type for the logging function that the consumer can provide to
/// get debugging output from the decoder.
/// \param arg A baton that the consumer can associate with any internal
///            state that it needs.
/// \param log A string that contains the message.  Will be reused after
///            the logger returns.
typedef void (*dlog_t)(void *arg, const char *log);

/// The specification for how to extract and interpret a full instruction and
/// its operands.
struct InstructionSpecifier {
  // NOTE(review): presumably an index selecting this instruction's operand
  // list in a table defined elsewhere -- confirm against the decoder.
  uint16_t operands;
};

Richard Smith6a6967e2014-04-20 22:10:16 +0000524/// The x86 internal instruction, which is produced by the decoder.
Sean Callanan04cc3072009-12-19 02:59:52 +0000525struct InternalInstruction {
Richard Smith6a6967e2014-04-20 22:10:16 +0000526 // Reader interface (C)
Sean Callanan04cc3072009-12-19 02:59:52 +0000527 byteReader_t reader;
Richard Smith6a6967e2014-04-20 22:10:16 +0000528 // Opaque value passed to the reader
Roman Divacky67923802012-09-05 21:17:34 +0000529 const void* readerArg;
Richard Smith6a6967e2014-04-20 22:10:16 +0000530 // The address of the next byte to read via the reader
Sean Callanan04cc3072009-12-19 02:59:52 +0000531 uint64_t readerCursor;
532
Richard Smith6a6967e2014-04-20 22:10:16 +0000533 // Logger interface (C)
Sean Callanan04cc3072009-12-19 02:59:52 +0000534 dlog_t dlog;
Richard Smith6a6967e2014-04-20 22:10:16 +0000535 // Opaque value passed to the logger
Sean Callanan04cc3072009-12-19 02:59:52 +0000536 void* dlogArg;
537
Richard Smith6a6967e2014-04-20 22:10:16 +0000538 // General instruction information
Craig Topperfb39f972012-07-31 04:58:05 +0000539
Richard Smith6a6967e2014-04-20 22:10:16 +0000540 // The mode to disassemble for (64-bit, protected, real)
Sean Callanan04cc3072009-12-19 02:59:52 +0000541 DisassemblerMode mode;
Richard Smith6a6967e2014-04-20 22:10:16 +0000542 // The start of the instruction, usable with the reader
Sean Callanan04cc3072009-12-19 02:59:52 +0000543 uint64_t startLocation;
Richard Smith6a6967e2014-04-20 22:10:16 +0000544 // The length of the instruction, in bytes
Sean Callanan04cc3072009-12-19 02:59:52 +0000545 size_t length;
Craig Topperfb39f972012-07-31 04:58:05 +0000546
Richard Smith6a6967e2014-04-20 22:10:16 +0000547 // Prefix state
Craig Topperfb39f972012-07-31 04:58:05 +0000548
Richard Smith6a6967e2014-04-20 22:10:16 +0000549 // 1 if the prefix byte corresponding to the entry is present; 0 if not
Sean Callanan04cc3072009-12-19 02:59:52 +0000550 uint8_t prefixPresent[0x100];
Richard Smith6a6967e2014-04-20 22:10:16 +0000551 // contains the location (for use with the reader) of the prefix byte
Sean Callanan04cc3072009-12-19 02:59:52 +0000552 uint64_t prefixLocations[0x100];
Richard Smith6a6967e2014-04-20 22:10:16 +0000553 // The value of the vector extension prefix(EVEX/VEX/XOP), if present
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000554 uint8_t vectorExtensionPrefix[4];
Richard Smith6a6967e2014-04-20 22:10:16 +0000555 // The type of the vector extension prefix
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000556 VectorExtensionType vectorExtensionType;
Richard Smith6a6967e2014-04-20 22:10:16 +0000557 // The value of the REX prefix, if present
Sean Callanan04cc3072009-12-19 02:59:52 +0000558 uint8_t rexPrefix;
Richard Smith6a6967e2014-04-20 22:10:16 +0000559 // The location where a mandatory prefix would have to be (i.e., right before
560 // the opcode, or right before the REX prefix if one is present).
Sean Callanan04cc3072009-12-19 02:59:52 +0000561 uint64_t necessaryPrefixLocation;
Richard Smith6a6967e2014-04-20 22:10:16 +0000562 // The segment override type
Sean Callanan04cc3072009-12-19 02:59:52 +0000563 SegmentOverride segmentOverride;
Richard Smith6a6967e2014-04-20 22:10:16 +0000564 // 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease
Richard Smith5d5061032014-04-20 22:15:37 +0000565 bool xAcquireRelease;
Craig Topperfb39f972012-07-31 04:58:05 +0000566
Richard Smith6a6967e2014-04-20 22:10:16 +0000567 // Sizes of various critical pieces of data, in bytes
Sean Callanan04cc3072009-12-19 02:59:52 +0000568 uint8_t registerSize;
569 uint8_t addressSize;
570 uint8_t displacementSize;
571 uint8_t immediateSize;
Kevin Enderby6fbcd8d2012-02-23 18:18:17 +0000572
Richard Smith6a6967e2014-04-20 22:10:16 +0000573 // Offsets from the start of the instruction to the pieces of data, which is
574 // needed to find relocation entries for adding symbolic operands.
Kevin Enderby6fbcd8d2012-02-23 18:18:17 +0000575 uint8_t displacementOffset;
576 uint8_t immediateOffset;
Craig Topperfb39f972012-07-31 04:58:05 +0000577
Richard Smith6a6967e2014-04-20 22:10:16 +0000578 // opcode state
Craig Topperfb39f972012-07-31 04:58:05 +0000579
Richard Smith6a6967e2014-04-20 22:10:16 +0000580 // The last byte of the opcode, not counting any ModR/M extension
Sean Callanan04cc3072009-12-19 02:59:52 +0000581 uint8_t opcode;
Craig Topperfb39f972012-07-31 04:58:05 +0000582
Richard Smith6a6967e2014-04-20 22:10:16 +0000583 // decode state
Craig Topperfb39f972012-07-31 04:58:05 +0000584
Richard Smith6a6967e2014-04-20 22:10:16 +0000585 // The type of opcode, used for indexing into the array of decode tables
Sean Callanan04cc3072009-12-19 02:59:52 +0000586 OpcodeType opcodeType;
Richard Smith6a6967e2014-04-20 22:10:16 +0000587 // The instruction ID, extracted from the decode table
Sean Callanan04cc3072009-12-19 02:59:52 +0000588 uint16_t instructionID;
Richard Smith6a6967e2014-04-20 22:10:16 +0000589 // The specifier for the instruction, from the instruction info table
Richard Smith3c3410f2014-04-20 21:56:02 +0000590 const InstructionSpecifier *spec;
Craig Topperfb39f972012-07-31 04:58:05 +0000591
Richard Smith6a6967e2014-04-20 22:10:16 +0000592 // state for additional bytes, consumed during operand decode. Pattern:
593 // consumed___ indicates that the byte was already consumed and does not
594 // need to be consumed again.
Sean Callananc3fd5232011-03-15 01:23:15 +0000595
Richard Smith6a6967e2014-04-20 22:10:16 +0000596 // The VEX.vvvv field, which contains a third register operand for some AVX
597 // instructions.
Sean Callananc3fd5232011-03-15 01:23:15 +0000598 Reg vvvv;
Craig Topperfb39f972012-07-31 04:58:05 +0000599
Richard Smith6a6967e2014-04-20 22:10:16 +0000600 // The writemask for AVX-512 instructions which is contained in EVEX.aaa
Elena Demikhovsky371e3632013-12-25 11:40:51 +0000601 Reg writemask;
602
Richard Smith6a6967e2014-04-20 22:10:16 +0000603 // The ModR/M byte, which contains most register operands and some portion of
604 // all memory operands.
Richard Smith5d5061032014-04-20 22:15:37 +0000605 bool consumedModRM;
Sean Callanan04cc3072009-12-19 02:59:52 +0000606 uint8_t modRM;
Craig Topperfb39f972012-07-31 04:58:05 +0000607
Richard Smith6a6967e2014-04-20 22:10:16 +0000608 // The SIB byte, used for more complex 32- or 64-bit memory operands
Richard Smith5d5061032014-04-20 22:15:37 +0000609 bool consumedSIB;
Sean Callanan04cc3072009-12-19 02:59:52 +0000610 uint8_t sib;
611
Richard Smith6a6967e2014-04-20 22:10:16 +0000612 // The displacement, used for memory operands
Richard Smith5d5061032014-04-20 22:15:37 +0000613 bool consumedDisplacement;
Sean Callanan04cc3072009-12-19 02:59:52 +0000614 int32_t displacement;
Craig Topperfb39f972012-07-31 04:58:05 +0000615
Richard Smith6a6967e2014-04-20 22:10:16 +0000616 // Immediates. There can be two in some cases
Sean Callanan04cc3072009-12-19 02:59:52 +0000617 uint8_t numImmediatesConsumed;
618 uint8_t numImmediatesTranslated;
619 uint64_t immediates[2];
Craig Topperfb39f972012-07-31 04:58:05 +0000620
Richard Smith6a6967e2014-04-20 22:10:16 +0000621 // A register or immediate operand encoded into the opcode
Sean Callanan04cc3072009-12-19 02:59:52 +0000622 Reg opcodeRegister;
Craig Topperfb39f972012-07-31 04:58:05 +0000623
Richard Smith6a6967e2014-04-20 22:10:16 +0000624 // Portions of the ModR/M byte
Craig Topperfb39f972012-07-31 04:58:05 +0000625
Richard Smith6a6967e2014-04-20 22:10:16 +0000626 // These fields determine the allowable values for the ModR/M fields, which
627 // depend on operand and address widths.
Sean Callanan04cc3072009-12-19 02:59:52 +0000628 EABase eaBaseBase;
629 EABase eaRegBase;
630 Reg regBase;
631
Richard Smith6a6967e2014-04-20 22:10:16 +0000632 // The Mod and R/M fields can encode a base for an effective address, or a
633 // register. These are separated into two fields here.
Sean Callanan04cc3072009-12-19 02:59:52 +0000634 EABase eaBase;
635 EADisplacement eaDisplacement;
Richard Smith6a6967e2014-04-20 22:10:16 +0000636 // The reg field always encodes a register
Sean Callanan04cc3072009-12-19 02:59:52 +0000637 Reg reg;
Craig Topperfb39f972012-07-31 04:58:05 +0000638
Richard Smith6a6967e2014-04-20 22:10:16 +0000639 // SIB state
Sean Callanan04cc3072009-12-19 02:59:52 +0000640 SIBIndex sibIndex;
641 uint8_t sibScale;
642 SIBBase sibBase;
Craig Topperb8aec082012-08-01 07:39:18 +0000643
Patrik Hagglund31998382014-04-28 12:12:27 +0000644 ArrayRef<OperandSpecifier> operands;
Sean Callanan04cc3072009-12-19 02:59:52 +0000645};
646
Richard Smith6a6967e2014-04-20 22:10:16 +0000647/// \brief Decode one instruction and store the decoding results in
648/// a buffer provided by the consumer.
649/// \param insn The buffer to store the instruction in. Allocated by the
650/// consumer.
651/// \param reader The byteReader_t for the bytes to be read.
652/// \param readerArg An argument to pass to the reader for storing context
653/// specific to the consumer. May be NULL.
654/// \param logger The dlog_t to be used in printing status messages from the
655/// disassembler. May be NULL.
656/// \param loggerArg An argument to pass to the logger for storing context
657/// specific to the logger. May be NULL.
658/// \param startLoc The address (in the reader's address space) of the first
659/// byte in the instruction.
660/// \param mode The mode (16-bit, 32-bit, 64-bit) to decode in.
661/// \return Nonzero if there was an error during decode, 0 otherwise.
Richard Smith3c3410f2014-04-20 21:56:02 +0000662int decodeInstruction(InternalInstruction *insn,
Sean Callanan04cc3072009-12-19 02:59:52 +0000663 byteReader_t reader,
Richard Smith3c3410f2014-04-20 21:56:02 +0000664 const void *readerArg,
Sean Callanan04cc3072009-12-19 02:59:52 +0000665 dlog_t logger,
Richard Smith3c3410f2014-04-20 21:56:02 +0000666 void *loggerArg,
667 const void *miiArg,
Sean Callanan04cc3072009-12-19 02:59:52 +0000668 uint64_t startLoc,
669 DisassemblerMode mode);
670
/// \brief Print a message to debugs()
/// \param file The name of the file printing the debug message.
/// \param line The line number that printed the debug message.
/// \param s    The message to print.
void Debug(const char *file, unsigned line, const char *s);

Mehdi Amini36d33fc2016-10-01 06:46:33 +0000677StringRef GetInstrName(unsigned Opcode, const void *mii);
Benjamin Kramer478e8de2012-02-11 14:50:54 +0000678
Richard Smith89ee75d2014-04-20 21:07:34 +0000679} // namespace X86Disassembler
680} // namespace llvm
Craig Topperfb39f972012-07-31 04:58:05 +0000681
Sean Callanan04cc3072009-12-19 02:59:52 +0000682#endif