/*===-- X86DisassemblerDecoderInternal.h - Disassembler decoder ---*- C -*-===*
 *
 * The LLVM Compiler Infrastructure
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===*
 *
 * This file is part of the X86 Disassembler.
 * It contains the public interface of the instruction decoder.
 * Documentation for the disassembler can be found in X86Disassembler.h.
 *
 *===----------------------------------------------------------------------===*/

/* Capstone Disassembly Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2014 */

#ifndef CS_X86_DISASSEMBLERDECODER_H
#define CS_X86_DISASSEMBLERDECODER_H

#if defined(CAPSTONE_HAS_OSXKERNEL)
#include <libkern/libkern.h>
#else
#include <stdio.h>
#endif
#include <stdint.h>

#include "X86DisassemblerDecoderCommon.h"

#include <stdint.h>

/*
 * Accessor functions for various fields of an Intel instruction
 *
 * Each macro masks one field out of a single byte and shifts it down so
 * that the result starts at bit 0:
 *   ModR/M byte: mod = bits 7-6, reg   = bits 5-3, r/m  = bits 2-0
 *   SIB byte:    scale = bits 7-6, index = bits 5-3, base = bits 2-0
 *   REX prefix:  W = bit 3, R = bit 2, X = bit 1, B = bit 0
 */
#define modFromModRM(modRM) (((modRM) & 0xc0) >> 6)
#define regFromModRM(modRM) (((modRM) & 0x38) >> 3)
#define rmFromModRM(modRM)  ((modRM) & 0x7)
#define scaleFromSIB(sib)   (((sib) & 0xc0) >> 6)
#define indexFromSIB(sib)   (((sib) & 0x38) >> 3)
#define baseFromSIB(sib)    ((sib) & 0x7)
#define wFromREX(rex)       (((rex) & 0x8) >> 3)
#define rFromREX(rex)       (((rex) & 0x4) >> 2)
#define xFromREX(rex)       (((rex) & 0x2) >> 1)
#define bFromREX(rex)       ((rex) & 0x1)

/*
 * Accessors for the fields of the EVEX prefix.  The NofM suffix names the
 * prefix byte (1-based, out of 4) that must be passed in.
 *
 * The r, x, b, r2, vvvv and v2 accessors complement the byte before masking,
 * i.e. they return the logical (non-inverted) value of fields that are
 * stored inverted in the prefix.
 *
 * Every use of the argument is parenthesised so that an expression such as
 * `a | b` can be passed safely.
 */
#define rFromEVEX2of4(evex)    (((~(evex)) & 0x80) >> 7)
#define xFromEVEX2of4(evex)    (((~(evex)) & 0x40) >> 6)
#define bFromEVEX2of4(evex)    (((~(evex)) & 0x20) >> 5)
#define r2FromEVEX2of4(evex)   (((~(evex)) & 0x10) >> 4)
#define mmFromEVEX2of4(evex)   ((evex) & 0x3)
#define wFromEVEX3of4(evex)    (((evex) & 0x80) >> 7)
#define vvvvFromEVEX3of4(evex) (((~(evex)) & 0x78) >> 3)
#define ppFromEVEX3of4(evex)   ((evex) & 0x3)
#define zFromEVEX4of4(evex)    (((evex) & 0x80) >> 7)
#define l2FromEVEX4of4(evex)   (((evex) & 0x40) >> 6)
#define lFromEVEX4of4(evex)    (((evex) & 0x20) >> 5)
#define bFromEVEX4of4(evex)    (((evex) & 0x10) >> 4)
#define v2FromEVEX4of4(evex)   (((~(evex)) & 0x8) >> 3)
#define aaaFromEVEX4of4(evex)  ((evex) & 0x7)

/*
 * Accessors for the fields of the three-byte VEX prefix.  The NofM suffix
 * names the prefix byte (1-based, out of 3) that must be passed in.
 * r, x, b and vvvv are complemented before masking, so the macros return the
 * logical value of fields that are stored inverted in the prefix.
 */
#define rFromVEX2of3(vex)     (((~(vex)) & 0x80) >> 7)
#define xFromVEX2of3(vex)     (((~(vex)) & 0x40) >> 6)
#define bFromVEX2of3(vex)     (((~(vex)) & 0x20) >> 5)
#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)
#define wFromVEX3of3(vex)     (((vex) & 0x80) >> 7)
#define vvvvFromVEX3of3(vex)  (((~(vex)) & 0x78) >> 3)
#define lFromVEX3of3(vex)     (((vex) & 0x4) >> 2)
#define ppFromVEX3of3(vex)    ((vex) & 0x3)

/*
 * Accessors for the fields of the two-byte VEX prefix; pass the second
 * prefix byte.  r and vvvv are complemented before masking, so the macros
 * return the logical value of fields that are stored inverted in the prefix.
 */
#define rFromVEX2of2(vex)    (((~(vex)) & 0x80) >> 7)
#define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3)
#define lFromVEX2of2(vex)    (((vex) & 0x4) >> 2)
#define ppFromVEX2of2(vex)   ((vex) & 0x3)

/*
 * Accessors for the fields of the three-byte XOP prefix.  The NofM suffix
 * names the prefix byte (1-based, out of 3) that must be passed in.
 * r, x, b and vvvv are complemented before masking, so the macros return the
 * logical value of fields that are stored inverted in the prefix.
 */
#define rFromXOP2of3(xop)     (((~(xop)) & 0x80) >> 7)
#define xFromXOP2of3(xop)     (((~(xop)) & 0x40) >> 6)
#define bFromXOP2of3(xop)     (((~(xop)) & 0x20) >> 5)
#define mmmmmFromXOP2of3(xop) ((xop) & 0x1f)
#define wFromXOP3of3(xop)     (((xop) & 0x80) >> 7)
#define vvvvFromXOP3of3(xop)  (((~(xop)) & 0x78) >> 3)
#define lFromXOP3of3(xop)     (((xop) & 0x4) >> 2)
#define ppFromXOP3of3(xop)    ((xop) & 0x3)

/*
 * These enums represent Intel registers for use by the decoder.
 *
 * The register lists are X-macros: each one expands to a sequence of
 * ENTRY(name) invocations.  The user defines ENTRY(x) before expanding a
 * list and #undefs it afterwards.  The order of the entries determines the
 * values of the enumerators generated from them, so it must not be changed.
 */

/* REGS_8BIT - the 8-bit registers. */
#define REGS_8BIT \
  ENTRY(AL) \
  ENTRY(CL) \
  ENTRY(DL) \
  ENTRY(BL) \
  ENTRY(AH) \
  ENTRY(CH) \
  ENTRY(DH) \
  ENTRY(BH) \
  ENTRY(R8B) \
  ENTRY(R9B) \
  ENTRY(R10B) \
  ENTRY(R11B) \
  ENTRY(R12B) \
  ENTRY(R13B) \
  ENTRY(R14B) \
  ENTRY(R15B) \
  ENTRY(SPL) \
  ENTRY(BPL) \
  ENTRY(SIL) \
  ENTRY(DIL)

/*
 * EA_BASES_16BIT - X-macro list of the base names used for 16-bit
 * effective addresses.  Expands to one ENTRY(name) per base; define
 * ENTRY(x) before use.  Entry order fixes the generated enumerator values.
 */
#define EA_BASES_16BIT \
  ENTRY(BX_SI) \
  ENTRY(BX_DI) \
  ENTRY(BP_SI) \
  ENTRY(BP_DI) \
  ENTRY(SI) \
  ENTRY(DI) \
  ENTRY(BP) \
  ENTRY(BX) \
  ENTRY(R8W) \
  ENTRY(R9W) \
  ENTRY(R10W) \
  ENTRY(R11W) \
  ENTRY(R12W) \
  ENTRY(R13W) \
  ENTRY(R14W) \
  ENTRY(R15W)

/*
 * REGS_16BIT - X-macro list of the 16-bit registers.  Expands to one
 * ENTRY(name) per register; define ENTRY(x) before use.  Entry order fixes
 * the generated enumerator values.
 */
#define REGS_16BIT \
  ENTRY(AX) \
  ENTRY(CX) \
  ENTRY(DX) \
  ENTRY(BX) \
  ENTRY(SP) \
  ENTRY(BP) \
  ENTRY(SI) \
  ENTRY(DI) \
  ENTRY(R8W) \
  ENTRY(R9W) \
  ENTRY(R10W) \
  ENTRY(R11W) \
  ENTRY(R12W) \
  ENTRY(R13W) \
  ENTRY(R14W) \
  ENTRY(R15W)

/*
 * EA_BASES_32BIT - X-macro list of the base names used for 32-bit
 * effective addresses.  Same order as REGS_32BIT, except that the slot
 * occupied by ESP there holds the placeholder `sib` here.  Define ENTRY(x)
 * before use; entry order fixes the generated enumerator values.
 */
#define EA_BASES_32BIT \
  ENTRY(EAX) \
  ENTRY(ECX) \
  ENTRY(EDX) \
  ENTRY(EBX) \
  ENTRY(sib) \
  ENTRY(EBP) \
  ENTRY(ESI) \
  ENTRY(EDI) \
  ENTRY(R8D) \
  ENTRY(R9D) \
  ENTRY(R10D) \
  ENTRY(R11D) \
  ENTRY(R12D) \
  ENTRY(R13D) \
  ENTRY(R14D) \
  ENTRY(R15D)

/*
 * REGS_32BIT - X-macro list of the 32-bit registers.  Expands to one
 * ENTRY(name) per register; define ENTRY(x) before use.  Entry order fixes
 * the generated enumerator values.
 */
#define REGS_32BIT \
  ENTRY(EAX) \
  ENTRY(ECX) \
  ENTRY(EDX) \
  ENTRY(EBX) \
  ENTRY(ESP) \
  ENTRY(EBP) \
  ENTRY(ESI) \
  ENTRY(EDI) \
  ENTRY(R8D) \
  ENTRY(R9D) \
  ENTRY(R10D) \
  ENTRY(R11D) \
  ENTRY(R12D) \
  ENTRY(R13D) \
  ENTRY(R14D) \
  ENTRY(R15D)

/*
 * EA_BASES_64BIT - X-macro list of the base names used for 64-bit
 * effective addresses.  Same order as REGS_64BIT, except that the slot
 * occupied by RSP there holds the placeholder `sib64` here.  Define ENTRY(x)
 * before use; entry order fixes the generated enumerator values.
 */
#define EA_BASES_64BIT \
  ENTRY(RAX) \
  ENTRY(RCX) \
  ENTRY(RDX) \
  ENTRY(RBX) \
  ENTRY(sib64) \
  ENTRY(RBP) \
  ENTRY(RSI) \
  ENTRY(RDI) \
  ENTRY(R8) \
  ENTRY(R9) \
  ENTRY(R10) \
  ENTRY(R11) \
  ENTRY(R12) \
  ENTRY(R13) \
  ENTRY(R14) \
  ENTRY(R15)

/*
 * REGS_64BIT - X-macro list of the 64-bit registers.  Expands to one
 * ENTRY(name) per register; define ENTRY(x) before use.  Entry order fixes
 * the generated enumerator values.
 */
#define REGS_64BIT \
  ENTRY(RAX) \
  ENTRY(RCX) \
  ENTRY(RDX) \
  ENTRY(RBX) \
  ENTRY(RSP) \
  ENTRY(RBP) \
  ENTRY(RSI) \
  ENTRY(RDI) \
  ENTRY(R8) \
  ENTRY(R9) \
  ENTRY(R10) \
  ENTRY(R11) \
  ENTRY(R12) \
  ENTRY(R13) \
  ENTRY(R14) \
  ENTRY(R15)

/*
 * REGS_MMX - X-macro list of the MMX registers MM0..MM7.  Define ENTRY(x)
 * before use; entry order fixes the generated enumerator values.
 */
#define REGS_MMX \
  ENTRY(MM0) \
  ENTRY(MM1) \
  ENTRY(MM2) \
  ENTRY(MM3) \
  ENTRY(MM4) \
  ENTRY(MM5) \
  ENTRY(MM6) \
  ENTRY(MM7)

/*
 * REGS_XMM - X-macro list of the vector registers XMM0..XMM31.  Define
 * ENTRY(x) before use; entry order fixes the generated enumerator values.
 */
#define REGS_XMM \
  ENTRY(XMM0) \
  ENTRY(XMM1) \
  ENTRY(XMM2) \
  ENTRY(XMM3) \
  ENTRY(XMM4) \
  ENTRY(XMM5) \
  ENTRY(XMM6) \
  ENTRY(XMM7) \
  ENTRY(XMM8) \
  ENTRY(XMM9) \
  ENTRY(XMM10) \
  ENTRY(XMM11) \
  ENTRY(XMM12) \
  ENTRY(XMM13) \
  ENTRY(XMM14) \
  ENTRY(XMM15) \
  ENTRY(XMM16) \
  ENTRY(XMM17) \
  ENTRY(XMM18) \
  ENTRY(XMM19) \
  ENTRY(XMM20) \
  ENTRY(XMM21) \
  ENTRY(XMM22) \
  ENTRY(XMM23) \
  ENTRY(XMM24) \
  ENTRY(XMM25) \
  ENTRY(XMM26) \
  ENTRY(XMM27) \
  ENTRY(XMM28) \
  ENTRY(XMM29) \
  ENTRY(XMM30) \
  ENTRY(XMM31)


/*
 * REGS_YMM - X-macro list of the vector registers YMM0..YMM31.  Define
 * ENTRY(x) before use; entry order fixes the generated enumerator values.
 */
#define REGS_YMM \
  ENTRY(YMM0) \
  ENTRY(YMM1) \
  ENTRY(YMM2) \
  ENTRY(YMM3) \
  ENTRY(YMM4) \
  ENTRY(YMM5) \
  ENTRY(YMM6) \
  ENTRY(YMM7) \
  ENTRY(YMM8) \
  ENTRY(YMM9) \
  ENTRY(YMM10) \
  ENTRY(YMM11) \
  ENTRY(YMM12) \
  ENTRY(YMM13) \
  ENTRY(YMM14) \
  ENTRY(YMM15) \
  ENTRY(YMM16) \
  ENTRY(YMM17) \
  ENTRY(YMM18) \
  ENTRY(YMM19) \
  ENTRY(YMM20) \
  ENTRY(YMM21) \
  ENTRY(YMM22) \
  ENTRY(YMM23) \
  ENTRY(YMM24) \
  ENTRY(YMM25) \
  ENTRY(YMM26) \
  ENTRY(YMM27) \
  ENTRY(YMM28) \
  ENTRY(YMM29) \
  ENTRY(YMM30) \
  ENTRY(YMM31)

/*
 * REGS_ZMM - X-macro list of the vector registers ZMM0..ZMM31.  Define
 * ENTRY(x) before use; entry order fixes the generated enumerator values.
 */
#define REGS_ZMM \
  ENTRY(ZMM0) \
  ENTRY(ZMM1) \
  ENTRY(ZMM2) \
  ENTRY(ZMM3) \
  ENTRY(ZMM4) \
  ENTRY(ZMM5) \
  ENTRY(ZMM6) \
  ENTRY(ZMM7) \
  ENTRY(ZMM8) \
  ENTRY(ZMM9) \
  ENTRY(ZMM10) \
  ENTRY(ZMM11) \
  ENTRY(ZMM12) \
  ENTRY(ZMM13) \
  ENTRY(ZMM14) \
  ENTRY(ZMM15) \
  ENTRY(ZMM16) \
  ENTRY(ZMM17) \
  ENTRY(ZMM18) \
  ENTRY(ZMM19) \
  ENTRY(ZMM20) \
  ENTRY(ZMM21) \
  ENTRY(ZMM22) \
  ENTRY(ZMM23) \
  ENTRY(ZMM24) \
  ENTRY(ZMM25) \
  ENTRY(ZMM26) \
  ENTRY(ZMM27) \
  ENTRY(ZMM28) \
  ENTRY(ZMM29) \
  ENTRY(ZMM30) \
  ENTRY(ZMM31)

/*
 * REGS_MASKS - X-macro list of the mask registers K0..K7.  Define ENTRY(x)
 * before use; entry order fixes the generated enumerator values.
 */
#define REGS_MASKS \
  ENTRY(K0) \
  ENTRY(K1) \
  ENTRY(K2) \
  ENTRY(K3) \
  ENTRY(K4) \
  ENTRY(K5) \
  ENTRY(K6) \
  ENTRY(K7)

/*
 * REGS_SEGMENT - X-macro list of the segment registers.  Define ENTRY(x)
 * before use; entry order fixes the generated enumerator values.
 */
#define REGS_SEGMENT \
  ENTRY(ES) \
  ENTRY(CS) \
  ENTRY(SS) \
  ENTRY(DS) \
  ENTRY(FS) \
  ENTRY(GS)

/*
 * REGS_DEBUG - X-macro list of the debug registers DR0..DR7.  Define
 * ENTRY(x) before use; entry order fixes the generated enumerator values.
 */
#define REGS_DEBUG \
  ENTRY(DR0) \
  ENTRY(DR1) \
  ENTRY(DR2) \
  ENTRY(DR3) \
  ENTRY(DR4) \
  ENTRY(DR5) \
  ENTRY(DR6) \
  ENTRY(DR7)

/*
 * REGS_CONTROL - X-macro list of the control registers CR0..CR15.  Define
 * ENTRY(x) before use; entry order fixes the generated enumerator values.
 */
#define REGS_CONTROL \
  ENTRY(CR0) \
  ENTRY(CR1) \
  ENTRY(CR2) \
  ENTRY(CR3) \
  ENTRY(CR4) \
  ENTRY(CR5) \
  ENTRY(CR6) \
  ENTRY(CR7) \
  ENTRY(CR8) \
  ENTRY(CR9) \
  ENTRY(CR10) \
  ENTRY(CR11) \
  ENTRY(CR12) \
  ENTRY(CR13) \
  ENTRY(CR14) \
  ENTRY(CR15)

/*
 * ALL_EA_BASES - concatenation of the 16-, 32- and 64-bit effective-address
 * base lists, in that order.
 */
#define ALL_EA_BASES \
  EA_BASES_16BIT \
  EA_BASES_32BIT \
  EA_BASES_64BIT

/*
 * ALL_SIB_BASES - concatenation of the 32- and 64-bit register lists, in
 * that order; used to generate the SIB_BASE_* enumerators.
 */
#define ALL_SIB_BASES \
  REGS_32BIT \
  REGS_64BIT

/*
 * ALL_REGS - concatenation of every register list, followed by a single
 * extra entry for RIP.  The order of the sub-lists fixes the generated
 * enumerator values.
 */
#define ALL_REGS \
  REGS_8BIT \
  REGS_16BIT \
  REGS_32BIT \
  REGS_64BIT \
  REGS_MMX \
  REGS_XMM \
  REGS_YMM \
  REGS_ZMM \
  REGS_MASKS \
  REGS_SEGMENT \
  REGS_DEBUG \
  REGS_CONTROL \
  ENTRY(RIP)

402/*
403 * EABase - All possible values of the base field for effective-address
404 * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We
405 * distinguish between bases (EA_BASE_*) and registers that just happen to be
406 * referred to when Mod == 0b11 (EA_REG_*).
407 */
408typedef enum {
409 EA_BASE_NONE,
410#define ENTRY(x) EA_BASE_##x,
411 ALL_EA_BASES
412#undef ENTRY
413#define ENTRY(x) EA_REG_##x,
414 ALL_REGS
415#undef ENTRY
416 EA_max
417} EABase;
418
419/*
420 * SIBIndex - All possible values of the SIB index field.
421 * Borrows entries from ALL_EA_BASES with the special case that
422 * sib is synonymous with NONE.
423 * Vector SIB: index can be XMM or YMM.
424 */
425typedef enum {
426 SIB_INDEX_NONE,
427#define ENTRY(x) SIB_INDEX_##x,
428 ALL_EA_BASES
429 REGS_XMM
430 REGS_YMM
431 REGS_ZMM
432#undef ENTRY
433 SIB_INDEX_max
434} SIBIndex;
435
436/*
437 * SIBBase - All possible values of the SIB base field.
438 */
439typedef enum {
440 SIB_BASE_NONE,
441#define ENTRY(x) SIB_BASE_##x,
442 ALL_SIB_BASES
443#undef ENTRY
444 SIB_BASE_max
445} SIBBase;
446
/*
 * EADisplacement - Possible displacement types for effective-address
 * computations.  The numeric suffix is the displacement width in bits.
 */
typedef enum {
  EA_DISP_NONE,
  EA_DISP_8,
  EA_DISP_16,
  EA_DISP_32
} EADisplacement;

458/*
459 * Reg - All possible values of the reg field in the ModR/M byte.
460 */
461typedef enum {
462#define ENTRY(x) MODRM_REG_##x,
463 ALL_REGS
464#undef ENTRY
465 MODRM_REG_max
466} Reg;
467
/*
 * SegmentOverride - All possible segment overrides.
 *
 * SEG_OVERRIDE_NONE is 0; SEG_OVERRIDE_max is one past the last override.
 * Note that the order (CS, SS, DS, ES, FS, GS) differs from the order of
 * REGS_SEGMENT (ES, CS, SS, DS, FS, GS).
 */
typedef enum {
  SEG_OVERRIDE_NONE,
  SEG_OVERRIDE_CS,
  SEG_OVERRIDE_SS,
  SEG_OVERRIDE_DS,
  SEG_OVERRIDE_ES,
  SEG_OVERRIDE_FS,
  SEG_OVERRIDE_GS,
  SEG_OVERRIDE_max
} SegmentOverride;

/*
 * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field
 * (the value returned by mmmmmFromVEX2of3()).  Each value names the
 * leading opcode byte sequence it stands for.
 */
typedef enum {
  VEX_LOB_0F = 0x1,
  VEX_LOB_0F38 = 0x2,
  VEX_LOB_0F3A = 0x3
} VEXLeadingOpcodeByte;

/*
 * XOPMapSelect - Possible values for the XOP map-select field.
 * NOTE(review): presumably compared against mmmmmFromXOP2of3(); confirm in
 * the decoder implementation.
 */
typedef enum {
  XOP_MAP_SELECT_8 = 0x8,
  XOP_MAP_SELECT_9 = 0x9,
  XOP_MAP_SELECT_A = 0xA
} XOPMapSelect;

/*
 * VEXPrefixCode - Possible values for the VEX.pp/EVEX.pp field
 * (the value returned by the ppFrom*() accessors).  Each value names the
 * legacy prefix byte it stands for.
 */
typedef enum {
  VEX_PREFIX_NONE = 0x0,
  VEX_PREFIX_66 = 0x1,
  VEX_PREFIX_F3 = 0x2,
  VEX_PREFIX_F2 = 0x3
} VEXPrefixCode;

/*
 * VectorExtensionType - The kind of vector extension prefix carried by an
 * instruction: none, two-byte VEX, three-byte VEX, EVEX or XOP.
 * TYPE_NO_VEX_XOP is 0, so a zero-filled value means "no such prefix".
 */
typedef enum {
  TYPE_NO_VEX_XOP = 0x0,
  TYPE_VEX_2B = 0x1,
  TYPE_VEX_3B = 0x2,
  TYPE_EVEX = 0x3,
  TYPE_XOP = 0x4
} VectorExtensionType;

/*
 * reader_info - State handed to a byteReader_t callback as its `arg`.
 * NOTE(review): the field descriptions below are inferred from the field
 * names only; confirm them against the reader implementation.
 */
struct reader_info {
  const uint8_t *code;  /* presumably the buffer holding the bytes to decode */
  uint64_t size;        /* presumably the number of bytes available in code */
  uint64_t offset;      /* presumably the address corresponding to code[0] */
};

/*
 * byteReader_t - Type for the byte reader that the consumer must provide to
 * the decoder. Reads a single byte from the instruction's address space.
 * @param arg - A baton that the consumer can associate with any internal
 * state that it needs.
 * @param byte - A pointer to a single byte in memory that should be set to
 * contain the value at address.
 * @param address - The address in the instruction's address space that should
 * be read from.
 * @return - -1 if the byte cannot be read for any reason; 0 otherwise.
 */
typedef int (*byteReader_t)(const struct reader_info *arg, uint8_t* byte, uint64_t address);

/*
 * dlog_t - Type for the logging function that the consumer can provide to
 * get debugging output from the decoder.
 * @param arg - A baton that the consumer can associate with any internal
 * state that it needs.
 * @param log - A string that contains the message. Will be reused after
 * the logger returns, so the logger must copy it if it needs to keep it.
 */
typedef void (*dlog_t)(void* arg, const char *log);

/*
 * The specification for how to extract and interpret a full instruction and
 * its operands.
 */
struct InstructionSpecifier {
  uint16_t operands;
};

Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800550/*
551 * The x86 internal instruction, which is produced by the decoder.
552 */
553typedef struct InternalInstruction {
Nguyen Anh Quynh22a5a762014-06-07 23:41:20 +0800554 // from here, all members must be initialized to ZERO to work properly
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800555 uint8_t operandSize;
Nguyen Anh Quynh5474d872014-06-07 12:56:44 +0800556 /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
557 uint8_t prefixPresent[0x100];
558 uint8_t prefix0, prefix1, prefix2, prefix3;
559 /* The value of the REX prefix, if present */
560 uint8_t rexPrefix;
561 /* The segment override type */
562 SegmentOverride segmentOverride;
Nguyen Anh Quynh0b690382014-08-13 13:01:50 +0800563 bool consumedModRM;
Nguyen Anh Quynh5474d872014-06-07 12:56:44 +0800564 uint8_t orgModRM; // save original modRM because we will modify modRM
565 /* The SIB byte, used for more complex 32- or 64-bit memory operands */
Nguyen Anh Quynh0b690382014-08-13 13:01:50 +0800566 bool consumedSIB;
Nguyen Anh Quynh5474d872014-06-07 12:56:44 +0800567 uint8_t sib;
568 /* The displacement, used for memory operands */
Nguyen Anh Quynh0b690382014-08-13 13:01:50 +0800569 bool consumedDisplacement;
Nguyen Anh Quynh5474d872014-06-07 12:56:44 +0800570 int32_t displacement;
571 /* The value of the two-byte escape prefix (usually 0x0f) */
572 uint8_t twoByteEscape;
573 /* The value of the three-byte escape prefix (usually 0x38 or 0x3a) */
574 uint8_t threeByteEscape;
575 /* SIB state */
576 SIBIndex sibIndex;
577 uint8_t sibScale;
578 SIBBase sibBase;
Nguyen Anh Quynh22a5a762014-06-07 23:41:20 +0800579 uint8_t numImmediatesConsumed;
Nguyen Anh Quynh9cf88112014-06-16 18:32:34 +0800580 /* 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease */
Nguyen Anh Quynh0b690382014-08-13 13:01:50 +0800581 bool xAcquireRelease;
Nguyen Anh Quynh22a5a762014-06-07 23:41:20 +0800582
Nguyen Anh Quynh4e20e8e2014-11-10 07:43:49 +0800583 /* contains the location (for use with the reader) of the prefix byte */
584 uint64_t prefixLocations[0x100];
585
Nguyen Anh Quynhbb6440c2014-06-24 21:46:54 +0800586 /* The value of the vector extension prefix(EVEX/VEX/XOP), if present */
587 uint8_t vectorExtensionPrefix[4];
588
Nguyen Anh Quynh22a5a762014-06-07 23:41:20 +0800589 // end-of-zero-members
Nguyen Anh Quynh5474d872014-06-07 12:56:44 +0800590
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800591 /* Reader interface (C) */
592 byteReader_t reader;
593 /* Opaque value passed to the reader */
594 const void* readerArg;
595 /* The address of the next byte to read via the reader */
596 uint64_t readerCursor;
597
598 /* Logger interface (C) */
599 dlog_t dlog;
600 /* Opaque value passed to the logger */
601 void* dlogArg;
602
603 /* General instruction information */
604
605 /* The mode to disassemble for (64-bit, protected, real) */
606 DisassemblerMode mode;
607 /* The start of the instruction, usable with the reader */
608 uint64_t startLocation;
609 /* The length of the instruction, in bytes */
610 size_t length;
611
612 /* Prefix state */
613
Nguyen Anh Quynh13f40d22014-02-07 22:06:33 +0800614 /* The type of the vector extension prefix */
615 VectorExtensionType vectorExtensionType;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800616
Nguyen Anh Quynhfbae42e2015-04-03 00:10:31 +0800617 /* The location where a mandatory prefix would have to be (i.e., right before
618 the opcode, or right before the REX prefix if one is present) */
619 uint64_t necessaryPrefixLocation;
620
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800621 /* Sizes of various critical pieces of data, in bytes */
622 uint8_t registerSize;
623 uint8_t addressSize;
624 uint8_t displacementSize;
625 uint8_t immediateSize;
626
Nguyen Anh Quynhf1ec5262014-06-25 22:03:18 +0800627 uint8_t immSize; // immediate size for X86_OP_IMM operand
628
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800629 /* Offsets from the start of the instruction to the pieces of data, which is
630 needed to find relocation entries for adding symbolic operands */
631 uint8_t displacementOffset;
632 uint8_t immediateOffset;
633
634 /* opcode state */
635
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800636 /* The last byte of the opcode, not counting any ModR/M extension */
637 uint8_t opcode;
638
639 /* decode state */
640
641 /* The type of opcode, used for indexing into the array of decode tables */
642 OpcodeType opcodeType;
643 /* The instruction ID, extracted from the decode table */
644 uint16_t instructionID;
645 /* The specifier for the instruction, from the instruction info table */
646 const struct InstructionSpecifier *spec;
647
648 /* state for additional bytes, consumed during operand decode. Pattern:
649 consumed___ indicates that the byte was already consumed and does not
650 need to be consumed again */
651
652 /* The VEX.vvvv field, which contains a third register operand for some AVX
653 instructions */
654 Reg vvvv;
655
Nguyen Anh Quynh13f40d22014-02-07 22:06:33 +0800656 /* The writemask for AVX-512 instructions which is contained in EVEX.aaa */
657 Reg writemask;
658
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800659 /* The ModR/M byte, which contains most register operands and some portion of
660 all memory operands */
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800661 uint8_t modRM;
662
Nguyen Anh Quynh7437a412014-04-25 10:51:37 +0800663 // special data to handle MOVcr, MOVdr, MOVrc, MOVrd
Nguyen Anh Quynh0902bf22014-04-24 22:46:25 +0800664 uint8_t firstByte; // save the first byte in stream
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800665
666 /* Immediates. There can be two in some cases */
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800667 uint8_t numImmediatesTranslated;
668 uint64_t immediates[2];
669
670 /* A register or immediate operand encoded into the opcode */
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800671 Reg opcodeRegister;
672
673 /* Portions of the ModR/M byte */
674
675 /* These fields determine the allowable values for the ModR/M fields, which
676 depend on operand and address widths */
677 EABase eaBaseBase;
678 EABase eaRegBase;
679 Reg regBase;
680
681 /* The Mod and R/M fields can encode a base for an effective address, or a
682 register. These are separated into two fields here */
683 EABase eaBase;
684 EADisplacement eaDisplacement;
685 /* The reg field always encodes a register */
686 Reg reg;
687
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800688 const struct OperandSpecifier *operands;
689} InternalInstruction;
690
691/* decodeInstruction - Decode one instruction and store the decoding results in
692 * a buffer provided by the consumer.
693 * @param insn - The buffer to store the instruction in. Allocated by the
694 * consumer.
695 * @param reader - The byteReader_t for the bytes to be read.
696 * @param readerArg - An argument to pass to the reader for storing context
697 * specific to the consumer. May be NULL.
698 * @param logger - The dlog_t to be used in printing status messages from the
699 * disassembler. May be NULL.
700 * @param loggerArg - An argument to pass to the logger for storing context
701 * specific to the logger. May be NULL.
702 * @param startLoc - The address (in the reader's address space) of the first
703 * byte in the instruction.
704 * @param mode - The mode (16-bit, 32-bit, 64-bit) to decode in.
705 * @return - Nonzero if there was an error during decode, 0 otherwise.
706 */
707int decodeInstruction(struct InternalInstruction* insn,
708 byteReader_t reader,
709 const void* readerArg,
710 uint64_t startLoc,
711 DisassemblerMode mode);
712
//const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii);

#endif