Blame - lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h - platform/external/llvm

Jia Liu

31d157a

2012-02-18 12:03:15 +0000

[diff] [blame^]

1

/*===-- X86DisassemblerDecoderCommon.h - Disassembler decoder -----*- C -*-===*

Sean Callanan

8ed9f51

2009-12-19 02:59:52 +0000

[diff] [blame]

2

*

3

* The LLVM Compiler Infrastructure

4

*

5

* This file is distributed under the University of Illinois Open Source

6

* License. See LICENSE.TXT for details.

7

*

8

*===----------------------------------------------------------------------===*

9

*

10

* This file is part of the X86 Disassembler.

11

* It contains common definitions used by both the disassembler and the table

12

* generator.

13

* Documentation for the disassembler can be found in X86Disassembler.h.

14

*

15

*===----------------------------------------------------------------------===*/

16

17

/*

18

* This header file provides those definitions that need to be shared between

19

* the decoder and the table generator in a C-friendly manner.

20

*/

21

22

#ifndef X86DISASSEMBLERDECODERCOMMON_H

23

#define X86DISASSEMBLERDECODERCOMMON_H

24

Michael J. Spencer

1f6efa3

2010-11-29 18:16:10 +0000

[diff] [blame]

25

#include "llvm/Support/DataTypes.h"

Sean Callanan

8ed9f51

2009-12-19 02:59:52 +0000

[diff] [blame]

26

27

#define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers

28

#define CONTEXTS_SYM x86DisassemblerContexts

29

#define ONEBYTE_SYM x86DisassemblerOneByteOpcodes

30

#define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes

31

#define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes

32

#define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes

Joerg Sonnenberger

4a8ac8d

2011-04-04 16:58:13 +0000

[diff] [blame]

33

#define THREEBYTEA6_SYM x86DisassemblerThreeByteA6Opcodes

34

#define THREEBYTEA7_SYM x86DisassemblerThreeByteA7Opcodes

Sean Callanan

8ed9f51

2009-12-19 02:59:52 +0000

[diff] [blame]

35

36

#define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers"

37

#define CONTEXTS_STR "x86DisassemblerContexts"

38

#define ONEBYTE_STR "x86DisassemblerOneByteOpcodes"

39

#define TWOBYTE_STR "x86DisassemblerTwoByteOpcodes"

40

#define THREEBYTE38_STR "x86DisassemblerThreeByte38Opcodes"

41

#define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes"

Joerg Sonnenberger

4a8ac8d

2011-04-04 16:58:13 +0000

[diff] [blame]

42

#define THREEBYTEA6_STR "x86DisassemblerThreeByteA6Opcodes"

43

#define THREEBYTEA7_STR "x86DisassemblerThreeByteA7Opcodes"

Sean Callanan

8ed9f51

2009-12-19 02:59:52 +0000

[diff] [blame]

44

45

/*

46

* Attributes of an instruction that must be known before the opcode can be

47

* processed correctly. Most of these indicate the presence of particular

48

* prefixes, but ATTR_64BIT is simply an attribute of the decoding context.

49

*/

50

#define ATTRIBUTE_BITS \

51

ENUM_ENTRY(ATTR_NONE, 0x00) \

52

ENUM_ENTRY(ATTR_64BIT, 0x01) \

53

ENUM_ENTRY(ATTR_XS, 0x02) \

54

ENUM_ENTRY(ATTR_XD, 0x04) \

55

ENUM_ENTRY(ATTR_REXW, 0x08) \

Sean Callanan

a21e2ea

2011-03-15 01:23:15 +0000

[diff] [blame]

56

ENUM_ENTRY(ATTR_OPSIZE, 0x10) \

57

ENUM_ENTRY(ATTR_VEX, 0x20) \

58

ENUM_ENTRY(ATTR_VEXL, 0x40)

Sean Callanan

8ed9f51

2009-12-19 02:59:52 +0000

[diff] [blame]

59

60

/*
 * attributeBits - enum form of the ATTRIBUTE_BITS list. The temporary
 * ENUM_ENTRY(n, v) definition expands each list entry to "n = v,", so every
 * enumerator takes the explicit value written in the list instead of a
 * sequential index. ATTR_max has no initializer, so it is one greater than
 * the value of the last list entry; it is not a count of the attributes.
 */
#define ENUM_ENTRY(n, v) n = v,

enum attributeBits {

ATTRIBUTE_BITS

ATTR_max

};

/* Drop ENUM_ENTRY so that later lists can define it with their own arity. */
#undef ENUM_ENTRY

/*

* Combinations of the above attributes that are relevant to instruction

69

* decode. Although other combinations are possible, they can be reduced to

70

* these without affecting the ultimately decoded instruction.

71

*/

72

73

/* Class name Rank Rationale for rank assignment */

74

#define INSTRUCTION_CONTEXTS \

75

ENUM_ENTRY(IC, 0, "says nothing about the instruction") \

76

ENUM_ENTRY(IC_64BIT, 1, "says the instruction applies in " \

77

"64-bit mode but no more") \

78

ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \

79

"operands change width") \

80

ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \

81

"but not the operands") \

82

ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \

83

"but not the operands") \

Craig Topper

e1b4a1a

2011-10-01 19:54:56 +0000

[diff] [blame]

84

ENUM_ENTRY(IC_XD_OPSIZE, 3, "requires an OPSIZE prefix, so " \

85

"operands change width") \

Craig Topper

29480fd

2011-10-11 04:34:23 +0000

[diff] [blame]

86

ENUM_ENTRY(IC_XS_OPSIZE, 3, "requires an OPSIZE prefix, so " \

87

"operands change width") \

Sean Callanan

8ed9f51

2009-12-19 02:59:52 +0000

[diff] [blame]

88

ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\

89

"change width; overrides IC_OPSIZE") \

90

ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \

91

ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \

92

"secondary") \

93

ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \

Craig Topper

e1b4a1a

2011-10-01 19:54:56 +0000

[diff] [blame]

94

ENUM_ENTRY(IC_64BIT_XD_OPSIZE, 3, "Just as meaningful as IC_XD_OPSIZE") \

Craig Topper

29480fd

2011-10-11 04:34:23 +0000

[diff] [blame]

95

ENUM_ENTRY(IC_64BIT_XS_OPSIZE, 3, "Just as meaningful as IC_XS_OPSIZE") \

Sean Callanan

8ed9f51

2009-12-19 02:59:52 +0000

[diff] [blame]

96

ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \

97

"opcode") \

98

ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \

99

"IC_64BIT_REXW_XS") \

100

ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! Prefer over all " \

101

"else because this changes most " \

Sean Callanan

a21e2ea

2011-03-15 01:23:15 +0000

[diff] [blame]

102

"operands' meaning") \

103

ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \

104

ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \

105

ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \

106

ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \

107

ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \

108

ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \

109

ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \

110

ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \

111

ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \

112

ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\

Craig Topper

6744a17

2011-10-04 06:30:42 +0000

[diff] [blame]

113

ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\

Craig Topper

c8eb880

2011-11-06 23:04:08 +0000

[diff] [blame]

114

ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") \

115

ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize")

Sean Callanan

a21e2ea

2011-03-15 01:23:15 +0000

[diff] [blame]

116

Sean Callanan

8ed9f51

2009-12-19 02:59:52 +0000

[diff] [blame]

117

118

/*
 * InstructionContext - enum form of the INSTRUCTION_CONTEXTS list. This
 * ENUM_ENTRY definition keeps only the name of each entry; the rank and
 * rationale arguments are discarded. Enumerators carry no initializers, so
 * they are numbered sequentially from 0 and IC_max equals the number of
 * contexts in the list.
 */
#define ENUM_ENTRY(n, r, d) n,

typedef enum {

INSTRUCTION_CONTEXTS

IC_max

} InstructionContext;

/* Drop ENUM_ENTRY so that later lists can define it with their own arity. */
#undef ENUM_ENTRY

/*

* Opcode types, which determine which decode table to use, both in the Intel

127

* manual and also for the decoder.

*/

typedef enum {

ONEBYTE = 0,

TWOBYTE = 1,

THREEBYTE_38 = 2,

Joerg Sonnenberger

4a8ac8d

2011-04-04 16:58:13 +0000

[diff] [blame]

133

THREEBYTE_3A = 3,

134

THREEBYTE_A6 = 4,

135

THREEBYTE_A7 = 5

Sean Callanan

8ed9f51

2009-12-19 02:59:52 +0000

[diff] [blame]

} OpcodeType;

/*

* The following structs are used for the hierarchical decode table. After

140

* determining the instruction's class (i.e., which IC_* constant applies to

141

* it), the decoder reads the opcode. Some instructions require specific

142

* values of the ModR/M byte, so the ModR/M byte indexes into the final table.

143

*

144

* If a ModR/M byte is not required, "required" is left unset, and the values

145

* for each instructionID are identical.

146

*/

147

148

typedef uint16_t InstrUID;

149

150

/*

151

* ModRMDecisionType - describes the type of ModR/M decision, allowing the

152

* consumer to determine the number of entries in it.

153

*

154

* MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded

155

* instruction is the same.

156

* MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode

157

* corresponds to one instruction; otherwise, it corresponds to

158

* a different instruction.

Craig Topper

f41ab77

2012-02-09 08:58:07 +0000

[diff] [blame]

159

* MODRM_SPLITREG - ModR/M byte divided by 8 is used to select instruction. This

160

* corresponds to instructions that use the reg field as an opcode extension.

Sean Callanan

8ed9f51

2009-12-19 02:59:52 +0000

[diff] [blame]

161

* MODRM_FULL - Potentially, each value of the ModR/M byte could correspond

162

* to a different instruction.

*/

#define MODRMTYPES \

ENUM_ENTRY(MODRM_ONEENTRY) \

167

ENUM_ENTRY(MODRM_SPLITRM) \

Craig Topper

f41ab77

2012-02-09 08:58:07 +0000

[diff] [blame]

168

ENUM_ENTRY(MODRM_SPLITREG) \

Sean Callanan

8ed9f51

2009-12-19 02:59:52 +0000

[diff] [blame]

169

ENUM_ENTRY(MODRM_FULL)

170

171

/*
 * ModRMDecisionType - enum form of the MODRMTYPES list. Each list entry
 * expands to its bare name, so the enumerators are numbered sequentially
 * from 0 in list order and MODRM_max equals the number of decision types.
 */
#define ENUM_ENTRY(n) n,

typedef enum {

MODRMTYPES

MODRM_max

} ModRMDecisionType;

/* Drop ENUM_ENTRY so that later lists can define it with their own arity. */
#undef ENUM_ENTRY

/*

* ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which

180

* instruction each possible value of the ModR/M byte corresponds to. Once

181

* this information is known, we have narrowed down to a single instruction.

182

*/

183

struct ModRMDecision {

184

uint8_t modrm_type;

185

186

/* The macro below must be defined wherever this file is included. */

INSTRUCTION_IDS

};

/*

* OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at

192

* given a particular opcode.

193

*/

194

struct OpcodeDecision {

195

struct ModRMDecision modRMDecisions[256];

};

/*

* ContextDecision - Specifies which opcode->instruction tables to look at given

200

* a particular context (set of attributes). Since there are many possible

201

* contexts, the decoder first uses CONTEXTS_SYM to determine which context

202

* applies given a specific set of attributes. Hence there are only IC_max

203

* entries in this table, rather than 2^(ATTR_max).

204

*/

205

struct ContextDecision {

206

struct OpcodeDecision opcodeDecisions[IC_max];

};

/*

* Physical encodings of instruction operands.

*/

#define ENCODINGS \

ENUM_ENTRY(ENCODING_NONE, "") \

215

ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \

216

ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \

Sean Callanan

a21e2ea

2011-03-15 01:23:15 +0000

[diff] [blame]

217

ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \

Sean Callanan

8ed9f51

2009-12-19 02:59:52 +0000

[diff] [blame]

218

ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \

219

ENUM_ENTRY(ENCODING_CW, "2-byte") \

220

ENUM_ENTRY(ENCODING_CD, "4-byte") \

221

ENUM_ENTRY(ENCODING_CP, "6-byte") \

222

ENUM_ENTRY(ENCODING_CO, "8-byte") \

223

ENUM_ENTRY(ENCODING_CT, "10-byte") \

224

ENUM_ENTRY(ENCODING_IB, "1-byte immediate") \

225

ENUM_ENTRY(ENCODING_IW, "2-byte") \

226

ENUM_ENTRY(ENCODING_ID, "4-byte") \

227

ENUM_ENTRY(ENCODING_IO, "8-byte") \

228

ENUM_ENTRY(ENCODING_RB, "(AL..DIL, R8L..R15L) Register code added to " \

229

"the opcode byte") \

230

ENUM_ENTRY(ENCODING_RW, "(AX..DI, R8W..R15W)") \

231

ENUM_ENTRY(ENCODING_RD, "(EAX..EDI, R8D..R15D)") \

232

ENUM_ENTRY(ENCODING_RO, "(RAX..RDI, R8..R15)") \

233

ENUM_ENTRY(ENCODING_I, "Position on floating-point stack added to the " \

234

"opcode byte") \

235

\

236

ENUM_ENTRY(ENCODING_Iv, "Immediate of operand size") \

237

ENUM_ENTRY(ENCODING_Ia, "Immediate of address size") \

238

ENUM_ENTRY(ENCODING_Rv, "Register code of operand size added to the " \

239

"opcode byte") \

240

ENUM_ENTRY(ENCODING_DUP, "Duplicate of another operand; ID is encoded " \

241

"in type")

242

243

/*
 * OperandEncoding - enum form of the ENCODINGS list. This ENUM_ENTRY
 * definition keeps only the name of each entry and discards the description
 * string. Enumerators are numbered sequentially from 0 in list order, so
 * ENCODING_max equals the number of encodings.
 */
#define ENUM_ENTRY(n, d) n,

typedef enum {

ENCODINGS

ENCODING_max

} OperandEncoding;

/* Drop ENUM_ENTRY so that later lists can define it with their own arity. */
#undef ENUM_ENTRY

/*

* Semantic interpretations of instruction operands.

*/

#define TYPES \

ENUM_ENTRY(TYPE_NONE, "") \

256

ENUM_ENTRY(TYPE_REL8, "1-byte immediate address") \

257

ENUM_ENTRY(TYPE_REL16, "2-byte") \

258

ENUM_ENTRY(TYPE_REL32, "4-byte") \

259

ENUM_ENTRY(TYPE_REL64, "8-byte") \

260

ENUM_ENTRY(TYPE_PTR1616, "2+2-byte segment+offset address") \

261

ENUM_ENTRY(TYPE_PTR1632, "2+4-byte") \

262

ENUM_ENTRY(TYPE_PTR1664, "2+8-byte") \

263

ENUM_ENTRY(TYPE_R8, "1-byte register operand") \

264

ENUM_ENTRY(TYPE_R16, "2-byte") \

265

ENUM_ENTRY(TYPE_R32, "4-byte") \

266

ENUM_ENTRY(TYPE_R64, "8-byte") \

267

ENUM_ENTRY(TYPE_IMM8, "1-byte immediate operand") \

268

ENUM_ENTRY(TYPE_IMM16, "2-byte") \

269

ENUM_ENTRY(TYPE_IMM32, "4-byte") \

270

ENUM_ENTRY(TYPE_IMM64, "8-byte") \

Sean Callanan

5edca81

2010-04-07 21:42:19 +0000

[diff] [blame]

271

ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \

Sean Callanan

8ed9f51

2009-12-19 02:59:52 +0000

[diff] [blame]

272

ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \

273

ENUM_ENTRY(TYPE_RM16, "2-byte") \

274

ENUM_ENTRY(TYPE_RM32, "4-byte") \

275

ENUM_ENTRY(TYPE_RM64, "8-byte") \

276

ENUM_ENTRY(TYPE_M, "Memory operand") \

277

ENUM_ENTRY(TYPE_M8, "1-byte") \

278

ENUM_ENTRY(TYPE_M16, "2-byte") \

279

ENUM_ENTRY(TYPE_M32, "4-byte") \

280

ENUM_ENTRY(TYPE_M64, "8-byte") \

Sean Callanan

7fb35a2

2009-12-22 21:12:55 +0000

[diff] [blame]

281

ENUM_ENTRY(TYPE_LEA, "Effective address") \

Sean Callanan

8ed9f51

2009-12-19 02:59:52 +0000

[diff] [blame]

282

ENUM_ENTRY(TYPE_M128, "16-byte (SSE/SSE2)") \

Chris Lattner

b2ef4c1

2010-09-29 02:57:56 +0000

[diff] [blame]

283

ENUM_ENTRY(TYPE_M256, "32-byte (AVX)") \

Sean Callanan

8ed9f51

2009-12-19 02:59:52 +0000

[diff] [blame]

284

ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \

285

ENUM_ENTRY(TYPE_M1632, "2+4-byte") \

286

ENUM_ENTRY(TYPE_M1664, "2+8-byte") \

287

ENUM_ENTRY(TYPE_M16_32, "2+4-byte two-part memory operand (LIDT, LGDT)") \

288

ENUM_ENTRY(TYPE_M16_16, "2+2-byte (BOUND)") \

289

ENUM_ENTRY(TYPE_M32_32, "4+4-byte (BOUND)") \

290

ENUM_ENTRY(TYPE_M16_64, "2+8-byte (LIDT, LGDT)") \

291

ENUM_ENTRY(TYPE_MOFFS8, "1-byte memory offset (relative to segment " \

292

"base)") \

293

ENUM_ENTRY(TYPE_MOFFS16, "2-byte") \

294

ENUM_ENTRY(TYPE_MOFFS32, "4-byte") \

295

ENUM_ENTRY(TYPE_MOFFS64, "8-byte") \

296

ENUM_ENTRY(TYPE_SREG, "Byte with single bit set: 0 = ES, 1 = CS, " \

297

"2 = SS, 3 = DS, 4 = FS, 5 = GS") \

298

ENUM_ENTRY(TYPE_M32FP, "32-bit IEE754 memory floating-point operand") \

299

ENUM_ENTRY(TYPE_M64FP, "64-bit") \

300

ENUM_ENTRY(TYPE_M80FP, "80-bit extended") \

301

ENUM_ENTRY(TYPE_M16INT, "2-byte memory integer operand for use in " \

302

"floating-point instructions") \

303

ENUM_ENTRY(TYPE_M32INT, "4-byte") \

304

ENUM_ENTRY(TYPE_M64INT, "8-byte") \

305

ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \

306

ENUM_ENTRY(TYPE_MM, "MMX register operand") \

307

ENUM_ENTRY(TYPE_MM32, "4-byte MMX register or memory operand") \

308

ENUM_ENTRY(TYPE_MM64, "8-byte") \

309

ENUM_ENTRY(TYPE_XMM, "XMM register operand") \

310

ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \

311

ENUM_ENTRY(TYPE_XMM64, "8-byte") \

312

ENUM_ENTRY(TYPE_XMM128, "16-byte") \

Sean Callanan

a21e2ea

2011-03-15 01:23:15 +0000

[diff] [blame]

313

ENUM_ENTRY(TYPE_XMM256, "32-byte") \

Sean Callanan

8ed9f51

2009-12-19 02:59:52 +0000

[diff] [blame]

314

ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \

315

ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \

316

ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \

Sean Callanan

1a8b789

2010-05-06 20:59:00 +0000

[diff] [blame]

317

ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \

Sean Callanan

8ed9f51

2009-12-19 02:59:52 +0000

[diff] [blame]

318

\

319

ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \

320

ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \

321

ENUM_ENTRY(TYPE_IMMv, "Immediate operand of operand size") \

322

ENUM_ENTRY(TYPE_RELv, "Immediate address of operand size") \

323

ENUM_ENTRY(TYPE_DUP0, "Duplicate of operand 0") \

324

ENUM_ENTRY(TYPE_DUP1, "operand 1") \

325

ENUM_ENTRY(TYPE_DUP2, "operand 2") \

326

ENUM_ENTRY(TYPE_DUP3, "operand 3") \

327

ENUM_ENTRY(TYPE_DUP4, "operand 4") \

328

ENUM_ENTRY(TYPE_M512, "512-byte FPU/MMX/XMM/MXCSR state")

329

330

/*
 * OperandType - enum form of the TYPES list. This ENUM_ENTRY definition
 * keeps only the name of each entry and discards the description string.
 * Enumerators are numbered sequentially from 0 in list order, so TYPE_max
 * equals the number of operand types.
 */
#define ENUM_ENTRY(n, d) n,

typedef enum {

TYPES

TYPE_max

} OperandType;

/* Drop ENUM_ENTRY so that later lists can define it with their own arity. */
#undef ENUM_ENTRY

/*

* OperandSpecifier - The specification for how to extract and interpret one

339

* operand.

340

*/

341

struct OperandSpecifier {

342

OperandEncoding encoding;

OperandType type;

};

/*

* Indicates where the opcode modifier (if any) is to be found. Extended

348

* opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.

349

*/

350

351

#define MODIFIER_TYPES \

352

ENUM_ENTRY(MODIFIER_NONE) \

353

ENUM_ENTRY(MODIFIER_OPCODE) \

354

ENUM_ENTRY(MODIFIER_MODRM)

355

356

/*
 * ModifierType - enum form of the MODIFIER_TYPES list. Each list entry
 * expands to its bare name, so the enumerators are numbered sequentially
 * from 0 in list order and MODIFIER_max equals the number of modifier types.
 */
#define ENUM_ENTRY(n) n,

typedef enum {

MODIFIER_TYPES

MODIFIER_max

} ModifierType;

/* Drop ENUM_ENTRY now that the last enum using it has been generated. */
#undef ENUM_ENTRY

#define X86_MAX_OPERANDS 5

364

365

/*

366

* The specification for how to extract and interpret a full instruction and

367

* its operands.

368

*/

369

struct InstructionSpecifier {

370

ModifierType modifierType;

371

uint8_t modifierBase;

372

struct OperandSpecifier operands[X86_MAX_OPERANDS];

373

374

/* The macro below must be defined wherever this file is included. */

375

INSTRUCTION_SPECIFIER_FIELDS

};

/*

* Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode

380

* are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,

* respectively.

*/

/* Decoding modes; the numeric values are spelled out and start at zero. */
typedef enum {
  MODE_16BIT = 0,
  MODE_32BIT = 1,
  MODE_64BIT = 2
} DisassemblerMode;

#endif