blob: 24db080b2635ae2175d07d93b92c06a895ceed3d [file] [log] [blame]
Daniel Dunbar40588742009-11-25 02:13:23 +00001//===- DisassemblerEmitter.cpp - Generate a disassembler ------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "DisassemblerEmitter.h"
11#include "CodeGenTarget.h"
Jim Grosbach0b6a44a2011-06-21 22:55:50 +000012#include "Error.h"
Daniel Dunbar40588742009-11-25 02:13:23 +000013#include "Record.h"
Sean Callanan8ed9f512009-12-19 02:59:52 +000014#include "X86DisassemblerTables.h"
15#include "X86RecognizableInstr.h"
Johnny Chenb68a3ee2010-04-02 22:27:38 +000016#include "ARMDecoderEmitter.h"
Owen Andersond8c87882011-02-18 21:51:29 +000017#include "FixedLenDecoderEmitter.h"
Johnny Chenb68a3ee2010-04-02 22:27:38 +000018
Daniel Dunbar40588742009-11-25 02:13:23 +000019using namespace llvm;
Sean Callanan8ed9f512009-12-19 02:59:52 +000020using namespace llvm::X86Disassembler;
21
22/// DisassemblerEmitter - Contains disassembler table emitters for various
23/// architectures.
24
25/// X86 Disassembler Emitter
26///
27/// *** IF YOU'RE HERE TO RESOLVE A "Primary decode conflict", LOOK DOWN NEAR
28/// THE END OF THIS COMMENT!
29///
30/// The X86 disassembler emitter is part of the X86 Disassembler, which is
31/// documented in lib/Target/X86/X86Disassembler.h.
32///
33/// The emitter produces the tables that the disassembler uses to translate
34/// instructions. The emitter generates the following tables:
35///
36/// - One table (CONTEXTS_SYM) that contains a mapping of attribute masks to
37/// instruction contexts. Although for each attribute there are cases where
38/// that attribute determines decoding, in the majority of cases decoding is
39/// the same whether or not an attribute is present. For example, a 64-bit
40/// instruction with an OPSIZE prefix and an XS prefix decodes the same way in
41/// all cases as a 64-bit instruction with only OPSIZE set. (The XS prefix
42/// may have effects on its execution, but does not change the instruction
43/// returned.) This allows considerable space savings in other tables.
Joerg Sonnenberger4a8ac8d2011-04-04 16:58:13 +000044/// - Six tables (ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, THREEBYTE3A_SYM,
45/// THREEBYTEA6_SYM, and THREEBYTEA7_SYM) contain the hierarchy that the
46/// decoder traverses while decoding an instruction. At the lowest level of
47/// this hierarchy are instruction UIDs, 16-bit integers that can be used to
48/// uniquely identify the instruction and correspond exactly to its position
49/// in the list of CodeGenInstructions for the target.
Sean Callanan8ed9f512009-12-19 02:59:52 +000050/// - One table (INSTRUCTIONS_SYM) contains information about the operands of
51/// each instruction and how to decode them.
52///
53/// During table generation, there may be conflicts between instructions that
54/// occupy the same space in the decode tables. These conflicts are resolved as
55/// follows in setTableFields() (X86DisassemblerTables.cpp):
56///
57/// - If the current context is the native context for one of the instructions
58/// (that is, the attributes specified for it in the LLVM tables specify
59/// precisely the current context), then it has priority.
60/// - If the current context isn't native for either of the instructions, then
61/// the higher-priority context wins (that is, the one that is more specific).
62/// That hierarchy is determined by outranks() (X86DisassemblerTables.cpp)
63/// - If the current context is native for both instructions, then the table
64/// emitter reports a conflict and dies.
65///
66/// *** RESOLUTION FOR "Primary decode conflict"S
67///
68/// If two instructions collide, typically the solution is (in order of
69/// likelihood):
70///
71/// (1) to filter out one of the instructions by editing filter()
72/// (X86RecognizableInstr.cpp). This is the most common resolution, but
73/// check the Intel manuals first to make sure that (2) and (3) are not the
74/// problem.
75/// (2) to fix the tables (X86.td and its subsidiaries) so the opcodes are
76/// accurate. Sometimes they are not.
77/// (3) to fix the tables to reflect the actual context (for example, required
78/// prefixes), and possibly to add a new context by editing
79/// lib/Target/X86/X86DisassemblerDecoderCommon.h. This is unlikely to be
80/// the cause.
81///
82/// DisassemblerEmitter.cpp contains the implementation for the emitter,
83/// which simply pulls out instructions from the CodeGenTarget and pushes them
84/// into X86DisassemblerTables.
85/// X86DisassemblerTables.h contains the interface for the instruction tables,
86/// which manage and emit the structures discussed above.
87/// X86DisassemblerTables.cpp contains the implementation for the instruction
88/// tables.
89/// X86ModRMFilters.h contains filters that can be used to determine which
90/// ModR/M values are valid for a particular instruction. These are used to
91/// populate ModRMDecisions.
92/// X86RecognizableInstr.h contains the interface for a single instruction,
93/// which knows how to translate itself from a CodeGenInstruction and provide
94/// the information necessary for integration into the tables.
95/// X86RecognizableInstr.cpp contains the implementation for a single
96/// instruction.
Daniel Dunbar40588742009-11-25 02:13:23 +000097
98void DisassemblerEmitter::run(raw_ostream &OS) {
Chris Lattner67db8832010-12-13 00:23:57 +000099 CodeGenTarget Target(Records);
Daniel Dunbar40588742009-11-25 02:13:23 +0000100
101 OS << "/*===- TableGen'erated file "
102 << "---------------------------------------*- C -*-===*\n"
103 << " *\n"
104 << " * " << Target.getName() << " Disassembler\n"
105 << " *\n"
106 << " * Automatically generated file, do not edit!\n"
107 << " *\n"
108 << " *===---------------------------------------------------------------"
109 << "-------===*/\n";
110
Sean Callanan8ed9f512009-12-19 02:59:52 +0000111 // X86 uses a custom disassembler.
112 if (Target.getName() == "X86") {
113 DisassemblerTables Tables;
114
Chris Lattnerf6502782010-03-19 00:34:35 +0000115 const std::vector<const CodeGenInstruction*> &numberedInstructions =
116 Target.getInstructionsByEnumValue();
Sean Callanan8ed9f512009-12-19 02:59:52 +0000117
118 for (unsigned i = 0, e = numberedInstructions.size(); i != e; ++i)
119 RecognizableInstr::processInstr(Tables, *numberedInstructions[i], i);
120
121 // FIXME: As long as we are using exceptions, might as well drop this to the
122 // actual conflict site.
123 if (Tables.hasConflicts())
124 throw TGError(Target.getTargetRecord()->getLoc(),
125 "Primary decode conflict");
126
127 Tables.emit(OS);
128 return;
129 }
130
Owen Anderson83e3f672011-08-17 17:44:15 +0000131 // ARM and Thumb have a CHECK() macro to deal with DecodeStatuses.
132 if (Target.getName() == "ARM" ||
133 Target.getName() == "Thumb") {
134 FixedLenDecoderEmitter(Records,
James Molloya5d58562011-09-07 19:42:28 +0000135 "ARM",
James Molloyc047dca2011-09-01 18:02:14 +0000136 "if (!Check(S, ", ")) return MCDisassembler::Fail;",
137 "S", "MCDisassembler::Fail",
James Molloya5d58562011-09-07 19:42:28 +0000138 " MCDisassembler::DecodeStatus S = MCDisassembler::Success;\n(void)S;").run(OS);
Owen Anderson83e3f672011-08-17 17:44:15 +0000139 return;
140 }
141
James Molloya5d58562011-09-07 19:42:28 +0000142 FixedLenDecoderEmitter(Records, Target.getName()).run(OS);
Daniel Dunbar40588742009-11-25 02:13:23 +0000143}