blob: 139d485e72b7bc274ad218ea2d65fe2d84b598cf [file] [log] [blame]
//===- MC-X86Specific.cpp - X86-Specific code for MC ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements X86-specific parsing, encoding and decoding stuff for
// MC.
//
//===----------------------------------------------------------------------===//
14
15#include "AsmParser.h"
Daniel Dunbarbc9c6472009-07-27 23:20:52 +000016#include "llvm/ADT/Twine.h"
Chris Lattnere6c561b2009-06-23 18:41:30 +000017#include "llvm/MC/MCInst.h"
Daniel Dunbarb901d872009-07-02 02:26:39 +000018#include "llvm/Support/SourceMgr.h"
Chris Lattnere6c561b2009-06-23 18:41:30 +000019using namespace llvm;
20
21/// X86Operand - Instances of this class represent one X86 machine instruction.
22struct AsmParser::X86Operand {
23 enum {
24 Register,
25 Immediate,
26 Memory
27 } Kind;
28
29 union {
30 struct {
31 unsigned RegNo;
32 } Reg;
33
34 struct {
Daniel Dunbar4967dbd2009-06-30 23:02:44 +000035 MCValue Val;
Chris Lattnere6c561b2009-06-23 18:41:30 +000036 } Imm;
37
38 struct {
39 unsigned SegReg;
Daniel Dunbar4967dbd2009-06-30 23:02:44 +000040 MCValue Disp;
Chris Lattnere6c561b2009-06-23 18:41:30 +000041 unsigned BaseReg;
Daniel Dunbar3aafe4d2009-07-02 00:51:52 +000042 unsigned IndexReg;
Chris Lattnere6c561b2009-06-23 18:41:30 +000043 unsigned Scale;
Chris Lattnere6c561b2009-06-23 18:41:30 +000044 } Mem;
45 };
46
Daniel Dunbarb336eac2009-07-02 01:58:24 +000047 unsigned getReg() const {
48 assert(Kind == Register && "Invalid access!");
49 return Reg.RegNo;
50 }
51
Chris Lattnere6c561b2009-06-23 18:41:30 +000052 static X86Operand CreateReg(unsigned RegNo) {
53 X86Operand Res;
54 Res.Kind = Register;
55 Res.Reg.RegNo = RegNo;
56 return Res;
57 }
Daniel Dunbar4967dbd2009-06-30 23:02:44 +000058 static X86Operand CreateImm(MCValue Val) {
Chris Lattnere6c561b2009-06-23 18:41:30 +000059 X86Operand Res;
60 Res.Kind = Immediate;
61 Res.Imm.Val = Val;
62 return Res;
63 }
Daniel Dunbar4967dbd2009-06-30 23:02:44 +000064 static X86Operand CreateMem(unsigned SegReg, MCValue Disp, unsigned BaseReg,
Daniel Dunbar3aafe4d2009-07-02 00:51:52 +000065 unsigned IndexReg, unsigned Scale) {
Daniel Dunbarb336eac2009-07-02 01:58:24 +000066 // If there is no index register, we should never have a scale, and we
67 // should always have a scale (in {1,2,4,8}) if we do.
68 assert(((Scale == 0 && !IndexReg) ||
69 (IndexReg && (Scale == 1 || Scale == 2 ||
70 Scale == 4 || Scale == 8))) &&
71 "Invalid scale!");
Chris Lattnere6c561b2009-06-23 18:41:30 +000072 X86Operand Res;
73 Res.Kind = Memory;
74 Res.Mem.SegReg = SegReg;
75 Res.Mem.Disp = Disp;
76 Res.Mem.BaseReg = BaseReg;
Daniel Dunbar3aafe4d2009-07-02 00:51:52 +000077 Res.Mem.IndexReg = IndexReg;
Chris Lattnere6c561b2009-06-23 18:41:30 +000078 Res.Mem.Scale = Scale;
Chris Lattnere6c561b2009-06-23 18:41:30 +000079 return Res;
80 }
Chris Lattnere6c561b2009-06-23 18:41:30 +000081};
82
Daniel Dunbarb336eac2009-07-02 01:58:24 +000083bool AsmParser::ParseX86Register(X86Operand &Op) {
Daniel Dunbar46978972009-07-28 18:17:26 +000084 assert(getLexer().is(AsmToken::Register) && "Invalid token kind!");
Daniel Dunbarb336eac2009-07-02 01:58:24 +000085
86 // FIXME: Decode register number.
87 Op = X86Operand::CreateReg(123);
Daniel Dunbar46978972009-07-28 18:17:26 +000088 getLexer().Lex(); // Eat register token.
Daniel Dunbarb336eac2009-07-02 01:58:24 +000089
90 return false;
91}
92
Chris Lattnere6c561b2009-06-23 18:41:30 +000093bool AsmParser::ParseX86Operand(X86Operand &Op) {
Daniel Dunbar46978972009-07-28 18:17:26 +000094 switch (getLexer().getKind()) {
Chris Lattnere6c561b2009-06-23 18:41:30 +000095 default:
96 return ParseX86MemOperand(Op);
Daniel Dunbarc479a542009-07-28 16:08:33 +000097 case AsmToken::Register:
Chris Lattnere6c561b2009-06-23 18:41:30 +000098 // FIXME: if a segment register, this could either be just the seg reg, or
99 // the start of a memory operand.
Daniel Dunbarb336eac2009-07-02 01:58:24 +0000100 return ParseX86Register(Op);
Daniel Dunbarc479a542009-07-28 16:08:33 +0000101 case AsmToken::Dollar: {
Chris Lattnere6c561b2009-06-23 18:41:30 +0000102 // $42 -> immediate.
Daniel Dunbar46978972009-07-28 18:17:26 +0000103 getLexer().Lex();
Daniel Dunbar4967dbd2009-06-30 23:02:44 +0000104 MCValue Val;
105 if (ParseRelocatableExpression(Val))
106 return true;
107 Op = X86Operand::CreateImm(Val);
Chris Lattnere6c561b2009-06-23 18:41:30 +0000108 return false;
Daniel Dunbar4967dbd2009-06-30 23:02:44 +0000109 }
Daniel Dunbarc479a542009-07-28 16:08:33 +0000110 case AsmToken::Star: {
Daniel Dunbar46978972009-07-28 18:17:26 +0000111 getLexer().Lex(); // Eat the star.
Chris Lattnere6c561b2009-06-23 18:41:30 +0000112
Daniel Dunbar46978972009-07-28 18:17:26 +0000113 if (getLexer().is(AsmToken::Register)) {
Daniel Dunbarb336eac2009-07-02 01:58:24 +0000114 if (ParseX86Register(Op))
115 return true;
Chris Lattnere6c561b2009-06-23 18:41:30 +0000116 } else if (ParseX86MemOperand(Op))
117 return true;
118
Daniel Dunbarb336eac2009-07-02 01:58:24 +0000119 // FIXME: Note the '*' in the operand for use by the matcher.
Chris Lattnere6c561b2009-06-23 18:41:30 +0000120 return false;
121 }
122 }
123}
124
125/// ParseX86MemOperand: segment: disp(basereg, indexreg, scale)
126bool AsmParser::ParseX86MemOperand(X86Operand &Op) {
127 // FIXME: If SegReg ':' (e.g. %gs:), eat and remember.
128 unsigned SegReg = 0;
129
130 // We have to disambiguate a parenthesized expression "(4+5)" from the start
131 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
132 // only way to do this without lookahead is to eat the ( and see what is after
133 // it.
Daniel Dunbar4967dbd2009-06-30 23:02:44 +0000134 MCValue Disp = MCValue::get(0, 0, 0);
Daniel Dunbar46978972009-07-28 18:17:26 +0000135 if (getLexer().isNot(AsmToken::LParen)) {
Daniel Dunbar4967dbd2009-06-30 23:02:44 +0000136 if (ParseRelocatableExpression(Disp)) return true;
Chris Lattnere6c561b2009-06-23 18:41:30 +0000137
138 // After parsing the base expression we could either have a parenthesized
139 // memory address or not. If not, return now. If so, eat the (.
Daniel Dunbar46978972009-07-28 18:17:26 +0000140 if (getLexer().isNot(AsmToken::LParen)) {
Chris Lattnere6c561b2009-06-23 18:41:30 +0000141 Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 0);
142 return false;
143 }
144
145 // Eat the '('.
Daniel Dunbar46978972009-07-28 18:17:26 +0000146 getLexer().Lex();
Chris Lattnere6c561b2009-06-23 18:41:30 +0000147 } else {
148 // Okay, we have a '('. We don't know if this is an expression or not, but
149 // so we have to eat the ( to see beyond it.
Daniel Dunbar46978972009-07-28 18:17:26 +0000150 getLexer().Lex(); // Eat the '('.
Chris Lattnere6c561b2009-06-23 18:41:30 +0000151
Daniel Dunbar46978972009-07-28 18:17:26 +0000152 if (getLexer().is(AsmToken::Register) || getLexer().is(AsmToken::Comma)) {
Chris Lattnere6c561b2009-06-23 18:41:30 +0000153 // Nothing to do here, fall into the code below with the '(' part of the
154 // memory operand consumed.
155 } else {
156 // It must be an parenthesized expression, parse it now.
Daniel Dunbar5ebef272009-07-02 02:09:07 +0000157 if (ParseParenRelocatableExpression(Disp))
Chris Lattnere6c561b2009-06-23 18:41:30 +0000158 return true;
159
160 // After parsing the base expression we could either have a parenthesized
161 // memory address or not. If not, return now. If so, eat the (.
Daniel Dunbar46978972009-07-28 18:17:26 +0000162 if (getLexer().isNot(AsmToken::LParen)) {
Chris Lattnere6c561b2009-06-23 18:41:30 +0000163 Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 0);
164 return false;
165 }
166
167 // Eat the '('.
Daniel Dunbar46978972009-07-28 18:17:26 +0000168 getLexer().Lex();
Chris Lattnere6c561b2009-06-23 18:41:30 +0000169 }
170 }
171
172 // If we reached here, then we just ate the ( of the memory operand. Process
173 // the rest of the memory operand.
Daniel Dunbar3aafe4d2009-07-02 00:51:52 +0000174 unsigned BaseReg = 0, IndexReg = 0, Scale = 0;
Chris Lattnere6c561b2009-06-23 18:41:30 +0000175
Daniel Dunbar46978972009-07-28 18:17:26 +0000176 if (getLexer().is(AsmToken::Register)) {
Daniel Dunbarb336eac2009-07-02 01:58:24 +0000177 if (ParseX86Register(Op))
178 return true;
179 BaseReg = Op.getReg();
Chris Lattnere6c561b2009-06-23 18:41:30 +0000180 }
181
Daniel Dunbar46978972009-07-28 18:17:26 +0000182 if (getLexer().is(AsmToken::Comma)) {
183 getLexer().Lex(); // Eat the comma.
Daniel Dunbarb901d872009-07-02 02:26:39 +0000184
185 // Following the comma we should have either an index register, or a scale
186 // value. We don't support the later form, but we want to parse it
187 // correctly.
188 //
189 // Not that even though it would be completely consistent to support syntax
190 // like "1(%eax,,1)", the assembler doesn't.
Daniel Dunbar46978972009-07-28 18:17:26 +0000191 if (getLexer().is(AsmToken::Register)) {
Daniel Dunbarb336eac2009-07-02 01:58:24 +0000192 if (ParseX86Register(Op))
193 return true;
194 IndexReg = Op.getReg();
Chris Lattnere6c561b2009-06-23 18:41:30 +0000195 Scale = 1; // If not specified, the scale defaults to 1.
Chris Lattnere6c561b2009-06-23 18:41:30 +0000196
Daniel Dunbar46978972009-07-28 18:17:26 +0000197 if (getLexer().isNot(AsmToken::RParen)) {
Daniel Dunbarb901d872009-07-02 02:26:39 +0000198 // Parse the scale amount:
199 // ::= ',' [scale-expression]
Daniel Dunbar46978972009-07-28 18:17:26 +0000200 if (getLexer().isNot(AsmToken::Comma))
Daniel Dunbarb901d872009-07-02 02:26:39 +0000201 return true;
Daniel Dunbar46978972009-07-28 18:17:26 +0000202 getLexer().Lex(); // Eat the comma.
Chris Lattnere6c561b2009-06-23 18:41:30 +0000203
Daniel Dunbar46978972009-07-28 18:17:26 +0000204 if (getLexer().isNot(AsmToken::RParen)) {
Daniel Dunbarb901d872009-07-02 02:26:39 +0000205 int64_t ScaleVal;
206 if (ParseAbsoluteExpression(ScaleVal))
207 return true;
208
209 // Validate the scale amount.
210 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8)
211 return TokError("scale factor in address must be 1, 2, 4 or 8");
212 Scale = (unsigned)ScaleVal;
213 }
Chris Lattnere6c561b2009-06-23 18:41:30 +0000214 }
Daniel Dunbar46978972009-07-28 18:17:26 +0000215 } else if (getLexer().isNot(AsmToken::RParen)) {
Daniel Dunbarb901d872009-07-02 02:26:39 +0000216 // Otherwise we have the unsupported form of a scale amount without an
217 // index.
Daniel Dunbar46978972009-07-28 18:17:26 +0000218 SMLoc Loc = getLexer().getTok().getLoc();
Daniel Dunbarb901d872009-07-02 02:26:39 +0000219
220 int64_t Value;
221 if (ParseAbsoluteExpression(Value))
222 return true;
223
224 return Error(Loc, "cannot have scale factor without index register");
Chris Lattnere6c561b2009-06-23 18:41:30 +0000225 }
226 }
227
228 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
Daniel Dunbar46978972009-07-28 18:17:26 +0000229 if (getLexer().isNot(AsmToken::RParen))
Chris Lattnere6c561b2009-06-23 18:41:30 +0000230 return TokError("unexpected token in memory operand");
Daniel Dunbar46978972009-07-28 18:17:26 +0000231 getLexer().Lex(); // Eat the ')'.
Chris Lattnere6c561b2009-06-23 18:41:30 +0000232
Daniel Dunbar3aafe4d2009-07-02 00:51:52 +0000233 Op = X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale);
Chris Lattnere6c561b2009-06-23 18:41:30 +0000234 return false;
235}
236
Daniel Dunbard1d1e832009-06-30 23:38:38 +0000237/// MatchX86Inst - Convert a parsed instruction name and operand list into a
238/// concrete instruction.
Daniel Dunbar9a7b61d2009-07-27 21:49:56 +0000239static bool MatchX86Inst(const StringRef &Name,
Daniel Dunbard1d1e832009-06-30 23:38:38 +0000240 llvm::SmallVector<AsmParser::X86Operand, 3> &Operands,
241 MCInst &Inst) {
242 return false;
243}
244
Chris Lattnere6c561b2009-06-23 18:41:30 +0000245/// ParseX86InstOperands - Parse the operands of an X86 instruction and return
246/// them as the operands of an MCInst.
Daniel Dunbar9a7b61d2009-07-27 21:49:56 +0000247bool AsmParser::ParseX86InstOperands(const StringRef &InstName, MCInst &Inst) {
Daniel Dunbard1d1e832009-06-30 23:38:38 +0000248 llvm::SmallVector<X86Operand, 3> Operands;
Chris Lattnere6c561b2009-06-23 18:41:30 +0000249
Daniel Dunbar46978972009-07-28 18:17:26 +0000250 if (getLexer().isNot(AsmToken::EndOfStatement)) {
Daniel Dunbard1d1e832009-06-30 23:38:38 +0000251 // Read the first operand.
252 Operands.push_back(X86Operand());
253 if (ParseX86Operand(Operands.back()))
Chris Lattnere6c561b2009-06-23 18:41:30 +0000254 return true;
Daniel Dunbard1d1e832009-06-30 23:38:38 +0000255
Daniel Dunbar46978972009-07-28 18:17:26 +0000256 while (getLexer().is(AsmToken::Comma)) {
257 getLexer().Lex(); // Eat the comma.
Daniel Dunbard1d1e832009-06-30 23:38:38 +0000258
259 // Parse and remember the operand.
260 Operands.push_back(X86Operand());
261 if (ParseX86Operand(Operands.back()))
262 return true;
263 }
Chris Lattnere6c561b2009-06-23 18:41:30 +0000264 }
Daniel Dunbard1d1e832009-06-30 23:38:38 +0000265
266 return MatchX86Inst(InstName, Operands, Inst);
Chris Lattnere6c561b2009-06-23 18:41:30 +0000267}