blob: e493812e887dfae4e69ff8d241463757d9d0e612 [file] [log] [blame]
//===- AsmParser.cpp - Parser for Assembly Files --------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class implements the parser for assembly files.
//
//===----------------------------------------------------------------------===//
13
14#include "AsmParser.h"
Daniel Dunbarecc63f82009-06-23 22:01:43 +000015#include "llvm/MC/MCContext.h"
Chris Lattner29dfe7c2009-06-23 18:41:30 +000016#include "llvm/MC/MCInst.h"
Daniel Dunbarecc63f82009-06-23 22:01:43 +000017#include "llvm/MC/MCStreamer.h"
Chris Lattnerb0789ed2009-06-21 20:54:55 +000018#include "llvm/Support/SourceMgr.h"
19#include "llvm/Support/raw_ostream.h"
Chris Lattner27aa7d22009-06-21 20:16:42 +000020using namespace llvm;
21
Chris Lattner14ee48a2009-06-21 21:22:11 +000022bool AsmParser::Error(SMLoc L, const char *Msg) {
23 Lexer.PrintMessage(L, Msg);
24 return true;
25}
26
27bool AsmParser::TokError(const char *Msg) {
28 Lexer.PrintMessage(Lexer.getLoc(), Msg);
29 return true;
30}
31
Chris Lattner27aa7d22009-06-21 20:16:42 +000032bool AsmParser::Run() {
Chris Lattnerb0789ed2009-06-21 20:54:55 +000033 // Prime the lexer.
34 Lexer.Lex();
35
36 while (Lexer.isNot(asmtok::Eof))
37 if (ParseStatement())
38 return true;
39
40 return false;
41}
42
Chris Lattner2cf5f142009-06-22 01:29:09 +000043/// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
44void AsmParser::EatToEndOfStatement() {
45 while (Lexer.isNot(asmtok::EndOfStatement) &&
46 Lexer.isNot(asmtok::Eof))
47 Lexer.Lex();
48
49 // Eat EOL.
50 if (Lexer.is(asmtok::EndOfStatement))
51 Lexer.Lex();
52}
53
Chris Lattnerc4193832009-06-22 05:51:26 +000054
Chris Lattner74ec1a32009-06-22 06:32:03 +000055/// ParseParenExpr - Parse a paren expression and return it.
56/// NOTE: This assumes the leading '(' has already been consumed.
57///
58/// parenexpr ::= expr)
59///
60bool AsmParser::ParseParenExpr(int64_t &Res) {
61 if (ParseExpression(Res)) return true;
62 if (Lexer.isNot(asmtok::RParen))
63 return TokError("expected ')' in parentheses expression");
64 Lexer.Lex();
65 return false;
66}
Chris Lattnerc4193832009-06-22 05:51:26 +000067
Chris Lattner74ec1a32009-06-22 06:32:03 +000068/// ParsePrimaryExpr - Parse a primary expression and return it.
69/// primaryexpr ::= (parenexpr
70/// primaryexpr ::= symbol
71/// primaryexpr ::= number
72/// primaryexpr ::= ~,+,- primaryexpr
73bool AsmParser::ParsePrimaryExpr(int64_t &Res) {
Chris Lattnerc4193832009-06-22 05:51:26 +000074 switch (Lexer.getKind()) {
75 default:
76 return TokError("unknown token in expression");
77 case asmtok::Identifier:
78 // This is a label, this should be parsed as part of an expression, to
79 // handle things like LFOO+4
80 Res = 0; // FIXME.
81 Lexer.Lex(); // Eat identifier.
82 return false;
83 case asmtok::IntVal:
84 Res = Lexer.getCurIntVal();
85 Lexer.Lex(); // Eat identifier.
86 return false;
Chris Lattner74ec1a32009-06-22 06:32:03 +000087 case asmtok::LParen:
88 Lexer.Lex(); // Eat the '('.
89 return ParseParenExpr(Res);
90 case asmtok::Tilde:
91 case asmtok::Plus:
92 case asmtok::Minus:
93 Lexer.Lex(); // Eat the operator.
94 return ParsePrimaryExpr(Res);
Chris Lattnerc4193832009-06-22 05:51:26 +000095 }
96}
Chris Lattner74ec1a32009-06-22 06:32:03 +000097
98/// ParseExpression - Parse an expression and return it.
99///
100/// expr ::= expr +,- expr -> lowest.
101/// expr ::= expr |,^,&,! expr -> middle.
102/// expr ::= expr *,/,%,<<,>> expr -> highest.
103/// expr ::= primaryexpr
104///
105bool AsmParser::ParseExpression(int64_t &Res) {
Chris Lattner8dfbe6c2009-06-23 05:57:07 +0000106 return ParsePrimaryExpr(Res) ||
107 ParseBinOpRHS(1, Res);
Chris Lattner74ec1a32009-06-22 06:32:03 +0000108}
Chris Lattner8dfbe6c2009-06-23 05:57:07 +0000109
110static unsigned getBinOpPrecedence(asmtok::TokKind K) {
111 switch (K) {
112 default: return 0; // not a binop.
113 case asmtok::Plus:
114 case asmtok::Minus:
115 return 1;
116 case asmtok::Pipe:
117 case asmtok::Caret:
118 case asmtok::Amp:
119 case asmtok::Exclaim:
120 return 2;
121 case asmtok::Star:
122 case asmtok::Slash:
123 case asmtok::Percent:
124 case asmtok::LessLess:
125 case asmtok::GreaterGreater:
126 return 3;
127 }
128}
129
130
131/// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'.
132/// Res contains the LHS of the expression on input.
133bool AsmParser::ParseBinOpRHS(unsigned Precedence, int64_t &Res) {
134 while (1) {
135 unsigned TokPrec = getBinOpPrecedence(Lexer.getKind());
136
137 // If the next token is lower precedence than we are allowed to eat, return
138 // successfully with what we ate already.
139 if (TokPrec < Precedence)
140 return false;
141
142 //asmtok::TokKind BinOp = Lexer.getKind();
143 Lexer.Lex();
144
145 // Eat the next primary expression.
146 int64_t RHS;
147 if (ParsePrimaryExpr(RHS)) return true;
148
149 // If BinOp binds less tightly with RHS than the operator after RHS, let
150 // the pending operator take RHS as its LHS.
151 unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind());
152 if (TokPrec < NextTokPrec) {
153 if (ParseBinOpRHS(Precedence+1, RHS)) return true;
154 }
155
156 // Merge LHS/RHS: fixme use the right operator etc.
157 Res += RHS;
158 }
159}
160
Chris Lattnerc4193832009-06-22 05:51:26 +0000161
162
163
Chris Lattnerb0789ed2009-06-21 20:54:55 +0000164/// ParseStatement:
165/// ::= EndOfStatement
Chris Lattner2cf5f142009-06-22 01:29:09 +0000166/// ::= Label* Directive ...Operands... EndOfStatement
167/// ::= Label* Identifier OperandList* EndOfStatement
Chris Lattnerb0789ed2009-06-21 20:54:55 +0000168bool AsmParser::ParseStatement() {
169 switch (Lexer.getKind()) {
170 default:
Chris Lattner14ee48a2009-06-21 21:22:11 +0000171 return TokError("unexpected token at start of statement");
Chris Lattnerb0789ed2009-06-21 20:54:55 +0000172 case asmtok::EndOfStatement:
173 Lexer.Lex();
174 return false;
175 case asmtok::Identifier:
176 break;
177 // TODO: Recurse on local labels etc.
178 }
179
180 // If we have an identifier, handle it as the key symbol.
Chris Lattner2cf5f142009-06-22 01:29:09 +0000181 SMLoc IDLoc = Lexer.getLoc();
Chris Lattnerfaf32c12009-06-24 00:33:19 +0000182 const char *IDVal = Lexer.getCurStrVal();
Chris Lattnerb0789ed2009-06-21 20:54:55 +0000183
184 // Consume the identifier, see what is after it.
185 if (Lexer.Lex() == asmtok::Colon) {
186 // identifier ':' -> Label.
187 Lexer.Lex();
Chris Lattnerc69485e2009-06-24 04:31:49 +0000188
189 // Since we saw a label, create a symbol and emit it.
190 // FIXME: If the label starts with L it is an assembler temporary label.
191 // Why does the client of this api need to know this?
192 Out.EmitLabel(Ctx.GetOrCreateSymbol(IDVal));
193
Chris Lattnerb0789ed2009-06-21 20:54:55 +0000194 return ParseStatement();
195 }
196
197 // Otherwise, we have a normal instruction or directive.
Chris Lattner2cf5f142009-06-22 01:29:09 +0000198 if (IDVal[0] == '.') {
Chris Lattner529fb542009-06-24 05:13:15 +0000199 // FIXME: This should be driven based on a hash lookup and callback.
Chris Lattner9a023f72009-06-24 04:43:34 +0000200 if (!strcmp(IDVal, ".section"))
Chris Lattner529fb542009-06-24 05:13:15 +0000201 return ParseDirectiveDarwinSection();
202 if (!strcmp(IDVal, ".text"))
203 // FIXME: This changes behavior based on the -static flag to the
204 // assembler.
205 return ParseDirectiveSectionSwitch("__TEXT,__text",
206 "regular,pure_instructions");
207 if (!strcmp(IDVal, ".const"))
208 return ParseDirectiveSectionSwitch("__TEXT,__const");
209 if (!strcmp(IDVal, ".static_const"))
210 return ParseDirectiveSectionSwitch("__TEXT,__static_const");
211 if (!strcmp(IDVal, ".cstring"))
212 return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
213 if (!strcmp(IDVal, ".literal4"))
214 return ParseDirectiveSectionSwitch("__TEXT,__literal4", "4byte_literals");
215 if (!strcmp(IDVal, ".literal8"))
216 return ParseDirectiveSectionSwitch("__TEXT,__literal8", "8byte_literals");
217 if (!strcmp(IDVal, ".literal16"))
218 return ParseDirectiveSectionSwitch("__TEXT,__literal16",
219 "16byte_literals");
220 if (!strcmp(IDVal, ".constructor"))
221 return ParseDirectiveSectionSwitch("__TEXT,__constructor");
222 if (!strcmp(IDVal, ".destructor"))
223 return ParseDirectiveSectionSwitch("__TEXT,__destructor");
224 if (!strcmp(IDVal, ".fvmlib_init0"))
225 return ParseDirectiveSectionSwitch("__TEXT,__fvmlib_init0");
226 if (!strcmp(IDVal, ".fvmlib_init1"))
227 return ParseDirectiveSectionSwitch("__TEXT,__fvmlib_init1");
228 if (!strcmp(IDVal, ".symbol_stub")) // FIXME: Different on PPC.
229 return ParseDirectiveSectionSwitch("__IMPORT,__jump_table,symbol_stubs",
230 "self_modifying_code+pure_instructions,5");
231 // FIXME: .picsymbol_stub on PPC.
232 if (!strcmp(IDVal, ".data"))
233 return ParseDirectiveSectionSwitch("__DATA,__data");
234 if (!strcmp(IDVal, ".static_data"))
235 return ParseDirectiveSectionSwitch("__DATA,__static_data");
236 if (!strcmp(IDVal, ".non_lazy_symbol_pointer"))
237 return ParseDirectiveSectionSwitch("__DATA,__nl_symbol_pointer",
238 "non_lazy_symbol_pointers");
239 if (!strcmp(IDVal, ".lazy_symbol_pointer"))
240 return ParseDirectiveSectionSwitch("__DATA,__la_symbol_pointer",
241 "lazy_symbol_pointers");
242 if (!strcmp(IDVal, ".dyld"))
243 return ParseDirectiveSectionSwitch("__DATA,__dyld");
244 if (!strcmp(IDVal, ".mod_init_func"))
245 return ParseDirectiveSectionSwitch("__DATA,__mod_init_func",
246 "mod_init_funcs");
247 if (!strcmp(IDVal, ".mod_term_func"))
248 return ParseDirectiveSectionSwitch("__DATA,__mod_term_func",
249 "mod_term_funcs");
250 if (!strcmp(IDVal, ".const_data"))
251 return ParseDirectiveSectionSwitch("__DATA,__const", "regular");
252
253
254 // FIXME: Verify attributes on sections.
255 if (!strcmp(IDVal, ".objc_class"))
256 return ParseDirectiveSectionSwitch("__OBJC,__class");
257 if (!strcmp(IDVal, ".objc_meta_class"))
258 return ParseDirectiveSectionSwitch("__OBJC,__meta_class");
259 if (!strcmp(IDVal, ".objc_cat_cls_meth"))
260 return ParseDirectiveSectionSwitch("__OBJC,__cat_cls_meth");
261 if (!strcmp(IDVal, ".objc_cat_inst_meth"))
262 return ParseDirectiveSectionSwitch("__OBJC,__cat_inst_meth");
263 if (!strcmp(IDVal, ".objc_protocol"))
264 return ParseDirectiveSectionSwitch("__OBJC,__protocol");
265 if (!strcmp(IDVal, ".objc_string_object"))
266 return ParseDirectiveSectionSwitch("__OBJC,__string_object");
267 if (!strcmp(IDVal, ".objc_cls_meth"))
268 return ParseDirectiveSectionSwitch("__OBJC,__cls_meth");
269 if (!strcmp(IDVal, ".objc_inst_meth"))
270 return ParseDirectiveSectionSwitch("__OBJC,__inst_meth");
271 if (!strcmp(IDVal, ".objc_cls_refs"))
272 return ParseDirectiveSectionSwitch("__OBJC,__cls_refs");
273 if (!strcmp(IDVal, ".objc_message_refs"))
274 return ParseDirectiveSectionSwitch("__OBJC,__message_refs");
275 if (!strcmp(IDVal, ".objc_symbols"))
276 return ParseDirectiveSectionSwitch("__OBJC,__symbols");
277 if (!strcmp(IDVal, ".objc_category"))
278 return ParseDirectiveSectionSwitch("__OBJC,__category");
279 if (!strcmp(IDVal, ".objc_class_vars"))
280 return ParseDirectiveSectionSwitch("__OBJC,__class_vars");
281 if (!strcmp(IDVal, ".objc_instance_vars"))
282 return ParseDirectiveSectionSwitch("__OBJC,__instance_vars");
283 if (!strcmp(IDVal, ".objc_module_info"))
284 return ParseDirectiveSectionSwitch("__OBJC,__module_info");
285 if (!strcmp(IDVal, ".objc_class_names"))
286 return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
287 if (!strcmp(IDVal, ".objc_meth_var_types"))
288 return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
289 if (!strcmp(IDVal, ".objc_meth_var_names"))
290 return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
291 if (!strcmp(IDVal, ".objc_selector_strs"))
292 return ParseDirectiveSectionSwitch("__OBJC,__selector_strs");
Chris Lattner9a023f72009-06-24 04:43:34 +0000293
294
Chris Lattner2cf5f142009-06-22 01:29:09 +0000295 Lexer.PrintMessage(IDLoc, "warning: ignoring directive for now");
296 EatToEndOfStatement();
297 return false;
298 }
Chris Lattnerb0789ed2009-06-21 20:54:55 +0000299
Chris Lattner2cf5f142009-06-22 01:29:09 +0000300
Chris Lattner29dfe7c2009-06-23 18:41:30 +0000301 MCInst Inst;
302 if (ParseX86InstOperands(Inst))
303 return true;
Chris Lattner2cf5f142009-06-22 01:29:09 +0000304
305 if (Lexer.isNot(asmtok::EndOfStatement))
Chris Lattner9a023f72009-06-24 04:43:34 +0000306 return TokError("unexpected token in argument list");
Chris Lattner2cf5f142009-06-22 01:29:09 +0000307
308 // Eat the end of statement marker.
309 Lexer.Lex();
310
311 // Instruction is good, process it.
Chris Lattner29dfe7c2009-06-23 18:41:30 +0000312 outs() << "Found instruction: " << IDVal << " with " << Inst.getNumOperands()
Chris Lattner2cf5f142009-06-22 01:29:09 +0000313 << " operands.\n";
314
315 // Skip to end of line for now.
Chris Lattner27aa7d22009-06-21 20:16:42 +0000316 return false;
317}
Chris Lattner9a023f72009-06-24 04:43:34 +0000318
319/// ParseDirectiveSection:
Chris Lattner529fb542009-06-24 05:13:15 +0000320/// ::= .section identifier (',' identifier)*
321/// FIXME: This should actually parse out the segment, section, attributes and
322/// sizeof_stub fields.
323bool AsmParser::ParseDirectiveDarwinSection() {
Chris Lattner9a023f72009-06-24 04:43:34 +0000324 if (Lexer.isNot(asmtok::Identifier))
325 return TokError("expected identifier after '.section' directive");
326
327 std::string Section = Lexer.getCurStrVal();
328 Lexer.Lex();
329
330 // Accept a comma separated list of modifiers.
331 while (Lexer.is(asmtok::Comma)) {
332 Lexer.Lex();
333
334 if (Lexer.isNot(asmtok::Identifier))
335 return TokError("expected identifier in '.section' directive");
336 Section += ',';
337 Section += Lexer.getCurStrVal();
338 Lexer.Lex();
339 }
340
341 if (Lexer.isNot(asmtok::EndOfStatement))
342 return TokError("unexpected token in '.section' directive");
343 Lexer.Lex();
344
345 Out.SwitchSection(Ctx.GetSection(Section.c_str()));
346 return false;
347}
348
Chris Lattner529fb542009-06-24 05:13:15 +0000349bool AsmParser::ParseDirectiveSectionSwitch(const char *Section,
350 const char *Directives) {
351 if (Lexer.isNot(asmtok::EndOfStatement))
352 return TokError("unexpected token in section switching directive");
353 Lexer.Lex();
354
355 std::string SectionStr = Section;
356 if (Directives && Directives[0]) {
357 SectionStr += ",";
358 SectionStr += Directives;
359 }
360
361 Out.SwitchSection(Ctx.GetSection(Section));
362 return false;
363}