blob: 7b744fbde65ac8288e4d162a53d26521caaa2eaa [file] [log] [blame]
Chris Lattnera59e8772009-06-21 07:19:10 +00001//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This class implements the lexer for assembly files.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AsmLexer.h"
Chris Lattnerfaf32c12009-06-24 00:33:19 +000015#include "llvm/ADT/StringSet.h"
Chris Lattnera59e8772009-06-21 07:19:10 +000016#include "llvm/Support/SourceMgr.h"
17#include "llvm/Support/MemoryBuffer.h"
Chris Lattnerbcd0b8d2009-06-23 00:24:36 +000018#include "llvm/Config/config.h" // for strtoull.
Chris Lattner4651bca2009-06-21 19:21:25 +000019#include <cerrno>
Duncan Sands66b9f292009-06-22 06:59:32 +000020#include <cstdio>
Chris Lattner4506bd22009-06-21 19:43:50 +000021#include <cstdlib>
Chris Lattnera59e8772009-06-21 07:19:10 +000022using namespace llvm;
23
Chris Lattnerfaf32c12009-06-24 00:33:19 +000024static StringSet<> &getSS(void *TheSS) {
25 return *(StringSet<>*)TheSS;
26}
27
Chris Lattnera59e8772009-06-21 07:19:10 +000028AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
29 CurBuffer = 0;
30 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
31 CurPtr = CurBuf->getBufferStart();
32 TokStart = 0;
Chris Lattnerfaf32c12009-06-24 00:33:19 +000033
34 TheStringSet = new StringSet<>();
35}
36
37AsmLexer::~AsmLexer() {
38 delete &getSS(TheStringSet);
Chris Lattnera59e8772009-06-21 07:19:10 +000039}
40
Chris Lattner4651bca2009-06-21 19:21:25 +000041SMLoc AsmLexer::getLoc() const {
42 return SMLoc::getFromPointer(TokStart);
43}
44
Daniel Dunbar3fb76832009-06-30 00:49:23 +000045void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg,
46 const char *Type) const {
47 SrcMgr.PrintMessage(Loc, Msg, Type);
Chris Lattnera59e8772009-06-21 07:19:10 +000048}
49
Chris Lattner4651bca2009-06-21 19:21:25 +000050/// ReturnError - Set the error to the specified string at the specified
51/// location. This is defined to always return asmtok::Error.
52asmtok::TokKind AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
Daniel Dunbar3fb76832009-06-30 00:49:23 +000053 SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
Chris Lattner4651bca2009-06-21 19:21:25 +000054 return asmtok::Error;
55}
56
Chris Lattnera59e8772009-06-21 07:19:10 +000057int AsmLexer::getNextChar() {
58 char CurChar = *CurPtr++;
59 switch (CurChar) {
60 default:
61 return (unsigned char)CurChar;
62 case 0: {
63 // A nul character in the stream is either the end of the current buffer or
64 // a random nul in the file. Disambiguate that here.
65 if (CurPtr-1 != CurBuf->getBufferEnd())
66 return 0; // Just whitespace.
67
68 // If this is the end of an included file, pop the parent file off the
69 // include stack.
70 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
71 if (ParentIncludeLoc != SMLoc()) {
72 CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
73 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
74 CurPtr = ParentIncludeLoc.getPointer();
75 return getNextChar();
76 }
77
78 // Otherwise, return end of file.
79 --CurPtr; // Another call to lex will return EOF again.
80 return EOF;
81 }
82 }
83}
84
Chris Lattner4651bca2009-06-21 19:21:25 +000085/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
86asmtok::TokKind AsmLexer::LexIdentifier() {
87 while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
88 *CurPtr == '.' || *CurPtr == '@')
89 ++CurPtr;
Chris Lattnerfaf32c12009-06-24 00:33:19 +000090 // Unique string.
91 CurStrVal =
92 getSS(TheStringSet).GetOrCreateValue(TokStart, CurPtr, 0).getKeyData();
Chris Lattner4651bca2009-06-21 19:21:25 +000093 return asmtok::Identifier;
94}
95
96/// LexPercent: Register: %[a-zA-Z0-9]+
97asmtok::TokKind AsmLexer::LexPercent() {
98 if (!isalnum(*CurPtr))
Chris Lattner8dfbe6c2009-06-23 05:57:07 +000099 return asmtok::Percent; // Single %.
100
Chris Lattner4651bca2009-06-21 19:21:25 +0000101 while (isalnum(*CurPtr))
102 ++CurPtr;
Chris Lattnerfaf32c12009-06-24 00:33:19 +0000103
104 // Unique string.
105 CurStrVal =
106 getSS(TheStringSet).GetOrCreateValue(TokStart, CurPtr, 0).getKeyData();
Chris Lattner4651bca2009-06-21 19:21:25 +0000107 return asmtok::Register;
108}
109
110/// LexSlash: Slash: /
111/// C-Style Comment: /* ... */
112asmtok::TokKind AsmLexer::LexSlash() {
Daniel Dunbar383a4a82009-06-29 21:58:22 +0000113 switch (*CurPtr) {
114 case '*': break; // C style comment.
115 case '/': return ++CurPtr, LexLineComment();
116 default: return asmtok::Slash;
117 }
Chris Lattner4651bca2009-06-21 19:21:25 +0000118
119 // C Style comment.
120 ++CurPtr; // skip the star.
121 while (1) {
122 int CurChar = getNextChar();
123 switch (CurChar) {
124 case EOF:
Chris Lattner27aa7d22009-06-21 20:16:42 +0000125 return ReturnError(TokStart, "unterminated comment");
Chris Lattner4651bca2009-06-21 19:21:25 +0000126 case '*':
127 // End of the comment?
128 if (CurPtr[0] != '/') break;
129
130 ++CurPtr; // End the */.
131 return LexToken();
132 }
133 }
134}
135
Daniel Dunbar383a4a82009-06-29 21:58:22 +0000136/// LexLineComment: Comment: #[^\n]*
137/// : //[^\n]*
138asmtok::TokKind AsmLexer::LexLineComment() {
Chris Lattner4651bca2009-06-21 19:21:25 +0000139 int CurChar = getNextChar();
140 while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF)
141 CurChar = getNextChar();
142
143 if (CurChar == EOF)
144 return asmtok::Eof;
145 return asmtok::EndOfStatement;
146}
147
148
149/// LexDigit: First character is [0-9].
150/// Local Label: [0-9][:]
151/// Forward/Backward Label: [0-9][fb]
152/// Binary integer: 0b[01]+
153/// Octal integer: 0[0-7]+
154/// Hex integer: 0x[0-9a-fA-F]+
155/// Decimal integer: [1-9][0-9]*
156/// TODO: FP literal.
157asmtok::TokKind AsmLexer::LexDigit() {
158 if (*CurPtr == ':')
Chris Lattner27aa7d22009-06-21 20:16:42 +0000159 return ReturnError(TokStart, "FIXME: local label not implemented");
Chris Lattner4651bca2009-06-21 19:21:25 +0000160 if (*CurPtr == 'f' || *CurPtr == 'b')
Chris Lattner27aa7d22009-06-21 20:16:42 +0000161 return ReturnError(TokStart, "FIXME: directional label not implemented");
Chris Lattner4651bca2009-06-21 19:21:25 +0000162
163 // Decimal integer: [1-9][0-9]*
164 if (CurPtr[-1] != '0') {
165 while (isdigit(*CurPtr))
166 ++CurPtr;
167 CurIntVal = strtoll(TokStart, 0, 10);
168 return asmtok::IntVal;
169 }
170
171 if (*CurPtr == 'b') {
172 ++CurPtr;
173 const char *NumStart = CurPtr;
174 while (CurPtr[0] == '0' || CurPtr[0] == '1')
175 ++CurPtr;
176
177 // Requires at least one binary digit.
178 if (CurPtr == NumStart)
179 return ReturnError(CurPtr-2, "Invalid binary number");
180 CurIntVal = strtoll(NumStart, 0, 2);
181 return asmtok::IntVal;
182 }
183
184 if (*CurPtr == 'x') {
185 ++CurPtr;
186 const char *NumStart = CurPtr;
187 while (isxdigit(CurPtr[0]))
188 ++CurPtr;
189
190 // Requires at least one hex digit.
191 if (CurPtr == NumStart)
192 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
193
194 errno = 0;
195 CurIntVal = strtoll(NumStart, 0, 16);
196 if (errno == EINVAL)
197 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
198 if (errno == ERANGE) {
199 errno = 0;
200 CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
201 if (errno == EINVAL)
202 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
203 if (errno == ERANGE)
204 return ReturnError(CurPtr-2, "Hexadecimal number out of range");
205 }
206 return asmtok::IntVal;
207 }
208
209 // Must be an octal number, it starts with 0.
210 while (*CurPtr >= '0' && *CurPtr <= '7')
211 ++CurPtr;
212 CurIntVal = strtoll(TokStart, 0, 8);
213 return asmtok::IntVal;
214}
215
Chris Lattner10a907d2009-06-21 19:56:35 +0000216/// LexQuote: String: "..."
217asmtok::TokKind AsmLexer::LexQuote() {
218 int CurChar = getNextChar();
219 // TODO: does gas allow multiline string constants?
220 while (CurChar != '"') {
221 if (CurChar == '\\') {
222 // Allow \", etc.
223 CurChar = getNextChar();
224 }
225
Chris Lattner14ee48a2009-06-21 21:22:11 +0000226 if (CurChar == EOF)
227 return ReturnError(TokStart, "unterminated string constant");
Chris Lattner10a907d2009-06-21 19:56:35 +0000228
229 CurChar = getNextChar();
230 }
231
Chris Lattnerfaf32c12009-06-24 00:33:19 +0000232 // Unique string, include quotes for now.
233 CurStrVal =
234 getSS(TheStringSet).GetOrCreateValue(TokStart, CurPtr, 0).getKeyData();
Chris Lattner10a907d2009-06-21 19:56:35 +0000235 return asmtok::String;
236}
237
Chris Lattner4651bca2009-06-21 19:21:25 +0000238
Chris Lattnera59e8772009-06-21 07:19:10 +0000239asmtok::TokKind AsmLexer::LexToken() {
240 TokStart = CurPtr;
241 // This always consumes at least one character.
242 int CurChar = getNextChar();
243
244 switch (CurChar) {
245 default:
Chris Lattner4651bca2009-06-21 19:21:25 +0000246 // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
247 if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
248 return LexIdentifier();
Chris Lattnera59e8772009-06-21 07:19:10 +0000249
250 // Unknown character, emit an error.
Chris Lattner27aa7d22009-06-21 20:16:42 +0000251 return ReturnError(TokStart, "invalid character in input");
Chris Lattnera59e8772009-06-21 07:19:10 +0000252 case EOF: return asmtok::Eof;
253 case 0:
254 case ' ':
255 case '\t':
Chris Lattnera59e8772009-06-21 07:19:10 +0000256 // Ignore whitespace.
257 return LexToken();
Chris Lattner4651bca2009-06-21 19:21:25 +0000258 case '\n': // FALL THROUGH.
259 case '\r': // FALL THROUGH.
260 case ';': return asmtok::EndOfStatement;
Chris Lattnera59e8772009-06-21 07:19:10 +0000261 case ':': return asmtok::Colon;
262 case '+': return asmtok::Plus;
263 case '-': return asmtok::Minus;
Chris Lattner74ec1a32009-06-22 06:32:03 +0000264 case '~': return asmtok::Tilde;
Chris Lattner4651bca2009-06-21 19:21:25 +0000265 case '(': return asmtok::LParen;
266 case ')': return asmtok::RParen;
267 case '*': return asmtok::Star;
268 case ',': return asmtok::Comma;
269 case '$': return asmtok::Dollar;
Daniel Dunbar475839e2009-06-29 20:37:27 +0000270 case '=':
271 if (*CurPtr == '=')
272 return ++CurPtr, asmtok::EqualEqual;
273 return asmtok::Equal;
274 case '|':
275 if (*CurPtr == '|')
276 return ++CurPtr, asmtok::PipePipe;
277 return asmtok::Pipe;
Chris Lattner8dfbe6c2009-06-23 05:57:07 +0000278 case '^': return asmtok::Caret;
Daniel Dunbar475839e2009-06-29 20:37:27 +0000279 case '&':
280 if (*CurPtr == '&')
281 return ++CurPtr, asmtok::AmpAmp;
282 return asmtok::Amp;
283 case '!':
284 if (*CurPtr == '=')
285 return ++CurPtr, asmtok::ExclaimEqual;
286 return asmtok::Exclaim;
Chris Lattner4651bca2009-06-21 19:21:25 +0000287 case '%': return LexPercent();
288 case '/': return LexSlash();
Daniel Dunbar383a4a82009-06-29 21:58:22 +0000289 case '#': return LexLineComment();
Chris Lattner10a907d2009-06-21 19:56:35 +0000290 case '"': return LexQuote();
Chris Lattner4651bca2009-06-21 19:21:25 +0000291 case '0': case '1': case '2': case '3': case '4':
292 case '5': case '6': case '7': case '8': case '9':
293 return LexDigit();
Chris Lattner8dfbe6c2009-06-23 05:57:07 +0000294 case '<':
Daniel Dunbar475839e2009-06-29 20:37:27 +0000295 switch (*CurPtr) {
296 case '<': return ++CurPtr, asmtok::LessLess;
297 case '=': return ++CurPtr, asmtok::LessEqual;
298 case '>': return ++CurPtr, asmtok::LessGreater;
299 default: return asmtok::Less;
Chris Lattner8dfbe6c2009-06-23 05:57:07 +0000300 }
Chris Lattner8dfbe6c2009-06-23 05:57:07 +0000301 case '>':
Daniel Dunbar475839e2009-06-29 20:37:27 +0000302 switch (*CurPtr) {
303 case '>': return ++CurPtr, asmtok::GreaterGreater;
304 case '=': return ++CurPtr, asmtok::GreaterEqual;
305 default: return asmtok::Greater;
Chris Lattner8dfbe6c2009-06-23 05:57:07 +0000306 }
Chris Lattner4651bca2009-06-21 19:21:25 +0000307
308 // TODO: Quoted identifiers (objc methods etc)
309 // local labels: [0-9][:]
310 // Forward/backward labels: [0-9][fb]
311 // Integers, fp constants, character constants.
Chris Lattnera59e8772009-06-21 07:19:10 +0000312 }
Duncan Sands66b9f292009-06-22 06:59:32 +0000313}