blob: 234b8f33ad08b3e21708cd59eeff8b36d08eb8d3 [file] [log] [blame]
//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class implements the lexer for assembly files.
//
//===----------------------------------------------------------------------===//
13
14#include "AsmLexer.h"
Sean Callananfd0b0282010-01-21 00:19:58 +000015#include "llvm/Support/SMLoc.h"
Chris Lattnera59e8772009-06-21 07:19:10 +000016#include "llvm/Support/MemoryBuffer.h"
Chris Lattnerbcd0b8d2009-06-23 00:24:36 +000017#include "llvm/Config/config.h" // for strtoull.
Kevin Enderby9823ca92009-09-04 21:45:34 +000018#include "llvm/MC/MCAsmInfo.h"
Chris Lattner4651bca2009-06-21 19:21:25 +000019#include <cerrno>
Duncan Sands66b9f292009-06-22 06:59:32 +000020#include <cstdio>
Chris Lattner4506bd22009-06-21 19:43:50 +000021#include <cstdlib>
Chris Lattnera59e8772009-06-21 07:19:10 +000022using namespace llvm;
23
Sean Callananfd0b0282010-01-21 00:19:58 +000024AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) {
25 CurBuf = NULL;
26 CurPtr = NULL;
Chris Lattnera59e8772009-06-21 07:19:10 +000027 TokStart = 0;
Chris Lattnerfaf32c12009-06-24 00:33:19 +000028}
29
30AsmLexer::~AsmLexer() {
Chris Lattnera59e8772009-06-21 07:19:10 +000031}
32
Sean Callananfd0b0282010-01-21 00:19:58 +000033void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) {
34 CurBuf = buf;
35
36 if (ptr)
37 CurPtr = ptr;
38 else
39 CurPtr = CurBuf->getBufferStart();
40
41 TokStart = 0;
42}
43
Chris Lattner4651bca2009-06-21 19:21:25 +000044SMLoc AsmLexer::getLoc() const {
45 return SMLoc::getFromPointer(TokStart);
46}
47
Chris Lattner4651bca2009-06-21 19:21:25 +000048/// ReturnError - Set the error to the specified string at the specified
Daniel Dunbar3f872332009-07-28 16:08:33 +000049/// location. This is defined to always return AsmToken::Error.
Daniel Dunbarcb358b62009-07-28 03:00:54 +000050AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
Sean Callanan79036e42010-01-20 22:18:24 +000051 SetError(SMLoc::getFromPointer(Loc), Msg);
52
Daniel Dunbar3f872332009-07-28 16:08:33 +000053 return AsmToken(AsmToken::Error, StringRef(Loc, 0));
Chris Lattner4651bca2009-06-21 19:21:25 +000054}
55
Chris Lattnera59e8772009-06-21 07:19:10 +000056int AsmLexer::getNextChar() {
57 char CurChar = *CurPtr++;
58 switch (CurChar) {
59 default:
60 return (unsigned char)CurChar;
Sean Callananfd0b0282010-01-21 00:19:58 +000061 case 0:
Chris Lattnera59e8772009-06-21 07:19:10 +000062 // A nul character in the stream is either the end of the current buffer or
63 // a random nul in the file. Disambiguate that here.
64 if (CurPtr-1 != CurBuf->getBufferEnd())
65 return 0; // Just whitespace.
66
Chris Lattnera59e8772009-06-21 07:19:10 +000067 // Otherwise, return end of file.
68 --CurPtr; // Another call to lex will return EOF again.
69 return EOF;
70 }
Chris Lattnera59e8772009-06-21 07:19:10 +000071}
72
Chris Lattner4651bca2009-06-21 19:21:25 +000073/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
Daniel Dunbarcb358b62009-07-28 03:00:54 +000074AsmToken AsmLexer::LexIdentifier() {
Chris Lattner4651bca2009-06-21 19:21:25 +000075 while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
76 *CurPtr == '.' || *CurPtr == '@')
77 ++CurPtr;
Daniel Dunbar3f872332009-07-28 16:08:33 +000078 return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
Chris Lattner4651bca2009-06-21 19:21:25 +000079}
80
Chris Lattner4651bca2009-06-21 19:21:25 +000081/// LexSlash: Slash: /
82/// C-Style Comment: /* ... */
Daniel Dunbarcb358b62009-07-28 03:00:54 +000083AsmToken AsmLexer::LexSlash() {
Daniel Dunbar383a4a82009-06-29 21:58:22 +000084 switch (*CurPtr) {
85 case '*': break; // C style comment.
86 case '/': return ++CurPtr, LexLineComment();
Daniel Dunbar3f872332009-07-28 16:08:33 +000087 default: return AsmToken(AsmToken::Slash, StringRef(CurPtr, 1));
Daniel Dunbar383a4a82009-06-29 21:58:22 +000088 }
Chris Lattner4651bca2009-06-21 19:21:25 +000089
90 // C Style comment.
91 ++CurPtr; // skip the star.
92 while (1) {
93 int CurChar = getNextChar();
94 switch (CurChar) {
95 case EOF:
Chris Lattner27aa7d22009-06-21 20:16:42 +000096 return ReturnError(TokStart, "unterminated comment");
Chris Lattner4651bca2009-06-21 19:21:25 +000097 case '*':
98 // End of the comment?
99 if (CurPtr[0] != '/') break;
100
101 ++CurPtr; // End the */.
102 return LexToken();
103 }
104 }
105}
106
Daniel Dunbar383a4a82009-06-29 21:58:22 +0000107/// LexLineComment: Comment: #[^\n]*
108/// : //[^\n]*
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000109AsmToken AsmLexer::LexLineComment() {
110 // FIXME: This is broken if we happen to a comment at the end of a file, which
111 // was .included, and which doesn't end with a newline.
Chris Lattner4651bca2009-06-21 19:21:25 +0000112 int CurChar = getNextChar();
113 while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF)
114 CurChar = getNextChar();
115
116 if (CurChar == EOF)
Daniel Dunbar3f872332009-07-28 16:08:33 +0000117 return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0));
118 return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
Chris Lattner4651bca2009-06-21 19:21:25 +0000119}
120
121
122/// LexDigit: First character is [0-9].
123/// Local Label: [0-9][:]
124/// Forward/Backward Label: [0-9][fb]
125/// Binary integer: 0b[01]+
126/// Octal integer: 0[0-7]+
127/// Hex integer: 0x[0-9a-fA-F]+
128/// Decimal integer: [1-9][0-9]*
129/// TODO: FP literal.
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000130AsmToken AsmLexer::LexDigit() {
Chris Lattner4651bca2009-06-21 19:21:25 +0000131 if (*CurPtr == ':')
Chris Lattner27aa7d22009-06-21 20:16:42 +0000132 return ReturnError(TokStart, "FIXME: local label not implemented");
Chris Lattner4651bca2009-06-21 19:21:25 +0000133 if (*CurPtr == 'f' || *CurPtr == 'b')
Chris Lattner27aa7d22009-06-21 20:16:42 +0000134 return ReturnError(TokStart, "FIXME: directional label not implemented");
Chris Lattner4651bca2009-06-21 19:21:25 +0000135
136 // Decimal integer: [1-9][0-9]*
137 if (CurPtr[-1] != '0') {
138 while (isdigit(*CurPtr))
139 ++CurPtr;
Daniel Dunbar3f872332009-07-28 16:08:33 +0000140 return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000141 strtoll(TokStart, 0, 10));
Chris Lattner4651bca2009-06-21 19:21:25 +0000142 }
143
144 if (*CurPtr == 'b') {
145 ++CurPtr;
146 const char *NumStart = CurPtr;
147 while (CurPtr[0] == '0' || CurPtr[0] == '1')
148 ++CurPtr;
149
150 // Requires at least one binary digit.
151 if (CurPtr == NumStart)
152 return ReturnError(CurPtr-2, "Invalid binary number");
Daniel Dunbar3f872332009-07-28 16:08:33 +0000153 return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000154 strtoll(NumStart, 0, 2));
Chris Lattner4651bca2009-06-21 19:21:25 +0000155 }
156
157 if (*CurPtr == 'x') {
158 ++CurPtr;
159 const char *NumStart = CurPtr;
160 while (isxdigit(CurPtr[0]))
161 ++CurPtr;
162
163 // Requires at least one hex digit.
164 if (CurPtr == NumStart)
165 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
166
167 errno = 0;
Chris Lattner4651bca2009-06-21 19:21:25 +0000168 if (errno == EINVAL)
169 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
170 if (errno == ERANGE) {
171 errno = 0;
Chris Lattner4651bca2009-06-21 19:21:25 +0000172 if (errno == EINVAL)
173 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
174 if (errno == ERANGE)
175 return ReturnError(CurPtr-2, "Hexadecimal number out of range");
176 }
Daniel Dunbar3f872332009-07-28 16:08:33 +0000177 return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000178 (int64_t) strtoull(NumStart, 0, 16));
Chris Lattner4651bca2009-06-21 19:21:25 +0000179 }
180
181 // Must be an octal number, it starts with 0.
182 while (*CurPtr >= '0' && *CurPtr <= '7')
183 ++CurPtr;
Daniel Dunbar3f872332009-07-28 16:08:33 +0000184 return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000185 strtoll(TokStart, 0, 8));
Chris Lattner4651bca2009-06-21 19:21:25 +0000186}
187
Chris Lattner10a907d2009-06-21 19:56:35 +0000188/// LexQuote: String: "..."
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000189AsmToken AsmLexer::LexQuote() {
Chris Lattner10a907d2009-06-21 19:56:35 +0000190 int CurChar = getNextChar();
191 // TODO: does gas allow multiline string constants?
192 while (CurChar != '"') {
193 if (CurChar == '\\') {
194 // Allow \", etc.
195 CurChar = getNextChar();
196 }
197
Chris Lattner14ee48a2009-06-21 21:22:11 +0000198 if (CurChar == EOF)
199 return ReturnError(TokStart, "unterminated string constant");
Chris Lattner10a907d2009-06-21 19:56:35 +0000200
201 CurChar = getNextChar();
202 }
203
Daniel Dunbar3f872332009-07-28 16:08:33 +0000204 return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
Chris Lattner10a907d2009-06-21 19:56:35 +0000205}
206
Chris Lattnerff4bc462009-08-10 01:39:42 +0000207StringRef AsmLexer::LexUntilEndOfStatement() {
208 TokStart = CurPtr;
209
Kevin Enderbyb5db8302009-09-16 18:08:00 +0000210 while (!isAtStartOfComment(*CurPtr) && // Start of line comment.
211 *CurPtr != ';' && // End of statement marker.
Chris Lattnerff4bc462009-08-10 01:39:42 +0000212 *CurPtr != '\n' &&
213 *CurPtr != '\r' &&
Kevin Enderby9823ca92009-09-04 21:45:34 +0000214 (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) {
Chris Lattnerff4bc462009-08-10 01:39:42 +0000215 ++CurPtr;
Kevin Enderby9823ca92009-09-04 21:45:34 +0000216 }
Chris Lattnerff4bc462009-08-10 01:39:42 +0000217 return StringRef(TokStart, CurPtr-TokStart);
218}
Chris Lattner4651bca2009-06-21 19:21:25 +0000219
Kevin Enderbyb5db8302009-09-16 18:08:00 +0000220bool AsmLexer::isAtStartOfComment(char Char) {
Chris Lattnercec54502009-09-27 19:38:39 +0000221 // FIXME: This won't work for multi-character comment indicators like "//".
222 return Char == *MAI.getCommentString();
Kevin Enderbyb5db8302009-09-16 18:08:00 +0000223}
224
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000225AsmToken AsmLexer::LexToken() {
Chris Lattnera59e8772009-06-21 07:19:10 +0000226 TokStart = CurPtr;
227 // This always consumes at least one character.
228 int CurChar = getNextChar();
229
Kevin Enderbyb5db8302009-09-16 18:08:00 +0000230 if (isAtStartOfComment(CurChar))
231 return LexLineComment();
Kevin Enderby9823ca92009-09-04 21:45:34 +0000232
Chris Lattnera59e8772009-06-21 07:19:10 +0000233 switch (CurChar) {
234 default:
Chris Lattner4651bca2009-06-21 19:21:25 +0000235 // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
236 if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
237 return LexIdentifier();
Chris Lattnera59e8772009-06-21 07:19:10 +0000238
239 // Unknown character, emit an error.
Chris Lattner27aa7d22009-06-21 20:16:42 +0000240 return ReturnError(TokStart, "invalid character in input");
Daniel Dunbar3f872332009-07-28 16:08:33 +0000241 case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
Chris Lattnera59e8772009-06-21 07:19:10 +0000242 case 0:
243 case ' ':
244 case '\t':
Chris Lattnera59e8772009-06-21 07:19:10 +0000245 // Ignore whitespace.
246 return LexToken();
Chris Lattner4651bca2009-06-21 19:21:25 +0000247 case '\n': // FALL THROUGH.
248 case '\r': // FALL THROUGH.
Daniel Dunbar3f872332009-07-28 16:08:33 +0000249 case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
250 case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
251 case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
252 case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
253 case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
254 case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
255 case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
Kevin Enderbyfb0f0de2009-09-04 22:40:31 +0000256 case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
257 case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
258 case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
259 case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
Daniel Dunbar3f872332009-07-28 16:08:33 +0000260 case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
261 case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
262 case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
Daniel Dunbar475839e2009-06-29 20:37:27 +0000263 case '=':
264 if (*CurPtr == '=')
Daniel Dunbar3f872332009-07-28 16:08:33 +0000265 return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
266 return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
Daniel Dunbar475839e2009-06-29 20:37:27 +0000267 case '|':
268 if (*CurPtr == '|')
Daniel Dunbar3f872332009-07-28 16:08:33 +0000269 return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
270 return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
271 case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
Daniel Dunbar475839e2009-06-29 20:37:27 +0000272 case '&':
273 if (*CurPtr == '&')
Daniel Dunbar3f872332009-07-28 16:08:33 +0000274 return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
275 return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
Daniel Dunbar475839e2009-06-29 20:37:27 +0000276 case '!':
277 if (*CurPtr == '=')
Daniel Dunbar3f872332009-07-28 16:08:33 +0000278 return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
279 return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
Kevin Enderby7b4608d2009-09-03 17:15:07 +0000280 case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
Chris Lattner4651bca2009-06-21 19:21:25 +0000281 case '/': return LexSlash();
Kevin Enderby9823ca92009-09-04 21:45:34 +0000282 case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
Chris Lattner10a907d2009-06-21 19:56:35 +0000283 case '"': return LexQuote();
Chris Lattner4651bca2009-06-21 19:21:25 +0000284 case '0': case '1': case '2': case '3': case '4':
285 case '5': case '6': case '7': case '8': case '9':
286 return LexDigit();
Chris Lattner8dfbe6c2009-06-23 05:57:07 +0000287 case '<':
Daniel Dunbar475839e2009-06-29 20:37:27 +0000288 switch (*CurPtr) {
Daniel Dunbar3f872332009-07-28 16:08:33 +0000289 case '<': return ++CurPtr, AsmToken(AsmToken::LessLess,
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000290 StringRef(TokStart, 2));
Daniel Dunbar3f872332009-07-28 16:08:33 +0000291 case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual,
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000292 StringRef(TokStart, 2));
Daniel Dunbar3f872332009-07-28 16:08:33 +0000293 case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater,
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000294 StringRef(TokStart, 2));
Daniel Dunbar3f872332009-07-28 16:08:33 +0000295 default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
Chris Lattner8dfbe6c2009-06-23 05:57:07 +0000296 }
Chris Lattner8dfbe6c2009-06-23 05:57:07 +0000297 case '>':
Daniel Dunbar475839e2009-06-29 20:37:27 +0000298 switch (*CurPtr) {
Daniel Dunbar3f872332009-07-28 16:08:33 +0000299 case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater,
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000300 StringRef(TokStart, 2));
Daniel Dunbar3f872332009-07-28 16:08:33 +0000301 case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual,
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000302 StringRef(TokStart, 2));
Daniel Dunbar3f872332009-07-28 16:08:33 +0000303 default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
Chris Lattner8dfbe6c2009-06-23 05:57:07 +0000304 }
Chris Lattner4651bca2009-06-21 19:21:25 +0000305
306 // TODO: Quoted identifiers (objc methods etc)
307 // local labels: [0-9][:]
308 // Forward/backward labels: [0-9][fb]
309 // Integers, fp constants, character constants.
Chris Lattnera59e8772009-06-21 07:19:10 +0000310 }
Duncan Sands66b9f292009-06-22 06:59:32 +0000311}