blob: 19f5247a4567108e278fb4b204532dd4d14cd264 [file] [log] [blame]
//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class implements the lexer for assembly files.
//
//===----------------------------------------------------------------------===//
13
14#include "AsmLexer.h"
15#include "llvm/Support/SourceMgr.h"
16#include "llvm/Support/MemoryBuffer.h"
Chris Lattnerbcd0b8d2009-06-23 00:24:36 +000017#include "llvm/Config/config.h" // for strtoull.
Kevin Enderby9823ca92009-09-04 21:45:34 +000018#include "llvm/MC/MCAsmInfo.h"
Chris Lattner4651bca2009-06-21 19:21:25 +000019#include <cerrno>
Duncan Sands66b9f292009-06-22 06:59:32 +000020#include <cstdio>
Chris Lattner4506bd22009-06-21 19:43:50 +000021#include <cstdlib>
Chris Lattnera59e8772009-06-21 07:19:10 +000022using namespace llvm;
23
Kevin Enderby9823ca92009-09-04 21:45:34 +000024AsmLexer::AsmLexer(SourceMgr &SM, const MCAsmInfo &_MAI) : SrcMgr(SM),
25 MAI(_MAI) {
Chris Lattnera59e8772009-06-21 07:19:10 +000026 CurBuffer = 0;
27 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
28 CurPtr = CurBuf->getBufferStart();
29 TokStart = 0;
Chris Lattnerfaf32c12009-06-24 00:33:19 +000030}
31
32AsmLexer::~AsmLexer() {
Chris Lattnera59e8772009-06-21 07:19:10 +000033}
34
Chris Lattner4651bca2009-06-21 19:21:25 +000035SMLoc AsmLexer::getLoc() const {
36 return SMLoc::getFromPointer(TokStart);
37}
38
Chris Lattner4651bca2009-06-21 19:21:25 +000039/// ReturnError - Set the error to the specified string at the specified
Daniel Dunbar3f872332009-07-28 16:08:33 +000040/// location. This is defined to always return AsmToken::Error.
Daniel Dunbarcb358b62009-07-28 03:00:54 +000041AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
Sean Callanan79036e42010-01-20 22:18:24 +000042 SetError(SMLoc::getFromPointer(Loc), Msg);
43
Daniel Dunbar3f872332009-07-28 16:08:33 +000044 return AsmToken(AsmToken::Error, StringRef(Loc, 0));
Chris Lattner4651bca2009-06-21 19:21:25 +000045}
46
Chris Lattner8e25e2d2009-07-16 06:14:39 +000047/// EnterIncludeFile - Enter the specified file. This prints an error and
48/// returns true on failure.
49bool AsmLexer::EnterIncludeFile(const std::string &Filename) {
50 int NewBuf = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr));
51 if (NewBuf == -1)
52 return true;
53
54 // Save the line number and lex buffer of the includer.
55 CurBuffer = NewBuf;
56 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
57 CurPtr = CurBuf->getBufferStart();
58 return false;
59}
60
61
Chris Lattnera59e8772009-06-21 07:19:10 +000062int AsmLexer::getNextChar() {
63 char CurChar = *CurPtr++;
64 switch (CurChar) {
65 default:
66 return (unsigned char)CurChar;
67 case 0: {
68 // A nul character in the stream is either the end of the current buffer or
69 // a random nul in the file. Disambiguate that here.
70 if (CurPtr-1 != CurBuf->getBufferEnd())
71 return 0; // Just whitespace.
72
73 // If this is the end of an included file, pop the parent file off the
74 // include stack.
75 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
76 if (ParentIncludeLoc != SMLoc()) {
77 CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
78 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
79 CurPtr = ParentIncludeLoc.getPointer();
Chris Lattner8e25e2d2009-07-16 06:14:39 +000080
81 // Reset the token start pointer to the start of the new file.
82 TokStart = CurPtr;
83
Chris Lattnera59e8772009-06-21 07:19:10 +000084 return getNextChar();
85 }
86
87 // Otherwise, return end of file.
88 --CurPtr; // Another call to lex will return EOF again.
89 return EOF;
90 }
91 }
92}
93
Chris Lattner4651bca2009-06-21 19:21:25 +000094/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
Daniel Dunbarcb358b62009-07-28 03:00:54 +000095AsmToken AsmLexer::LexIdentifier() {
Chris Lattner4651bca2009-06-21 19:21:25 +000096 while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
97 *CurPtr == '.' || *CurPtr == '@')
98 ++CurPtr;
Daniel Dunbar3f872332009-07-28 16:08:33 +000099 return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
Chris Lattner4651bca2009-06-21 19:21:25 +0000100}
101
Chris Lattner4651bca2009-06-21 19:21:25 +0000102/// LexSlash: Slash: /
103/// C-Style Comment: /* ... */
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000104AsmToken AsmLexer::LexSlash() {
Daniel Dunbar383a4a82009-06-29 21:58:22 +0000105 switch (*CurPtr) {
106 case '*': break; // C style comment.
107 case '/': return ++CurPtr, LexLineComment();
Daniel Dunbar3f872332009-07-28 16:08:33 +0000108 default: return AsmToken(AsmToken::Slash, StringRef(CurPtr, 1));
Daniel Dunbar383a4a82009-06-29 21:58:22 +0000109 }
Chris Lattner4651bca2009-06-21 19:21:25 +0000110
111 // C Style comment.
112 ++CurPtr; // skip the star.
113 while (1) {
114 int CurChar = getNextChar();
115 switch (CurChar) {
116 case EOF:
Chris Lattner27aa7d22009-06-21 20:16:42 +0000117 return ReturnError(TokStart, "unterminated comment");
Chris Lattner4651bca2009-06-21 19:21:25 +0000118 case '*':
119 // End of the comment?
120 if (CurPtr[0] != '/') break;
121
122 ++CurPtr; // End the */.
123 return LexToken();
124 }
125 }
126}
127
Daniel Dunbar383a4a82009-06-29 21:58:22 +0000128/// LexLineComment: Comment: #[^\n]*
129/// : //[^\n]*
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000130AsmToken AsmLexer::LexLineComment() {
131 // FIXME: This is broken if we happen to a comment at the end of a file, which
132 // was .included, and which doesn't end with a newline.
Chris Lattner4651bca2009-06-21 19:21:25 +0000133 int CurChar = getNextChar();
134 while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF)
135 CurChar = getNextChar();
136
137 if (CurChar == EOF)
Daniel Dunbar3f872332009-07-28 16:08:33 +0000138 return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0));
139 return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
Chris Lattner4651bca2009-06-21 19:21:25 +0000140}
141
142
143/// LexDigit: First character is [0-9].
144/// Local Label: [0-9][:]
145/// Forward/Backward Label: [0-9][fb]
146/// Binary integer: 0b[01]+
147/// Octal integer: 0[0-7]+
148/// Hex integer: 0x[0-9a-fA-F]+
149/// Decimal integer: [1-9][0-9]*
150/// TODO: FP literal.
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000151AsmToken AsmLexer::LexDigit() {
Chris Lattner4651bca2009-06-21 19:21:25 +0000152 if (*CurPtr == ':')
Chris Lattner27aa7d22009-06-21 20:16:42 +0000153 return ReturnError(TokStart, "FIXME: local label not implemented");
Chris Lattner4651bca2009-06-21 19:21:25 +0000154 if (*CurPtr == 'f' || *CurPtr == 'b')
Chris Lattner27aa7d22009-06-21 20:16:42 +0000155 return ReturnError(TokStart, "FIXME: directional label not implemented");
Chris Lattner4651bca2009-06-21 19:21:25 +0000156
157 // Decimal integer: [1-9][0-9]*
158 if (CurPtr[-1] != '0') {
159 while (isdigit(*CurPtr))
160 ++CurPtr;
Daniel Dunbar3f872332009-07-28 16:08:33 +0000161 return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000162 strtoll(TokStart, 0, 10));
Chris Lattner4651bca2009-06-21 19:21:25 +0000163 }
164
165 if (*CurPtr == 'b') {
166 ++CurPtr;
167 const char *NumStart = CurPtr;
168 while (CurPtr[0] == '0' || CurPtr[0] == '1')
169 ++CurPtr;
170
171 // Requires at least one binary digit.
172 if (CurPtr == NumStart)
173 return ReturnError(CurPtr-2, "Invalid binary number");
Daniel Dunbar3f872332009-07-28 16:08:33 +0000174 return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000175 strtoll(NumStart, 0, 2));
Chris Lattner4651bca2009-06-21 19:21:25 +0000176 }
177
178 if (*CurPtr == 'x') {
179 ++CurPtr;
180 const char *NumStart = CurPtr;
181 while (isxdigit(CurPtr[0]))
182 ++CurPtr;
183
184 // Requires at least one hex digit.
185 if (CurPtr == NumStart)
186 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
187
188 errno = 0;
Chris Lattner4651bca2009-06-21 19:21:25 +0000189 if (errno == EINVAL)
190 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
191 if (errno == ERANGE) {
192 errno = 0;
Chris Lattner4651bca2009-06-21 19:21:25 +0000193 if (errno == EINVAL)
194 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
195 if (errno == ERANGE)
196 return ReturnError(CurPtr-2, "Hexadecimal number out of range");
197 }
Daniel Dunbar3f872332009-07-28 16:08:33 +0000198 return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000199 (int64_t) strtoull(NumStart, 0, 16));
Chris Lattner4651bca2009-06-21 19:21:25 +0000200 }
201
202 // Must be an octal number, it starts with 0.
203 while (*CurPtr >= '0' && *CurPtr <= '7')
204 ++CurPtr;
Daniel Dunbar3f872332009-07-28 16:08:33 +0000205 return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000206 strtoll(TokStart, 0, 8));
Chris Lattner4651bca2009-06-21 19:21:25 +0000207}
208
Chris Lattner10a907d2009-06-21 19:56:35 +0000209/// LexQuote: String: "..."
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000210AsmToken AsmLexer::LexQuote() {
Chris Lattner10a907d2009-06-21 19:56:35 +0000211 int CurChar = getNextChar();
212 // TODO: does gas allow multiline string constants?
213 while (CurChar != '"') {
214 if (CurChar == '\\') {
215 // Allow \", etc.
216 CurChar = getNextChar();
217 }
218
Chris Lattner14ee48a2009-06-21 21:22:11 +0000219 if (CurChar == EOF)
220 return ReturnError(TokStart, "unterminated string constant");
Chris Lattner10a907d2009-06-21 19:56:35 +0000221
222 CurChar = getNextChar();
223 }
224
Daniel Dunbar3f872332009-07-28 16:08:33 +0000225 return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart));
Chris Lattner10a907d2009-06-21 19:56:35 +0000226}
227
Chris Lattnerff4bc462009-08-10 01:39:42 +0000228StringRef AsmLexer::LexUntilEndOfStatement() {
229 TokStart = CurPtr;
230
Kevin Enderbyb5db8302009-09-16 18:08:00 +0000231 while (!isAtStartOfComment(*CurPtr) && // Start of line comment.
232 *CurPtr != ';' && // End of statement marker.
Chris Lattnerff4bc462009-08-10 01:39:42 +0000233 *CurPtr != '\n' &&
234 *CurPtr != '\r' &&
Kevin Enderby9823ca92009-09-04 21:45:34 +0000235 (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) {
Chris Lattnerff4bc462009-08-10 01:39:42 +0000236 ++CurPtr;
Kevin Enderby9823ca92009-09-04 21:45:34 +0000237 }
Chris Lattnerff4bc462009-08-10 01:39:42 +0000238 return StringRef(TokStart, CurPtr-TokStart);
239}
Chris Lattner4651bca2009-06-21 19:21:25 +0000240
Kevin Enderbyb5db8302009-09-16 18:08:00 +0000241bool AsmLexer::isAtStartOfComment(char Char) {
Chris Lattnercec54502009-09-27 19:38:39 +0000242 // FIXME: This won't work for multi-character comment indicators like "//".
243 return Char == *MAI.getCommentString();
Kevin Enderbyb5db8302009-09-16 18:08:00 +0000244}
245
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000246AsmToken AsmLexer::LexToken() {
Chris Lattnera59e8772009-06-21 07:19:10 +0000247 TokStart = CurPtr;
248 // This always consumes at least one character.
249 int CurChar = getNextChar();
250
Kevin Enderbyb5db8302009-09-16 18:08:00 +0000251 if (isAtStartOfComment(CurChar))
252 return LexLineComment();
Kevin Enderby9823ca92009-09-04 21:45:34 +0000253
Chris Lattnera59e8772009-06-21 07:19:10 +0000254 switch (CurChar) {
255 default:
Chris Lattner4651bca2009-06-21 19:21:25 +0000256 // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
257 if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
258 return LexIdentifier();
Chris Lattnera59e8772009-06-21 07:19:10 +0000259
260 // Unknown character, emit an error.
Chris Lattner27aa7d22009-06-21 20:16:42 +0000261 return ReturnError(TokStart, "invalid character in input");
Daniel Dunbar3f872332009-07-28 16:08:33 +0000262 case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
Chris Lattnera59e8772009-06-21 07:19:10 +0000263 case 0:
264 case ' ':
265 case '\t':
Chris Lattnera59e8772009-06-21 07:19:10 +0000266 // Ignore whitespace.
267 return LexToken();
Chris Lattner4651bca2009-06-21 19:21:25 +0000268 case '\n': // FALL THROUGH.
269 case '\r': // FALL THROUGH.
Daniel Dunbar3f872332009-07-28 16:08:33 +0000270 case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
271 case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
272 case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
273 case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1));
274 case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1));
275 case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1));
276 case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1));
Kevin Enderbyfb0f0de2009-09-04 22:40:31 +0000277 case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1));
278 case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1));
279 case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1));
280 case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
Daniel Dunbar3f872332009-07-28 16:08:33 +0000281 case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
282 case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
283 case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
Daniel Dunbar475839e2009-06-29 20:37:27 +0000284 case '=':
285 if (*CurPtr == '=')
Daniel Dunbar3f872332009-07-28 16:08:33 +0000286 return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
287 return AsmToken(AsmToken::Equal, StringRef(TokStart, 1));
Daniel Dunbar475839e2009-06-29 20:37:27 +0000288 case '|':
289 if (*CurPtr == '|')
Daniel Dunbar3f872332009-07-28 16:08:33 +0000290 return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2));
291 return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1));
292 case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1));
Daniel Dunbar475839e2009-06-29 20:37:27 +0000293 case '&':
294 if (*CurPtr == '&')
Daniel Dunbar3f872332009-07-28 16:08:33 +0000295 return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2));
296 return AsmToken(AsmToken::Amp, StringRef(TokStart, 1));
Daniel Dunbar475839e2009-06-29 20:37:27 +0000297 case '!':
298 if (*CurPtr == '=')
Daniel Dunbar3f872332009-07-28 16:08:33 +0000299 return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
300 return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
Kevin Enderby7b4608d2009-09-03 17:15:07 +0000301 case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
Chris Lattner4651bca2009-06-21 19:21:25 +0000302 case '/': return LexSlash();
Kevin Enderby9823ca92009-09-04 21:45:34 +0000303 case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
Chris Lattner10a907d2009-06-21 19:56:35 +0000304 case '"': return LexQuote();
Chris Lattner4651bca2009-06-21 19:21:25 +0000305 case '0': case '1': case '2': case '3': case '4':
306 case '5': case '6': case '7': case '8': case '9':
307 return LexDigit();
Chris Lattner8dfbe6c2009-06-23 05:57:07 +0000308 case '<':
Daniel Dunbar475839e2009-06-29 20:37:27 +0000309 switch (*CurPtr) {
Daniel Dunbar3f872332009-07-28 16:08:33 +0000310 case '<': return ++CurPtr, AsmToken(AsmToken::LessLess,
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000311 StringRef(TokStart, 2));
Daniel Dunbar3f872332009-07-28 16:08:33 +0000312 case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual,
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000313 StringRef(TokStart, 2));
Daniel Dunbar3f872332009-07-28 16:08:33 +0000314 case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater,
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000315 StringRef(TokStart, 2));
Daniel Dunbar3f872332009-07-28 16:08:33 +0000316 default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1));
Chris Lattner8dfbe6c2009-06-23 05:57:07 +0000317 }
Chris Lattner8dfbe6c2009-06-23 05:57:07 +0000318 case '>':
Daniel Dunbar475839e2009-06-29 20:37:27 +0000319 switch (*CurPtr) {
Daniel Dunbar3f872332009-07-28 16:08:33 +0000320 case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater,
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000321 StringRef(TokStart, 2));
Daniel Dunbar3f872332009-07-28 16:08:33 +0000322 case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual,
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000323 StringRef(TokStart, 2));
Daniel Dunbar3f872332009-07-28 16:08:33 +0000324 default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1));
Chris Lattner8dfbe6c2009-06-23 05:57:07 +0000325 }
Chris Lattner4651bca2009-06-21 19:21:25 +0000326
327 // TODO: Quoted identifiers (objc methods etc)
328 // local labels: [0-9][:]
329 // Forward/backward labels: [0-9][fb]
330 // Integers, fp constants, character constants.
Chris Lattnera59e8772009-06-21 07:19:10 +0000331 }
Duncan Sands66b9f292009-06-22 06:59:32 +0000332}