blob: e872090c685a5e2075339247442f75e7d145e1f1 [file] [log] [blame]
//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class implements the lexer for assembly files.
//
//===----------------------------------------------------------------------===//
13
14#include "AsmLexer.h"
Chris Lattner592e3bb2009-06-24 00:33:19 +000015#include "llvm/ADT/StringSet.h"
Chris Lattner22b67fb2009-06-21 07:19:10 +000016#include "llvm/Support/SourceMgr.h"
17#include "llvm/Support/MemoryBuffer.h"
Chris Lattner216ec512009-06-23 00:24:36 +000018#include "llvm/Config/config.h" // for strtoull.
Chris Lattnerc688c232009-06-21 19:21:25 +000019#include <cerrno>
Duncan Sands64194912009-06-22 06:59:32 +000020#include <cstdio>
Chris Lattnerc3e6b202009-06-21 19:43:50 +000021#include <cstdlib>
Chris Lattner22b67fb2009-06-21 07:19:10 +000022using namespace llvm;
23
Chris Lattner592e3bb2009-06-24 00:33:19 +000024static StringSet<> &getSS(void *TheSS) {
25 return *(StringSet<>*)TheSS;
26}
27
Chris Lattner22b67fb2009-06-21 07:19:10 +000028AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
29 CurBuffer = 0;
30 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
31 CurPtr = CurBuf->getBufferStart();
32 TokStart = 0;
Chris Lattner592e3bb2009-06-24 00:33:19 +000033
34 TheStringSet = new StringSet<>();
35}
36
37AsmLexer::~AsmLexer() {
38 delete &getSS(TheStringSet);
Chris Lattner22b67fb2009-06-21 07:19:10 +000039}
40
Chris Lattnerc688c232009-06-21 19:21:25 +000041SMLoc AsmLexer::getLoc() const {
42 return SMLoc::getFromPointer(TokStart);
43}
44
Daniel Dunbarb5aec992009-06-30 00:49:23 +000045void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg,
46 const char *Type) const {
47 SrcMgr.PrintMessage(Loc, Msg, Type);
Chris Lattner22b67fb2009-06-21 07:19:10 +000048}
49
Chris Lattnerc688c232009-06-21 19:21:25 +000050/// ReturnError - Set the error to the specified string at the specified
51/// location. This is defined to always return asmtok::Error.
52asmtok::TokKind AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
Daniel Dunbarb5aec992009-06-30 00:49:23 +000053 SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
Chris Lattnerc688c232009-06-21 19:21:25 +000054 return asmtok::Error;
55}
56
Chris Lattner7aca1522009-07-16 06:14:39 +000057/// EnterIncludeFile - Enter the specified file. This prints an error and
58/// returns true on failure.
59bool AsmLexer::EnterIncludeFile(const std::string &Filename) {
60 int NewBuf = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr));
61 if (NewBuf == -1)
62 return true;
63
64 // Save the line number and lex buffer of the includer.
65 CurBuffer = NewBuf;
66 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
67 CurPtr = CurBuf->getBufferStart();
68 return false;
69}
70
71
Chris Lattner22b67fb2009-06-21 07:19:10 +000072int AsmLexer::getNextChar() {
73 char CurChar = *CurPtr++;
74 switch (CurChar) {
75 default:
76 return (unsigned char)CurChar;
77 case 0: {
78 // A nul character in the stream is either the end of the current buffer or
79 // a random nul in the file. Disambiguate that here.
80 if (CurPtr-1 != CurBuf->getBufferEnd())
81 return 0; // Just whitespace.
82
83 // If this is the end of an included file, pop the parent file off the
84 // include stack.
85 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
86 if (ParentIncludeLoc != SMLoc()) {
87 CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
88 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
89 CurPtr = ParentIncludeLoc.getPointer();
Chris Lattner7aca1522009-07-16 06:14:39 +000090
91 // Reset the token start pointer to the start of the new file.
92 TokStart = CurPtr;
93
Chris Lattner22b67fb2009-06-21 07:19:10 +000094 return getNextChar();
95 }
96
97 // Otherwise, return end of file.
98 --CurPtr; // Another call to lex will return EOF again.
99 return EOF;
100 }
101 }
102}
103
Chris Lattnerc688c232009-06-21 19:21:25 +0000104/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
105asmtok::TokKind AsmLexer::LexIdentifier() {
106 while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
107 *CurPtr == '.' || *CurPtr == '@')
108 ++CurPtr;
Chris Lattner592e3bb2009-06-24 00:33:19 +0000109 // Unique string.
Daniel Dunbar4d10add2009-07-23 18:17:34 +0000110 CurStrVal = getSS(TheStringSet).GetOrCreateValue(StringRef(TokStart,
111 CurPtr - TokStart),
112 0).getKeyData();
Chris Lattnerc688c232009-06-21 19:21:25 +0000113 return asmtok::Identifier;
114}
115
116/// LexPercent: Register: %[a-zA-Z0-9]+
117asmtok::TokKind AsmLexer::LexPercent() {
118 if (!isalnum(*CurPtr))
Chris Lattnerd8817ef2009-06-23 05:57:07 +0000119 return asmtok::Percent; // Single %.
120
Chris Lattnerc688c232009-06-21 19:21:25 +0000121 while (isalnum(*CurPtr))
122 ++CurPtr;
Chris Lattner592e3bb2009-06-24 00:33:19 +0000123
124 // Unique string.
Daniel Dunbar4d10add2009-07-23 18:17:34 +0000125 CurStrVal = getSS(TheStringSet).GetOrCreateValue(StringRef(TokStart,
126 CurPtr - TokStart),
127 0).getKeyData();
Chris Lattnerc688c232009-06-21 19:21:25 +0000128 return asmtok::Register;
129}
130
131/// LexSlash: Slash: /
132/// C-Style Comment: /* ... */
133asmtok::TokKind AsmLexer::LexSlash() {
Daniel Dunbarb6dab772009-06-29 21:58:22 +0000134 switch (*CurPtr) {
135 case '*': break; // C style comment.
136 case '/': return ++CurPtr, LexLineComment();
137 default: return asmtok::Slash;
138 }
Chris Lattnerc688c232009-06-21 19:21:25 +0000139
140 // C Style comment.
141 ++CurPtr; // skip the star.
142 while (1) {
143 int CurChar = getNextChar();
144 switch (CurChar) {
145 case EOF:
Chris Lattner3b4bfbd2009-06-21 20:16:42 +0000146 return ReturnError(TokStart, "unterminated comment");
Chris Lattnerc688c232009-06-21 19:21:25 +0000147 case '*':
148 // End of the comment?
149 if (CurPtr[0] != '/') break;
150
151 ++CurPtr; // End the */.
152 return LexToken();
153 }
154 }
155}
156
Daniel Dunbarb6dab772009-06-29 21:58:22 +0000157/// LexLineComment: Comment: #[^\n]*
158/// : //[^\n]*
159asmtok::TokKind AsmLexer::LexLineComment() {
Chris Lattnerc688c232009-06-21 19:21:25 +0000160 int CurChar = getNextChar();
161 while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF)
162 CurChar = getNextChar();
163
164 if (CurChar == EOF)
165 return asmtok::Eof;
166 return asmtok::EndOfStatement;
167}
168
169
170/// LexDigit: First character is [0-9].
171/// Local Label: [0-9][:]
172/// Forward/Backward Label: [0-9][fb]
173/// Binary integer: 0b[01]+
174/// Octal integer: 0[0-7]+
175/// Hex integer: 0x[0-9a-fA-F]+
176/// Decimal integer: [1-9][0-9]*
177/// TODO: FP literal.
178asmtok::TokKind AsmLexer::LexDigit() {
179 if (*CurPtr == ':')
Chris Lattner3b4bfbd2009-06-21 20:16:42 +0000180 return ReturnError(TokStart, "FIXME: local label not implemented");
Chris Lattnerc688c232009-06-21 19:21:25 +0000181 if (*CurPtr == 'f' || *CurPtr == 'b')
Chris Lattner3b4bfbd2009-06-21 20:16:42 +0000182 return ReturnError(TokStart, "FIXME: directional label not implemented");
Chris Lattnerc688c232009-06-21 19:21:25 +0000183
184 // Decimal integer: [1-9][0-9]*
185 if (CurPtr[-1] != '0') {
186 while (isdigit(*CurPtr))
187 ++CurPtr;
188 CurIntVal = strtoll(TokStart, 0, 10);
189 return asmtok::IntVal;
190 }
191
192 if (*CurPtr == 'b') {
193 ++CurPtr;
194 const char *NumStart = CurPtr;
195 while (CurPtr[0] == '0' || CurPtr[0] == '1')
196 ++CurPtr;
197
198 // Requires at least one binary digit.
199 if (CurPtr == NumStart)
200 return ReturnError(CurPtr-2, "Invalid binary number");
201 CurIntVal = strtoll(NumStart, 0, 2);
202 return asmtok::IntVal;
203 }
204
205 if (*CurPtr == 'x') {
206 ++CurPtr;
207 const char *NumStart = CurPtr;
208 while (isxdigit(CurPtr[0]))
209 ++CurPtr;
210
211 // Requires at least one hex digit.
212 if (CurPtr == NumStart)
213 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
214
215 errno = 0;
216 CurIntVal = strtoll(NumStart, 0, 16);
217 if (errno == EINVAL)
218 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
219 if (errno == ERANGE) {
220 errno = 0;
221 CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
222 if (errno == EINVAL)
223 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
224 if (errno == ERANGE)
225 return ReturnError(CurPtr-2, "Hexadecimal number out of range");
226 }
227 return asmtok::IntVal;
228 }
229
230 // Must be an octal number, it starts with 0.
231 while (*CurPtr >= '0' && *CurPtr <= '7')
232 ++CurPtr;
233 CurIntVal = strtoll(TokStart, 0, 8);
234 return asmtok::IntVal;
235}
236
Chris Lattnerba605b72009-06-21 19:56:35 +0000237/// LexQuote: String: "..."
238asmtok::TokKind AsmLexer::LexQuote() {
239 int CurChar = getNextChar();
240 // TODO: does gas allow multiline string constants?
241 while (CurChar != '"') {
242 if (CurChar == '\\') {
243 // Allow \", etc.
244 CurChar = getNextChar();
245 }
246
Chris Lattnerf6611852009-06-21 21:22:11 +0000247 if (CurChar == EOF)
248 return ReturnError(TokStart, "unterminated string constant");
Chris Lattnerba605b72009-06-21 19:56:35 +0000249
250 CurChar = getNextChar();
251 }
252
Chris Lattner592e3bb2009-06-24 00:33:19 +0000253 // Unique string, include quotes for now.
Daniel Dunbar4d10add2009-07-23 18:17:34 +0000254 CurStrVal = getSS(TheStringSet).GetOrCreateValue(StringRef(TokStart,
255 CurPtr - TokStart),
256 0).getKeyData();
Chris Lattnerba605b72009-06-21 19:56:35 +0000257 return asmtok::String;
258}
259
Chris Lattnerc688c232009-06-21 19:21:25 +0000260
Chris Lattner22b67fb2009-06-21 07:19:10 +0000261asmtok::TokKind AsmLexer::LexToken() {
262 TokStart = CurPtr;
263 // This always consumes at least one character.
264 int CurChar = getNextChar();
265
266 switch (CurChar) {
267 default:
Chris Lattnerc688c232009-06-21 19:21:25 +0000268 // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
269 if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
270 return LexIdentifier();
Chris Lattner22b67fb2009-06-21 07:19:10 +0000271
272 // Unknown character, emit an error.
Chris Lattner3b4bfbd2009-06-21 20:16:42 +0000273 return ReturnError(TokStart, "invalid character in input");
Chris Lattner22b67fb2009-06-21 07:19:10 +0000274 case EOF: return asmtok::Eof;
275 case 0:
276 case ' ':
277 case '\t':
Chris Lattner22b67fb2009-06-21 07:19:10 +0000278 // Ignore whitespace.
279 return LexToken();
Chris Lattnerc688c232009-06-21 19:21:25 +0000280 case '\n': // FALL THROUGH.
281 case '\r': // FALL THROUGH.
282 case ';': return asmtok::EndOfStatement;
Chris Lattner22b67fb2009-06-21 07:19:10 +0000283 case ':': return asmtok::Colon;
284 case '+': return asmtok::Plus;
285 case '-': return asmtok::Minus;
Chris Lattnere8164362009-06-22 06:32:03 +0000286 case '~': return asmtok::Tilde;
Chris Lattnerc688c232009-06-21 19:21:25 +0000287 case '(': return asmtok::LParen;
288 case ')': return asmtok::RParen;
289 case '*': return asmtok::Star;
290 case ',': return asmtok::Comma;
291 case '$': return asmtok::Dollar;
Daniel Dunbarc3b8a4b2009-06-29 20:37:27 +0000292 case '=':
293 if (*CurPtr == '=')
294 return ++CurPtr, asmtok::EqualEqual;
295 return asmtok::Equal;
296 case '|':
297 if (*CurPtr == '|')
298 return ++CurPtr, asmtok::PipePipe;
299 return asmtok::Pipe;
Chris Lattnerd8817ef2009-06-23 05:57:07 +0000300 case '^': return asmtok::Caret;
Daniel Dunbarc3b8a4b2009-06-29 20:37:27 +0000301 case '&':
302 if (*CurPtr == '&')
303 return ++CurPtr, asmtok::AmpAmp;
304 return asmtok::Amp;
305 case '!':
306 if (*CurPtr == '=')
307 return ++CurPtr, asmtok::ExclaimEqual;
308 return asmtok::Exclaim;
Chris Lattnerc688c232009-06-21 19:21:25 +0000309 case '%': return LexPercent();
310 case '/': return LexSlash();
Daniel Dunbarb6dab772009-06-29 21:58:22 +0000311 case '#': return LexLineComment();
Chris Lattnerba605b72009-06-21 19:56:35 +0000312 case '"': return LexQuote();
Chris Lattnerc688c232009-06-21 19:21:25 +0000313 case '0': case '1': case '2': case '3': case '4':
314 case '5': case '6': case '7': case '8': case '9':
315 return LexDigit();
Chris Lattnerd8817ef2009-06-23 05:57:07 +0000316 case '<':
Daniel Dunbarc3b8a4b2009-06-29 20:37:27 +0000317 switch (*CurPtr) {
318 case '<': return ++CurPtr, asmtok::LessLess;
319 case '=': return ++CurPtr, asmtok::LessEqual;
320 case '>': return ++CurPtr, asmtok::LessGreater;
321 default: return asmtok::Less;
Chris Lattnerd8817ef2009-06-23 05:57:07 +0000322 }
Chris Lattnerd8817ef2009-06-23 05:57:07 +0000323 case '>':
Daniel Dunbarc3b8a4b2009-06-29 20:37:27 +0000324 switch (*CurPtr) {
325 case '>': return ++CurPtr, asmtok::GreaterGreater;
326 case '=': return ++CurPtr, asmtok::GreaterEqual;
327 default: return asmtok::Greater;
Chris Lattnerd8817ef2009-06-23 05:57:07 +0000328 }
Chris Lattnerc688c232009-06-21 19:21:25 +0000329
330 // TODO: Quoted identifiers (objc methods etc)
331 // local labels: [0-9][:]
332 // Forward/backward labels: [0-9][fb]
333 // Integers, fp constants, character constants.
Chris Lattner22b67fb2009-06-21 07:19:10 +0000334 }
Duncan Sands64194912009-06-22 06:59:32 +0000335}