blob: 36faeb3facf8f5a2158d14989a037be42153f7bd [file] [log] [blame]
Chris Lattnera59e8772009-06-21 07:19:10 +00001//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This class implements the lexer for assembly files.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AsmLexer.h"
15#include "llvm/Support/SourceMgr.h"
16#include "llvm/Support/MemoryBuffer.h"
Chris Lattner4651bca2009-06-21 19:21:25 +000017#include <cerrno>
Chris Lattner4506bd22009-06-21 19:43:50 +000018#include <cstdlib>
Chris Lattnera59e8772009-06-21 07:19:10 +000019using namespace llvm;
20
21AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
22 CurBuffer = 0;
23 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
24 CurPtr = CurBuf->getBufferStart();
25 TokStart = 0;
26}
27
Chris Lattner4651bca2009-06-21 19:21:25 +000028SMLoc AsmLexer::getLoc() const {
29 return SMLoc::getFromPointer(TokStart);
30}
31
Chris Lattner14ee48a2009-06-21 21:22:11 +000032void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg) const {
33 SrcMgr.PrintMessage(Loc, Msg);
Chris Lattnera59e8772009-06-21 07:19:10 +000034}
35
Chris Lattner4651bca2009-06-21 19:21:25 +000036/// ReturnError - Set the error to the specified string at the specified
37/// location. This is defined to always return asmtok::Error.
38asmtok::TokKind AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
Chris Lattner14ee48a2009-06-21 21:22:11 +000039 SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg);
Chris Lattner4651bca2009-06-21 19:21:25 +000040 return asmtok::Error;
41}
42
Chris Lattnera59e8772009-06-21 07:19:10 +000043int AsmLexer::getNextChar() {
44 char CurChar = *CurPtr++;
45 switch (CurChar) {
46 default:
47 return (unsigned char)CurChar;
48 case 0: {
49 // A nul character in the stream is either the end of the current buffer or
50 // a random nul in the file. Disambiguate that here.
51 if (CurPtr-1 != CurBuf->getBufferEnd())
52 return 0; // Just whitespace.
53
54 // If this is the end of an included file, pop the parent file off the
55 // include stack.
56 SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
57 if (ParentIncludeLoc != SMLoc()) {
58 CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
59 CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
60 CurPtr = ParentIncludeLoc.getPointer();
61 return getNextChar();
62 }
63
64 // Otherwise, return end of file.
65 --CurPtr; // Another call to lex will return EOF again.
66 return EOF;
67 }
68 }
69}
70
Chris Lattner4651bca2009-06-21 19:21:25 +000071/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
72asmtok::TokKind AsmLexer::LexIdentifier() {
73 while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
74 *CurPtr == '.' || *CurPtr == '@')
75 ++CurPtr;
Chris Lattner10a907d2009-06-21 19:56:35 +000076 CurStrVal.assign(TokStart, CurPtr); // Include %
Chris Lattner4651bca2009-06-21 19:21:25 +000077 return asmtok::Identifier;
78}
79
80/// LexPercent: Register: %[a-zA-Z0-9]+
81asmtok::TokKind AsmLexer::LexPercent() {
82 if (!isalnum(*CurPtr))
Chris Lattner27aa7d22009-06-21 20:16:42 +000083 return ReturnError(TokStart, "invalid register name");
Chris Lattner4651bca2009-06-21 19:21:25 +000084 while (isalnum(*CurPtr))
85 ++CurPtr;
86 CurStrVal.assign(TokStart, CurPtr); // Skip %
87 return asmtok::Register;
88}
89
90/// LexSlash: Slash: /
91/// C-Style Comment: /* ... */
92asmtok::TokKind AsmLexer::LexSlash() {
93 if (*CurPtr != '*')
94 return asmtok::Slash;
95
96 // C Style comment.
97 ++CurPtr; // skip the star.
98 while (1) {
99 int CurChar = getNextChar();
100 switch (CurChar) {
101 case EOF:
Chris Lattner27aa7d22009-06-21 20:16:42 +0000102 return ReturnError(TokStart, "unterminated comment");
Chris Lattner4651bca2009-06-21 19:21:25 +0000103 case '*':
104 // End of the comment?
105 if (CurPtr[0] != '/') break;
106
107 ++CurPtr; // End the */.
108 return LexToken();
109 }
110 }
111}
112
113/// LexHash: Comment: #[^\n]*
114asmtok::TokKind AsmLexer::LexHash() {
115 int CurChar = getNextChar();
116 while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF)
117 CurChar = getNextChar();
118
119 if (CurChar == EOF)
120 return asmtok::Eof;
121 return asmtok::EndOfStatement;
122}
123
124
125/// LexDigit: First character is [0-9].
126/// Local Label: [0-9][:]
127/// Forward/Backward Label: [0-9][fb]
128/// Binary integer: 0b[01]+
129/// Octal integer: 0[0-7]+
130/// Hex integer: 0x[0-9a-fA-F]+
131/// Decimal integer: [1-9][0-9]*
132/// TODO: FP literal.
133asmtok::TokKind AsmLexer::LexDigit() {
134 if (*CurPtr == ':')
Chris Lattner27aa7d22009-06-21 20:16:42 +0000135 return ReturnError(TokStart, "FIXME: local label not implemented");
Chris Lattner4651bca2009-06-21 19:21:25 +0000136 if (*CurPtr == 'f' || *CurPtr == 'b')
Chris Lattner27aa7d22009-06-21 20:16:42 +0000137 return ReturnError(TokStart, "FIXME: directional label not implemented");
Chris Lattner4651bca2009-06-21 19:21:25 +0000138
139 // Decimal integer: [1-9][0-9]*
140 if (CurPtr[-1] != '0') {
141 while (isdigit(*CurPtr))
142 ++CurPtr;
143 CurIntVal = strtoll(TokStart, 0, 10);
144 return asmtok::IntVal;
145 }
146
147 if (*CurPtr == 'b') {
148 ++CurPtr;
149 const char *NumStart = CurPtr;
150 while (CurPtr[0] == '0' || CurPtr[0] == '1')
151 ++CurPtr;
152
153 // Requires at least one binary digit.
154 if (CurPtr == NumStart)
155 return ReturnError(CurPtr-2, "Invalid binary number");
156 CurIntVal = strtoll(NumStart, 0, 2);
157 return asmtok::IntVal;
158 }
159
160 if (*CurPtr == 'x') {
161 ++CurPtr;
162 const char *NumStart = CurPtr;
163 while (isxdigit(CurPtr[0]))
164 ++CurPtr;
165
166 // Requires at least one hex digit.
167 if (CurPtr == NumStart)
168 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
169
170 errno = 0;
171 CurIntVal = strtoll(NumStart, 0, 16);
172 if (errno == EINVAL)
173 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
174 if (errno == ERANGE) {
175 errno = 0;
176 CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
177 if (errno == EINVAL)
178 return ReturnError(CurPtr-2, "Invalid hexadecimal number");
179 if (errno == ERANGE)
180 return ReturnError(CurPtr-2, "Hexadecimal number out of range");
181 }
182 return asmtok::IntVal;
183 }
184
185 // Must be an octal number, it starts with 0.
186 while (*CurPtr >= '0' && *CurPtr <= '7')
187 ++CurPtr;
188 CurIntVal = strtoll(TokStart, 0, 8);
189 return asmtok::IntVal;
190}
191
Chris Lattner10a907d2009-06-21 19:56:35 +0000192/// LexQuote: String: "..."
193asmtok::TokKind AsmLexer::LexQuote() {
194 int CurChar = getNextChar();
195 // TODO: does gas allow multiline string constants?
196 while (CurChar != '"') {
197 if (CurChar == '\\') {
198 // Allow \", etc.
199 CurChar = getNextChar();
200 }
201
Chris Lattner14ee48a2009-06-21 21:22:11 +0000202 if (CurChar == EOF)
203 return ReturnError(TokStart, "unterminated string constant");
Chris Lattner10a907d2009-06-21 19:56:35 +0000204
205 CurChar = getNextChar();
206 }
207
208 CurStrVal.assign(TokStart, CurPtr); // include quotes.
209 return asmtok::String;
210}
211
Chris Lattner4651bca2009-06-21 19:21:25 +0000212
Chris Lattnera59e8772009-06-21 07:19:10 +0000213asmtok::TokKind AsmLexer::LexToken() {
214 TokStart = CurPtr;
215 // This always consumes at least one character.
216 int CurChar = getNextChar();
217
218 switch (CurChar) {
219 default:
Chris Lattner4651bca2009-06-21 19:21:25 +0000220 // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
221 if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
222 return LexIdentifier();
Chris Lattnera59e8772009-06-21 07:19:10 +0000223
224 // Unknown character, emit an error.
Chris Lattner27aa7d22009-06-21 20:16:42 +0000225 return ReturnError(TokStart, "invalid character in input");
Chris Lattnera59e8772009-06-21 07:19:10 +0000226 case EOF: return asmtok::Eof;
227 case 0:
228 case ' ':
229 case '\t':
Chris Lattnera59e8772009-06-21 07:19:10 +0000230 // Ignore whitespace.
231 return LexToken();
Chris Lattner4651bca2009-06-21 19:21:25 +0000232 case '\n': // FALL THROUGH.
233 case '\r': // FALL THROUGH.
234 case ';': return asmtok::EndOfStatement;
Chris Lattnera59e8772009-06-21 07:19:10 +0000235 case ':': return asmtok::Colon;
236 case '+': return asmtok::Plus;
237 case '-': return asmtok::Minus;
Chris Lattner74ec1a32009-06-22 06:32:03 +0000238 case '~': return asmtok::Tilde;
Chris Lattner4651bca2009-06-21 19:21:25 +0000239 case '(': return asmtok::LParen;
240 case ')': return asmtok::RParen;
241 case '*': return asmtok::Star;
242 case ',': return asmtok::Comma;
243 case '$': return asmtok::Dollar;
244 case '%': return LexPercent();
245 case '/': return LexSlash();
246 case '#': return LexHash();
Chris Lattner10a907d2009-06-21 19:56:35 +0000247 case '"': return LexQuote();
Chris Lattner4651bca2009-06-21 19:21:25 +0000248 case '0': case '1': case '2': case '3': case '4':
249 case '5': case '6': case '7': case '8': case '9':
250 return LexDigit();
251
252 // TODO: Quoted identifiers (objc methods etc)
253 // local labels: [0-9][:]
254 // Forward/backward labels: [0-9][fb]
255 // Integers, fp constants, character constants.
Chris Lattnera59e8772009-06-21 07:19:10 +0000256 }
257}