Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 1 | //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This class implements the lexer for assembly files. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
Chris Lattner | be343b3 | 2010-01-22 01:58:08 +0000 | [diff] [blame] | 14 | #include "llvm/MC/MCParser/AsmLexer.h" |
Sean Callanan | fd0b028 | 2010-01-21 00:19:58 +0000 | [diff] [blame] | 15 | #include "llvm/Support/SMLoc.h" |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 16 | #include "llvm/Support/MemoryBuffer.h" |
Kevin Enderby | 9823ca9 | 2009-09-04 21:45:34 +0000 | [diff] [blame] | 17 | #include "llvm/MC/MCAsmInfo.h" |
Nick Lewycky | 476b242 | 2010-12-19 20:43:38 +0000 | [diff] [blame^] | 18 | #include <cctype> |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 19 | #include <cerrno> |
Duncan Sands | 66b9f29 | 2009-06-22 06:59:32 +0000 | [diff] [blame] | 20 | #include <cstdio> |
Chris Lattner | 4506bd2 | 2009-06-21 19:43:50 +0000 | [diff] [blame] | 21 | #include <cstdlib> |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 22 | using namespace llvm; |
| 23 | |
Sean Callanan | fd0b028 | 2010-01-21 00:19:58 +0000 | [diff] [blame] | 24 | AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { |
| 25 | CurBuf = NULL; |
| 26 | CurPtr = NULL; |
Chris Lattner | faf32c1 | 2009-06-24 00:33:19 +0000 | [diff] [blame] | 27 | } |
| 28 | |
| 29 | AsmLexer::~AsmLexer() { |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 30 | } |
| 31 | |
Sean Callanan | fd0b028 | 2010-01-21 00:19:58 +0000 | [diff] [blame] | 32 | void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) { |
| 33 | CurBuf = buf; |
| 34 | |
| 35 | if (ptr) |
| 36 | CurPtr = ptr; |
| 37 | else |
| 38 | CurPtr = CurBuf->getBufferStart(); |
| 39 | |
| 40 | TokStart = 0; |
| 41 | } |
| 42 | |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 43 | /// ReturnError - Set the error to the specified string at the specified |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 44 | /// location. This is defined to always return AsmToken::Error. |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 45 | AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { |
Sean Callanan | 79036e4 | 2010-01-20 22:18:24 +0000 | [diff] [blame] | 46 | SetError(SMLoc::getFromPointer(Loc), Msg); |
| 47 | |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 48 | return AsmToken(AsmToken::Error, StringRef(Loc, 0)); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 49 | } |
| 50 | |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 51 | int AsmLexer::getNextChar() { |
| 52 | char CurChar = *CurPtr++; |
| 53 | switch (CurChar) { |
| 54 | default: |
| 55 | return (unsigned char)CurChar; |
Sean Callanan | fd0b028 | 2010-01-21 00:19:58 +0000 | [diff] [blame] | 56 | case 0: |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 57 | // A nul character in the stream is either the end of the current buffer or |
| 58 | // a random nul in the file. Disambiguate that here. |
| 59 | if (CurPtr-1 != CurBuf->getBufferEnd()) |
| 60 | return 0; // Just whitespace. |
| 61 | |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 62 | // Otherwise, return end of file. |
| 63 | --CurPtr; // Another call to lex will return EOF again. |
| 64 | return EOF; |
| 65 | } |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 66 | } |
| 67 | |
Daniel Dunbar | 4f2afe3 | 2010-09-27 20:12:52 +0000 | [diff] [blame] | 68 | /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)? |
| 69 | /// |
| 70 | /// The leading integral digit sequence and dot should have already been |
| 71 | /// consumed, some or all of the fractional digit sequence *can* have been |
| 72 | /// consumed. |
| 73 | AsmToken AsmLexer::LexFloatLiteral() { |
| 74 | // Skip the fractional digit sequence. |
| 75 | while (isdigit(*CurPtr)) |
| 76 | ++CurPtr; |
| 77 | |
| 78 | // Check for exponent; we intentionally accept a slighlty wider set of |
| 79 | // literals here and rely on the upstream client to reject invalid ones (e.g., |
| 80 | // "1e+"). |
| 81 | if (*CurPtr == 'e' || *CurPtr == 'E') { |
| 82 | ++CurPtr; |
| 83 | if (*CurPtr == '-' || *CurPtr == '+') |
| 84 | ++CurPtr; |
| 85 | while (isdigit(*CurPtr)) |
| 86 | ++CurPtr; |
| 87 | } |
| 88 | |
| 89 | return AsmToken(AsmToken::Real, |
| 90 | StringRef(TokStart, CurPtr - TokStart)); |
| 91 | } |
| 92 | |
Daniel Dunbar | 5fe03c0 | 2010-05-06 14:46:38 +0000 | [diff] [blame] | 93 | /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* |
/// IsIdentifierChar - Return true if \p c is one of the characters accepted
/// in the body of an identifier: an alphanumeric, '_', '$', '.' or '@'.
static bool IsIdentifierChar(char c) {
  // isalnum() requires a value representable as unsigned char (or EOF); plain
  // char may be signed, so convert before classifying bytes >= 0x80.
  return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
         c == '.' || c == '@';
}
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 97 | AsmToken AsmLexer::LexIdentifier() { |
Daniel Dunbar | 54f0a62 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 98 | // Check for floating point literals. |
| 99 | if (CurPtr[-1] == '.' && isdigit(*CurPtr)) { |
Daniel Dunbar | 4f2afe3 | 2010-09-27 20:12:52 +0000 | [diff] [blame] | 100 | // Disambiguate a .1243foo identifier from a floating literal. |
Daniel Dunbar | 54f0a62 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 101 | while (isdigit(*CurPtr)) |
| 102 | ++CurPtr; |
Daniel Dunbar | 4f2afe3 | 2010-09-27 20:12:52 +0000 | [diff] [blame] | 103 | if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr)) |
| 104 | return LexFloatLiteral(); |
Daniel Dunbar | 54f0a62 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 105 | } |
| 106 | |
| 107 | while (IsIdentifierChar(*CurPtr)) |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 108 | ++CurPtr; |
Chris Lattner | d305035 | 2010-04-14 04:40:28 +0000 | [diff] [blame] | 109 | |
| 110 | // Handle . as a special case. |
Daniel Dunbar | 5fe03c0 | 2010-05-06 14:46:38 +0000 | [diff] [blame] | 111 | if (CurPtr == TokStart+1 && TokStart[0] == '.') |
| 112 | return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); |
| 113 | |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 114 | return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 115 | } |
| 116 | |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 117 | /// LexSlash: Slash: / |
| 118 | /// C-Style Comment: /* ... */ |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 119 | AsmToken AsmLexer::LexSlash() { |
Daniel Dunbar | 383a4a8 | 2009-06-29 21:58:22 +0000 | [diff] [blame] | 120 | switch (*CurPtr) { |
| 121 | case '*': break; // C style comment. |
| 122 | case '/': return ++CurPtr, LexLineComment(); |
Daniel Dunbar | bdf90d6 | 2010-10-25 20:18:53 +0000 | [diff] [blame] | 123 | default: return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1)); |
Daniel Dunbar | 383a4a8 | 2009-06-29 21:58:22 +0000 | [diff] [blame] | 124 | } |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 125 | |
| 126 | // C Style comment. |
| 127 | ++CurPtr; // skip the star. |
| 128 | while (1) { |
| 129 | int CurChar = getNextChar(); |
| 130 | switch (CurChar) { |
| 131 | case EOF: |
Chris Lattner | 27aa7d2 | 2009-06-21 20:16:42 +0000 | [diff] [blame] | 132 | return ReturnError(TokStart, "unterminated comment"); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 133 | case '*': |
| 134 | // End of the comment? |
| 135 | if (CurPtr[0] != '/') break; |
| 136 | |
| 137 | ++CurPtr; // End the */. |
| 138 | return LexToken(); |
| 139 | } |
| 140 | } |
| 141 | } |
| 142 | |
Daniel Dunbar | 383a4a8 | 2009-06-29 21:58:22 +0000 | [diff] [blame] | 143 | /// LexLineComment: Comment: #[^\n]* |
| 144 | /// : //[^\n]* |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 145 | AsmToken AsmLexer::LexLineComment() { |
| 146 | // FIXME: This is broken if we happen to a comment at the end of a file, which |
| 147 | // was .included, and which doesn't end with a newline. |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 148 | int CurChar = getNextChar(); |
| 149 | while (CurChar != '\n' && CurChar != '\n' && CurChar != EOF) |
| 150 | CurChar = getNextChar(); |
| 151 | |
| 152 | if (CurChar == EOF) |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 153 | return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0)); |
| 154 | return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0)); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 155 | } |
| 156 | |
/// SkipIgnoredIntegerSuffix - Advance \p CurPtr past an "LL" suffix if one is
/// present, and then past a "ULL" suffix if one is present at the resulting
/// position.  Any other text leaves \p CurPtr unchanged.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  const char *Pos = CurPtr;

  const bool HasLL = Pos[0] == 'L' && Pos[1] == 'L';
  if (HasLL)
    Pos += 2;

  // Checked after (not instead of) the "LL" test, at the updated position.
  const bool HasULL = Pos[0] == 'U' && Pos[1] == 'L' && Pos[2] == 'L';
  if (HasULL)
    Pos += 3;

  CurPtr = Pos;
}
| 163 | |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 164 | /// LexDigit: First character is [0-9]. |
| 165 | /// Local Label: [0-9][:] |
| 166 | /// Forward/Backward Label: [0-9][fb] |
| 167 | /// Binary integer: 0b[01]+ |
| 168 | /// Octal integer: 0[0-7]+ |
| 169 | /// Hex integer: 0x[0-9a-fA-F]+ |
| 170 | /// Decimal integer: [1-9][0-9]* |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 171 | AsmToken AsmLexer::LexDigit() { |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 172 | // Decimal integer: [1-9][0-9]* |
Daniel Dunbar | facb34b | 2010-09-24 17:10:26 +0000 | [diff] [blame] | 173 | if (CurPtr[-1] != '0' || CurPtr[0] == '.') { |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 174 | while (isdigit(*CurPtr)) |
| 175 | ++CurPtr; |
Daniel Dunbar | 54f0a62 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 176 | |
| 177 | // Check for floating point literals. |
Daniel Dunbar | 4f2afe3 | 2010-09-27 20:12:52 +0000 | [diff] [blame] | 178 | if (*CurPtr == '.' || *CurPtr == 'e') { |
Daniel Dunbar | 54f0a62 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 179 | ++CurPtr; |
Daniel Dunbar | 4f2afe3 | 2010-09-27 20:12:52 +0000 | [diff] [blame] | 180 | return LexFloatLiteral(); |
Daniel Dunbar | 54f0a62 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 181 | } |
| 182 | |
Chris Lattner | d5a7e35 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 183 | StringRef Result(TokStart, CurPtr - TokStart); |
Chris Lattner | a78c67e | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 184 | |
Chris Lattner | d5a7e35 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 185 | long long Value; |
Chris Lattner | 3a151be | 2010-03-13 19:25:13 +0000 | [diff] [blame] | 186 | if (Result.getAsInteger(10, Value)) { |
| 187 | // We have to handle minint_as_a_positive_value specially, because |
| 188 | // - minint_as_a_positive_value = minint and it is valid. |
| 189 | if (Result == "9223372036854775808") |
| 190 | Value = -9223372036854775808ULL; |
| 191 | else |
| 192 | return ReturnError(TokStart, "Invalid decimal number"); |
| 193 | } |
Chris Lattner | a78c67e | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 194 | |
| 195 | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
| 196 | // suffixes on integer literals. |
| 197 | SkipIgnoredIntegerSuffix(CurPtr); |
| 198 | |
Chris Lattner | d5a7e35 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 199 | return AsmToken(AsmToken::Integer, Result, Value); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 200 | } |
| 201 | |
| 202 | if (*CurPtr == 'b') { |
| 203 | ++CurPtr; |
Kevin Enderby | ebe7fcd | 2010-05-17 23:08:19 +0000 | [diff] [blame] | 204 | // See if we actually have "0b" as part of something like "jmp 0b\n" |
Kevin Enderby | 9f2ad11 | 2010-05-18 18:09:20 +0000 | [diff] [blame] | 205 | if (!isdigit(CurPtr[0])) { |
Kevin Enderby | ebe7fcd | 2010-05-17 23:08:19 +0000 | [diff] [blame] | 206 | --CurPtr; |
| 207 | StringRef Result(TokStart, CurPtr - TokStart); |
Kevin Enderby | ebe7fcd | 2010-05-17 23:08:19 +0000 | [diff] [blame] | 208 | return AsmToken(AsmToken::Integer, Result, 0); |
| 209 | } |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 210 | const char *NumStart = CurPtr; |
| 211 | while (CurPtr[0] == '0' || CurPtr[0] == '1') |
| 212 | ++CurPtr; |
| 213 | |
| 214 | // Requires at least one binary digit. |
| 215 | if (CurPtr == NumStart) |
Chris Lattner | d5a7e35 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 216 | return ReturnError(TokStart, "Invalid binary number"); |
| 217 | |
| 218 | StringRef Result(TokStart, CurPtr - TokStart); |
| 219 | |
| 220 | long long Value; |
Chris Lattner | a78c67e | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 221 | if (Result.substr(2).getAsInteger(2, Value)) |
Chris Lattner | d5a7e35 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 222 | return ReturnError(TokStart, "Invalid binary number"); |
| 223 | |
Chris Lattner | a78c67e | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 224 | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
| 225 | // suffixes on integer literals. |
| 226 | SkipIgnoredIntegerSuffix(CurPtr); |
| 227 | |
Chris Lattner | d5a7e35 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 228 | return AsmToken(AsmToken::Integer, Result, Value); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 229 | } |
| 230 | |
| 231 | if (*CurPtr == 'x') { |
| 232 | ++CurPtr; |
| 233 | const char *NumStart = CurPtr; |
| 234 | while (isxdigit(CurPtr[0])) |
| 235 | ++CurPtr; |
| 236 | |
| 237 | // Requires at least one hex digit. |
| 238 | if (CurPtr == NumStart) |
| 239 | return ReturnError(CurPtr-2, "Invalid hexadecimal number"); |
Chris Lattner | 03949c9 | 2010-01-22 01:17:12 +0000 | [diff] [blame] | 240 | |
| 241 | unsigned long long Result; |
| 242 | if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) |
Chris Lattner | d5a7e35 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 243 | return ReturnError(TokStart, "Invalid hexadecimal number"); |
Chris Lattner | 03949c9 | 2010-01-22 01:17:12 +0000 | [diff] [blame] | 244 | |
Chris Lattner | a78c67e | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 245 | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
| 246 | // suffixes on integer literals. |
| 247 | SkipIgnoredIntegerSuffix(CurPtr); |
| 248 | |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 249 | return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), |
Chris Lattner | 03949c9 | 2010-01-22 01:17:12 +0000 | [diff] [blame] | 250 | (int64_t)Result); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 251 | } |
| 252 | |
| 253 | // Must be an octal number, it starts with 0. |
| 254 | while (*CurPtr >= '0' && *CurPtr <= '7') |
| 255 | ++CurPtr; |
Chris Lattner | d5a7e35 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 256 | |
| 257 | StringRef Result(TokStart, CurPtr - TokStart); |
| 258 | long long Value; |
| 259 | if (Result.getAsInteger(8, Value)) |
| 260 | return ReturnError(TokStart, "Invalid octal number"); |
| 261 | |
Chris Lattner | a78c67e | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 262 | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
| 263 | // suffixes on integer literals. |
| 264 | SkipIgnoredIntegerSuffix(CurPtr); |
| 265 | |
Chris Lattner | d5a7e35 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 266 | return AsmToken(AsmToken::Integer, Result, Value); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 267 | } |
| 268 | |
Roman Divacky | 7529b16 | 2010-12-18 08:56:37 +0000 | [diff] [blame] | 269 | /// LexSingleQuote: Integer: 'b' |
| 270 | AsmToken AsmLexer::LexSingleQuote() { |
| 271 | int CurChar = getNextChar(); |
| 272 | |
| 273 | if (CurChar == '\\') |
| 274 | CurChar = getNextChar(); |
| 275 | |
| 276 | if (CurChar == EOF) |
| 277 | return ReturnError(TokStart, "unterminated single quote"); |
| 278 | |
| 279 | CurChar = getNextChar(); |
| 280 | |
| 281 | if (CurChar != '\'') |
| 282 | return ReturnError(TokStart, "single quote way too long"); |
| 283 | |
| 284 | // The idea here being that 'c' is basically just an integral |
| 285 | // constant. |
| 286 | StringRef Res = StringRef(TokStart,CurPtr - TokStart); |
| 287 | long long Value; |
| 288 | |
| 289 | if (Res.startswith("\'\\")) { |
| 290 | char theChar = Res[2]; |
| 291 | switch (theChar) { |
| 292 | default: Value = theChar; break; |
| 293 | case '\'': Value = '\''; break; |
| 294 | case 't': Value = '\t'; break; |
| 295 | case 'n': Value = '\n'; break; |
| 296 | case 'b': Value = '\b'; break; |
| 297 | } |
| 298 | } else |
| 299 | Value = TokStart[1]; |
| 300 | |
| 301 | return AsmToken(AsmToken::Integer, Res, Value); |
| 302 | } |
| 303 | |
| 304 | |
Chris Lattner | 10a907d | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 305 | /// LexQuote: String: "..." |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 306 | AsmToken AsmLexer::LexQuote() { |
Chris Lattner | 10a907d | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 307 | int CurChar = getNextChar(); |
| 308 | // TODO: does gas allow multiline string constants? |
| 309 | while (CurChar != '"') { |
| 310 | if (CurChar == '\\') { |
| 311 | // Allow \", etc. |
| 312 | CurChar = getNextChar(); |
| 313 | } |
| 314 | |
Chris Lattner | 14ee48a | 2009-06-21 21:22:11 +0000 | [diff] [blame] | 315 | if (CurChar == EOF) |
| 316 | return ReturnError(TokStart, "unterminated string constant"); |
Chris Lattner | 10a907d | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 317 | |
| 318 | CurChar = getNextChar(); |
| 319 | } |
| 320 | |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 321 | return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); |
Chris Lattner | 10a907d | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 322 | } |
| 323 | |
Chris Lattner | ff4bc46 | 2009-08-10 01:39:42 +0000 | [diff] [blame] | 324 | StringRef AsmLexer::LexUntilEndOfStatement() { |
| 325 | TokStart = CurPtr; |
| 326 | |
Kevin Enderby | b5db830 | 2009-09-16 18:08:00 +0000 | [diff] [blame] | 327 | while (!isAtStartOfComment(*CurPtr) && // Start of line comment. |
Duncan Sands | 3472766 | 2010-07-12 08:16:59 +0000 | [diff] [blame] | 328 | *CurPtr != ';' && // End of statement marker. |
Chris Lattner | ff4bc46 | 2009-08-10 01:39:42 +0000 | [diff] [blame] | 329 | *CurPtr != '\n' && |
| 330 | *CurPtr != '\r' && |
Kevin Enderby | 9823ca9 | 2009-09-04 21:45:34 +0000 | [diff] [blame] | 331 | (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { |
Chris Lattner | ff4bc46 | 2009-08-10 01:39:42 +0000 | [diff] [blame] | 332 | ++CurPtr; |
Kevin Enderby | 9823ca9 | 2009-09-04 21:45:34 +0000 | [diff] [blame] | 333 | } |
Chris Lattner | ff4bc46 | 2009-08-10 01:39:42 +0000 | [diff] [blame] | 334 | return StringRef(TokStart, CurPtr-TokStart); |
| 335 | } |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 336 | |
Kevin Enderby | b5db830 | 2009-09-16 18:08:00 +0000 | [diff] [blame] | 337 | bool AsmLexer::isAtStartOfComment(char Char) { |
Chris Lattner | cec5450 | 2009-09-27 19:38:39 +0000 | [diff] [blame] | 338 | // FIXME: This won't work for multi-character comment indicators like "//". |
| 339 | return Char == *MAI.getCommentString(); |
Kevin Enderby | b5db830 | 2009-09-16 18:08:00 +0000 | [diff] [blame] | 340 | } |
| 341 | |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 342 | AsmToken AsmLexer::LexToken() { |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 343 | TokStart = CurPtr; |
| 344 | // This always consumes at least one character. |
| 345 | int CurChar = getNextChar(); |
| 346 | |
Kevin Enderby | b5db830 | 2009-09-16 18:08:00 +0000 | [diff] [blame] | 347 | if (isAtStartOfComment(CurChar)) |
| 348 | return LexLineComment(); |
Kevin Enderby | 9823ca9 | 2009-09-04 21:45:34 +0000 | [diff] [blame] | 349 | |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 350 | switch (CurChar) { |
| 351 | default: |
Daniel Dunbar | 5fe03c0 | 2010-05-06 14:46:38 +0000 | [diff] [blame] | 352 | // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* |
| 353 | if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 354 | return LexIdentifier(); |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 355 | |
| 356 | // Unknown character, emit an error. |
Chris Lattner | 27aa7d2 | 2009-06-21 20:16:42 +0000 | [diff] [blame] | 357 | return ReturnError(TokStart, "invalid character in input"); |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 358 | case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 359 | case 0: |
| 360 | case ' ': |
| 361 | case '\t': |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 362 | // Ignore whitespace. |
| 363 | return LexToken(); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 364 | case '\n': // FALL THROUGH. |
| 365 | case '\r': // FALL THROUGH. |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 366 | case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); |
| 367 | case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); |
| 368 | case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); |
| 369 | case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); |
| 370 | case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); |
| 371 | case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); |
| 372 | case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); |
Kevin Enderby | fb0f0de | 2009-09-04 22:40:31 +0000 | [diff] [blame] | 373 | case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); |
| 374 | case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); |
| 375 | case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); |
| 376 | case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 377 | case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); |
| 378 | case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); |
Daniel Dunbar | 5fe03c0 | 2010-05-06 14:46:38 +0000 | [diff] [blame] | 379 | case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); |
Matt Fleming | 924c5e5 | 2010-05-21 11:36:59 +0000 | [diff] [blame] | 380 | case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); |
Daniel Dunbar | 475839e | 2009-06-29 20:37:27 +0000 | [diff] [blame] | 381 | case '=': |
| 382 | if (*CurPtr == '=') |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 383 | return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); |
| 384 | return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); |
Daniel Dunbar | 475839e | 2009-06-29 20:37:27 +0000 | [diff] [blame] | 385 | case '|': |
| 386 | if (*CurPtr == '|') |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 387 | return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); |
| 388 | return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); |
| 389 | case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); |
Daniel Dunbar | 475839e | 2009-06-29 20:37:27 +0000 | [diff] [blame] | 390 | case '&': |
| 391 | if (*CurPtr == '&') |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 392 | return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); |
| 393 | return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); |
Daniel Dunbar | 475839e | 2009-06-29 20:37:27 +0000 | [diff] [blame] | 394 | case '!': |
| 395 | if (*CurPtr == '=') |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 396 | return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); |
| 397 | return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); |
Kevin Enderby | 7b4608d | 2009-09-03 17:15:07 +0000 | [diff] [blame] | 398 | case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 399 | case '/': return LexSlash(); |
Kevin Enderby | 9823ca9 | 2009-09-04 21:45:34 +0000 | [diff] [blame] | 400 | case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); |
Roman Divacky | 7529b16 | 2010-12-18 08:56:37 +0000 | [diff] [blame] | 401 | case '\'': return LexSingleQuote(); |
Chris Lattner | 10a907d | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 402 | case '"': return LexQuote(); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 403 | case '0': case '1': case '2': case '3': case '4': |
| 404 | case '5': case '6': case '7': case '8': case '9': |
| 405 | return LexDigit(); |
Chris Lattner | 8dfbe6c | 2009-06-23 05:57:07 +0000 | [diff] [blame] | 406 | case '<': |
Daniel Dunbar | 475839e | 2009-06-29 20:37:27 +0000 | [diff] [blame] | 407 | switch (*CurPtr) { |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 408 | case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 409 | StringRef(TokStart, 2)); |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 410 | case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 411 | StringRef(TokStart, 2)); |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 412 | case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 413 | StringRef(TokStart, 2)); |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 414 | default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); |
Chris Lattner | 8dfbe6c | 2009-06-23 05:57:07 +0000 | [diff] [blame] | 415 | } |
Chris Lattner | 8dfbe6c | 2009-06-23 05:57:07 +0000 | [diff] [blame] | 416 | case '>': |
Daniel Dunbar | 475839e | 2009-06-29 20:37:27 +0000 | [diff] [blame] | 417 | switch (*CurPtr) { |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 418 | case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 419 | StringRef(TokStart, 2)); |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 420 | case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 421 | StringRef(TokStart, 2)); |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 422 | default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); |
Chris Lattner | 8dfbe6c | 2009-06-23 05:57:07 +0000 | [diff] [blame] | 423 | } |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 424 | |
| 425 | // TODO: Quoted identifiers (objc methods etc) |
| 426 | // local labels: [0-9][:] |
| 427 | // Forward/backward labels: [0-9][fb] |
| 428 | // Integers, fp constants, character constants. |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 429 | } |
Duncan Sands | 66b9f29 | 2009-06-22 06:59:32 +0000 | [diff] [blame] | 430 | } |