Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 1 | //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This class implements the lexer for assembly files. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
Chris Lattner | be343b3 | 2010-01-22 01:58:08 +0000 | [diff] [blame] | 14 | #include "llvm/MC/MCParser/AsmLexer.h" |
Sean Callanan | fd0b028 | 2010-01-21 00:19:58 +0000 | [diff] [blame] | 15 | #include "llvm/Support/SMLoc.h" |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 16 | #include "llvm/Support/MemoryBuffer.h" |
Kevin Enderby | 9823ca9 | 2009-09-04 21:45:34 +0000 | [diff] [blame] | 17 | #include "llvm/MC/MCAsmInfo.h" |
Nick Lewycky | 476b242 | 2010-12-19 20:43:38 +0000 | [diff] [blame] | 18 | #include <cctype> |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 19 | #include <cerrno> |
Duncan Sands | 66b9f29 | 2009-06-22 06:59:32 +0000 | [diff] [blame] | 20 | #include <cstdio> |
Chris Lattner | 4506bd2 | 2009-06-21 19:43:50 +0000 | [diff] [blame] | 21 | #include <cstdlib> |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 22 | using namespace llvm; |
| 23 | |
Sean Callanan | fd0b028 | 2010-01-21 00:19:58 +0000 | [diff] [blame] | 24 | AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { |
| 25 | CurBuf = NULL; |
| 26 | CurPtr = NULL; |
Jim Grosbach | 6148225 | 2011-09-14 16:37:04 +0000 | [diff] [blame] | 27 | isAtStartOfLine = true; |
Chris Lattner | faf32c1 | 2009-06-24 00:33:19 +0000 | [diff] [blame] | 28 | } |
| 29 | |
| 30 | AsmLexer::~AsmLexer() { |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 31 | } |
| 32 | |
Sean Callanan | fd0b028 | 2010-01-21 00:19:58 +0000 | [diff] [blame] | 33 | void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) { |
| 34 | CurBuf = buf; |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 35 | |
Sean Callanan | fd0b028 | 2010-01-21 00:19:58 +0000 | [diff] [blame] | 36 | if (ptr) |
| 37 | CurPtr = ptr; |
| 38 | else |
| 39 | CurPtr = CurBuf->getBufferStart(); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 40 | |
Sean Callanan | fd0b028 | 2010-01-21 00:19:58 +0000 | [diff] [blame] | 41 | TokStart = 0; |
| 42 | } |
| 43 | |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 44 | /// ReturnError - Set the error to the specified string at the specified |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 45 | /// location. This is defined to always return AsmToken::Error. |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 46 | AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { |
Sean Callanan | 79036e4 | 2010-01-20 22:18:24 +0000 | [diff] [blame] | 47 | SetError(SMLoc::getFromPointer(Loc), Msg); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 48 | |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 49 | return AsmToken(AsmToken::Error, StringRef(Loc, 0)); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 50 | } |
| 51 | |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 52 | int AsmLexer::getNextChar() { |
| 53 | char CurChar = *CurPtr++; |
| 54 | switch (CurChar) { |
| 55 | default: |
| 56 | return (unsigned char)CurChar; |
Sean Callanan | fd0b028 | 2010-01-21 00:19:58 +0000 | [diff] [blame] | 57 | case 0: |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 58 | // A nul character in the stream is either the end of the current buffer or |
| 59 | // a random nul in the file. Disambiguate that here. |
| 60 | if (CurPtr-1 != CurBuf->getBufferEnd()) |
| 61 | return 0; // Just whitespace. |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 62 | |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 63 | // Otherwise, return end of file. |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 64 | --CurPtr; // Another call to lex will return EOF again. |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 65 | return EOF; |
| 66 | } |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 67 | } |
| 68 | |
Daniel Dunbar | 4f2afe3 | 2010-09-27 20:12:52 +0000 | [diff] [blame] | 69 | /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)? |
| 70 | /// |
| 71 | /// The leading integral digit sequence and dot should have already been |
| 72 | /// consumed, some or all of the fractional digit sequence *can* have been |
| 73 | /// consumed. |
| 74 | AsmToken AsmLexer::LexFloatLiteral() { |
| 75 | // Skip the fractional digit sequence. |
| 76 | while (isdigit(*CurPtr)) |
| 77 | ++CurPtr; |
| 78 | |
| 79 | // Check for exponent; we intentionally accept a slighlty wider set of |
| 80 | // literals here and rely on the upstream client to reject invalid ones (e.g., |
| 81 | // "1e+"). |
| 82 | if (*CurPtr == 'e' || *CurPtr == 'E') { |
| 83 | ++CurPtr; |
| 84 | if (*CurPtr == '-' || *CurPtr == '+') |
| 85 | ++CurPtr; |
| 86 | while (isdigit(*CurPtr)) |
| 87 | ++CurPtr; |
| 88 | } |
| 89 | |
| 90 | return AsmToken(AsmToken::Real, |
| 91 | StringRef(TokStart, CurPtr - TokStart)); |
| 92 | } |
| 93 | |
/// IsIdentifierChar - Return true if \p c may appear after the first character
/// of an identifier, i.e. it matches the tail class of
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
static bool IsIdentifierChar(char c) {
  // isalnum() is only defined for EOF and values representable as unsigned
  // char; a plain char holding a byte >= 0x80 may be negative, so convert
  // before classifying.
  return isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$' ||
         c == '.' || c == '@';
}
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 98 | AsmToken AsmLexer::LexIdentifier() { |
Daniel Dunbar | 54f0a62 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 99 | // Check for floating point literals. |
| 100 | if (CurPtr[-1] == '.' && isdigit(*CurPtr)) { |
Daniel Dunbar | 4f2afe3 | 2010-09-27 20:12:52 +0000 | [diff] [blame] | 101 | // Disambiguate a .1243foo identifier from a floating literal. |
Daniel Dunbar | 54f0a62 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 102 | while (isdigit(*CurPtr)) |
| 103 | ++CurPtr; |
Daniel Dunbar | 4f2afe3 | 2010-09-27 20:12:52 +0000 | [diff] [blame] | 104 | if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr)) |
| 105 | return LexFloatLiteral(); |
Daniel Dunbar | 54f0a62 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 106 | } |
| 107 | |
| 108 | while (IsIdentifierChar(*CurPtr)) |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 109 | ++CurPtr; |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 110 | |
Chris Lattner | d305035 | 2010-04-14 04:40:28 +0000 | [diff] [blame] | 111 | // Handle . as a special case. |
Daniel Dunbar | 5fe03c0 | 2010-05-06 14:46:38 +0000 | [diff] [blame] | 112 | if (CurPtr == TokStart+1 && TokStart[0] == '.') |
| 113 | return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 114 | |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 115 | return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 116 | } |
| 117 | |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 118 | /// LexSlash: Slash: / |
| 119 | /// C-Style Comment: /* ... */ |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 120 | AsmToken AsmLexer::LexSlash() { |
Daniel Dunbar | 383a4a8 | 2009-06-29 21:58:22 +0000 | [diff] [blame] | 121 | switch (*CurPtr) { |
| 122 | case '*': break; // C style comment. |
| 123 | case '/': return ++CurPtr, LexLineComment(); |
Daniel Dunbar | bdf90d6 | 2010-10-25 20:18:53 +0000 | [diff] [blame] | 124 | default: return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1)); |
Daniel Dunbar | 383a4a8 | 2009-06-29 21:58:22 +0000 | [diff] [blame] | 125 | } |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 126 | |
| 127 | // C Style comment. |
| 128 | ++CurPtr; // skip the star. |
| 129 | while (1) { |
| 130 | int CurChar = getNextChar(); |
| 131 | switch (CurChar) { |
| 132 | case EOF: |
Chris Lattner | 27aa7d2 | 2009-06-21 20:16:42 +0000 | [diff] [blame] | 133 | return ReturnError(TokStart, "unterminated comment"); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 134 | case '*': |
| 135 | // End of the comment? |
| 136 | if (CurPtr[0] != '/') break; |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 137 | |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 138 | ++CurPtr; // End the */. |
| 139 | return LexToken(); |
| 140 | } |
| 141 | } |
| 142 | } |
| 143 | |
Daniel Dunbar | 383a4a8 | 2009-06-29 21:58:22 +0000 | [diff] [blame] | 144 | /// LexLineComment: Comment: #[^\n]* |
| 145 | /// : //[^\n]* |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 146 | AsmToken AsmLexer::LexLineComment() { |
| 147 | // FIXME: This is broken if we happen to a comment at the end of a file, which |
| 148 | // was .included, and which doesn't end with a newline. |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 149 | int CurChar = getNextChar(); |
Chris Lattner | 0ecd825 | 2011-08-04 19:31:26 +0000 | [diff] [blame] | 150 | while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 151 | CurChar = getNextChar(); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 152 | |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 153 | if (CurChar == EOF) |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 154 | return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0)); |
| 155 | return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0)); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 156 | } |
| 157 | |
/// SkipIgnoredIntegerSuffix - Advance \p CurPtr past a single "LL" or "ULL"
/// integer-literal suffix if one is present at the cursor; otherwise leave it
/// untouched. At most one suffix is consumed, so stacked text such as "LLULL"
/// only has its leading "LL" skipped.
///
/// \p CurPtr must point into a nul-terminated buffer; the comparisons
/// short-circuit so nothing past the terminator is read.
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  if (CurPtr[0] == 'L' && CurPtr[1] == 'L')
    CurPtr += 2;
  else if (CurPtr[0] == 'U' && CurPtr[1] == 'L' && CurPtr[2] == 'L')
    CurPtr += 3;
}
| 164 | |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 165 | /// LexDigit: First character is [0-9]. |
| 166 | /// Local Label: [0-9][:] |
| 167 | /// Forward/Backward Label: [0-9][fb] |
| 168 | /// Binary integer: 0b[01]+ |
| 169 | /// Octal integer: 0[0-7]+ |
| 170 | /// Hex integer: 0x[0-9a-fA-F]+ |
| 171 | /// Decimal integer: [1-9][0-9]* |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 172 | AsmToken AsmLexer::LexDigit() { |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 173 | // Decimal integer: [1-9][0-9]* |
Daniel Dunbar | facb34b | 2010-09-24 17:10:26 +0000 | [diff] [blame] | 174 | if (CurPtr[-1] != '0' || CurPtr[0] == '.') { |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 175 | while (isdigit(*CurPtr)) |
| 176 | ++CurPtr; |
Daniel Dunbar | 54f0a62 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 177 | |
| 178 | // Check for floating point literals. |
Daniel Dunbar | 4f2afe3 | 2010-09-27 20:12:52 +0000 | [diff] [blame] | 179 | if (*CurPtr == '.' || *CurPtr == 'e') { |
Daniel Dunbar | 54f0a62 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 180 | ++CurPtr; |
Daniel Dunbar | 4f2afe3 | 2010-09-27 20:12:52 +0000 | [diff] [blame] | 181 | return LexFloatLiteral(); |
Daniel Dunbar | 54f0a62 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 182 | } |
| 183 | |
Chris Lattner | d5a7e35 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 184 | StringRef Result(TokStart, CurPtr - TokStart); |
Chris Lattner | a78c67e | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 185 | |
Chris Lattner | d5a7e35 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 186 | long long Value; |
Chris Lattner | 3a151be | 2010-03-13 19:25:13 +0000 | [diff] [blame] | 187 | if (Result.getAsInteger(10, Value)) { |
Chris Lattner | 7ab3cc3 | 2010-12-25 21:36:35 +0000 | [diff] [blame] | 188 | // Allow positive values that are too large to fit into a signed 64-bit |
| 189 | // integer, but that do fit in an unsigned one, we just convert them over. |
| 190 | unsigned long long UValue; |
| 191 | if (Result.getAsInteger(10, UValue)) |
| 192 | return ReturnError(TokStart, "invalid decimal number"); |
| 193 | Value = (long long)UValue; |
Chris Lattner | 3a151be | 2010-03-13 19:25:13 +0000 | [diff] [blame] | 194 | } |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 195 | |
Chris Lattner | a78c67e | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 196 | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
| 197 | // suffixes on integer literals. |
| 198 | SkipIgnoredIntegerSuffix(CurPtr); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 199 | |
Chris Lattner | d5a7e35 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 200 | return AsmToken(AsmToken::Integer, Result, Value); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 201 | } |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 202 | |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 203 | if (*CurPtr == 'b') { |
| 204 | ++CurPtr; |
Kevin Enderby | ebe7fcd | 2010-05-17 23:08:19 +0000 | [diff] [blame] | 205 | // See if we actually have "0b" as part of something like "jmp 0b\n" |
Kevin Enderby | 9f2ad11 | 2010-05-18 18:09:20 +0000 | [diff] [blame] | 206 | if (!isdigit(CurPtr[0])) { |
Kevin Enderby | ebe7fcd | 2010-05-17 23:08:19 +0000 | [diff] [blame] | 207 | --CurPtr; |
| 208 | StringRef Result(TokStart, CurPtr - TokStart); |
Kevin Enderby | ebe7fcd | 2010-05-17 23:08:19 +0000 | [diff] [blame] | 209 | return AsmToken(AsmToken::Integer, Result, 0); |
| 210 | } |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 211 | const char *NumStart = CurPtr; |
| 212 | while (CurPtr[0] == '0' || CurPtr[0] == '1') |
| 213 | ++CurPtr; |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 214 | |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 215 | // Requires at least one binary digit. |
| 216 | if (CurPtr == NumStart) |
Eric Christopher | 05f9e4e | 2011-04-12 00:18:03 +0000 | [diff] [blame] | 217 | return ReturnError(TokStart, "invalid binary number"); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 218 | |
Chris Lattner | d5a7e35 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 219 | StringRef Result(TokStart, CurPtr - TokStart); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 220 | |
Chris Lattner | d5a7e35 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 221 | long long Value; |
Chris Lattner | a78c67e | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 222 | if (Result.substr(2).getAsInteger(2, Value)) |
Eric Christopher | 05f9e4e | 2011-04-12 00:18:03 +0000 | [diff] [blame] | 223 | return ReturnError(TokStart, "invalid binary number"); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 224 | |
Chris Lattner | a78c67e | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 225 | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
| 226 | // suffixes on integer literals. |
| 227 | SkipIgnoredIntegerSuffix(CurPtr); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 228 | |
Chris Lattner | d5a7e35 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 229 | return AsmToken(AsmToken::Integer, Result, Value); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 230 | } |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 231 | |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 232 | if (*CurPtr == 'x') { |
| 233 | ++CurPtr; |
| 234 | const char *NumStart = CurPtr; |
| 235 | while (isxdigit(CurPtr[0])) |
| 236 | ++CurPtr; |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 237 | |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 238 | // Requires at least one hex digit. |
| 239 | if (CurPtr == NumStart) |
Eric Christopher | 05f9e4e | 2011-04-12 00:18:03 +0000 | [diff] [blame] | 240 | return ReturnError(CurPtr-2, "invalid hexadecimal number"); |
Chris Lattner | 03949c9 | 2010-01-22 01:17:12 +0000 | [diff] [blame] | 241 | |
| 242 | unsigned long long Result; |
| 243 | if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) |
Eric Christopher | 05f9e4e | 2011-04-12 00:18:03 +0000 | [diff] [blame] | 244 | return ReturnError(TokStart, "invalid hexadecimal number"); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 245 | |
Chris Lattner | a78c67e | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 246 | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
| 247 | // suffixes on integer literals. |
| 248 | SkipIgnoredIntegerSuffix(CurPtr); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 249 | |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 250 | return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), |
Chris Lattner | 03949c9 | 2010-01-22 01:17:12 +0000 | [diff] [blame] | 251 | (int64_t)Result); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 252 | } |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 253 | |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 254 | // Must be an octal number, it starts with 0. |
Eric Christopher | 120fd2d | 2011-04-11 22:24:56 +0000 | [diff] [blame] | 255 | while (*CurPtr >= '0' && *CurPtr <= '9') |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 256 | ++CurPtr; |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 257 | |
Chris Lattner | d5a7e35 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 258 | StringRef Result(TokStart, CurPtr - TokStart); |
| 259 | long long Value; |
| 260 | if (Result.getAsInteger(8, Value)) |
Eric Christopher | 05f9e4e | 2011-04-12 00:18:03 +0000 | [diff] [blame] | 261 | return ReturnError(TokStart, "invalid octal number"); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 262 | |
Chris Lattner | a78c67e | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 263 | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
| 264 | // suffixes on integer literals. |
| 265 | SkipIgnoredIntegerSuffix(CurPtr); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 266 | |
Chris Lattner | d5a7e35 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 267 | return AsmToken(AsmToken::Integer, Result, Value); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 268 | } |
| 269 | |
Roman Divacky | 7529b16 | 2010-12-18 08:56:37 +0000 | [diff] [blame] | 270 | /// LexSingleQuote: Integer: 'b' |
| 271 | AsmToken AsmLexer::LexSingleQuote() { |
| 272 | int CurChar = getNextChar(); |
| 273 | |
| 274 | if (CurChar == '\\') |
| 275 | CurChar = getNextChar(); |
| 276 | |
| 277 | if (CurChar == EOF) |
| 278 | return ReturnError(TokStart, "unterminated single quote"); |
| 279 | |
| 280 | CurChar = getNextChar(); |
| 281 | |
| 282 | if (CurChar != '\'') |
| 283 | return ReturnError(TokStart, "single quote way too long"); |
| 284 | |
| 285 | // The idea here being that 'c' is basically just an integral |
| 286 | // constant. |
| 287 | StringRef Res = StringRef(TokStart,CurPtr - TokStart); |
| 288 | long long Value; |
| 289 | |
| 290 | if (Res.startswith("\'\\")) { |
| 291 | char theChar = Res[2]; |
| 292 | switch (theChar) { |
| 293 | default: Value = theChar; break; |
| 294 | case '\'': Value = '\''; break; |
| 295 | case 't': Value = '\t'; break; |
| 296 | case 'n': Value = '\n'; break; |
| 297 | case 'b': Value = '\b'; break; |
| 298 | } |
| 299 | } else |
| 300 | Value = TokStart[1]; |
| 301 | |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 302 | return AsmToken(AsmToken::Integer, Res, Value); |
Roman Divacky | 7529b16 | 2010-12-18 08:56:37 +0000 | [diff] [blame] | 303 | } |
| 304 | |
| 305 | |
Chris Lattner | 10a907d | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 306 | /// LexQuote: String: "..." |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 307 | AsmToken AsmLexer::LexQuote() { |
Chris Lattner | 10a907d | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 308 | int CurChar = getNextChar(); |
| 309 | // TODO: does gas allow multiline string constants? |
| 310 | while (CurChar != '"') { |
| 311 | if (CurChar == '\\') { |
| 312 | // Allow \", etc. |
| 313 | CurChar = getNextChar(); |
| 314 | } |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 315 | |
Chris Lattner | 14ee48a | 2009-06-21 21:22:11 +0000 | [diff] [blame] | 316 | if (CurChar == EOF) |
| 317 | return ReturnError(TokStart, "unterminated string constant"); |
Chris Lattner | 10a907d | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 318 | |
| 319 | CurChar = getNextChar(); |
| 320 | } |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 321 | |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 322 | return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); |
Chris Lattner | 10a907d | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 323 | } |
| 324 | |
Chris Lattner | ff4bc46 | 2009-08-10 01:39:42 +0000 | [diff] [blame] | 325 | StringRef AsmLexer::LexUntilEndOfStatement() { |
| 326 | TokStart = CurPtr; |
| 327 | |
Jim Grosbach | d31d304 | 2011-03-24 18:46:34 +0000 | [diff] [blame] | 328 | while (!isAtStartOfComment(*CurPtr) && // Start of line comment. |
| 329 | !isAtStatementSeparator(CurPtr) && // End of statement marker. |
Chris Lattner | ff4bc46 | 2009-08-10 01:39:42 +0000 | [diff] [blame] | 330 | *CurPtr != '\n' && |
| 331 | *CurPtr != '\r' && |
Kevin Enderby | 9823ca9 | 2009-09-04 21:45:34 +0000 | [diff] [blame] | 332 | (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { |
Chris Lattner | ff4bc46 | 2009-08-10 01:39:42 +0000 | [diff] [blame] | 333 | ++CurPtr; |
Kevin Enderby | 9823ca9 | 2009-09-04 21:45:34 +0000 | [diff] [blame] | 334 | } |
Chris Lattner | ff4bc46 | 2009-08-10 01:39:42 +0000 | [diff] [blame] | 335 | return StringRef(TokStart, CurPtr-TokStart); |
| 336 | } |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 337 | |
Kevin Enderby | f1c21a8 | 2011-09-13 23:45:18 +0000 | [diff] [blame] | 338 | StringRef AsmLexer::LexUntilEndOfLine() { |
| 339 | TokStart = CurPtr; |
| 340 | |
| 341 | while (*CurPtr != '\n' && |
| 342 | *CurPtr != '\r' && |
| 343 | (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { |
| 344 | ++CurPtr; |
| 345 | } |
| 346 | return StringRef(TokStart, CurPtr-TokStart); |
| 347 | } |
| 348 | |
Kevin Enderby | b5db830 | 2009-09-16 18:08:00 +0000 | [diff] [blame] | 349 | bool AsmLexer::isAtStartOfComment(char Char) { |
Chris Lattner | cec5450 | 2009-09-27 19:38:39 +0000 | [diff] [blame] | 350 | // FIXME: This won't work for multi-character comment indicators like "//". |
| 351 | return Char == *MAI.getCommentString(); |
Kevin Enderby | b5db830 | 2009-09-16 18:08:00 +0000 | [diff] [blame] | 352 | } |
| 353 | |
Jim Grosbach | d31d304 | 2011-03-24 18:46:34 +0000 | [diff] [blame] | 354 | bool AsmLexer::isAtStatementSeparator(const char *Ptr) { |
| 355 | return strncmp(Ptr, MAI.getSeparatorString(), |
| 356 | strlen(MAI.getSeparatorString())) == 0; |
| 357 | } |
| 358 | |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 359 | AsmToken AsmLexer::LexToken() { |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 360 | TokStart = CurPtr; |
| 361 | // This always consumes at least one character. |
| 362 | int CurChar = getNextChar(); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 363 | |
Kevin Enderby | f1c21a8 | 2011-09-13 23:45:18 +0000 | [diff] [blame] | 364 | if (isAtStartOfComment(CurChar)) { |
| 365 | // If this comment starts with a '#', then return the Hash token and let |
| 366 | // the assembler parser see if it can be parsed as a cpp line filename |
| 367 | // comment. We do this only if we are at the start of a line. |
| 368 | if (CurChar == '#' && isAtStartOfLine) |
| 369 | return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); |
| 370 | isAtStartOfLine = true; |
Kevin Enderby | b5db830 | 2009-09-16 18:08:00 +0000 | [diff] [blame] | 371 | return LexLineComment(); |
Kevin Enderby | f1c21a8 | 2011-09-13 23:45:18 +0000 | [diff] [blame] | 372 | } |
Jim Grosbach | d31d304 | 2011-03-24 18:46:34 +0000 | [diff] [blame] | 373 | if (isAtStatementSeparator(TokStart)) { |
| 374 | CurPtr += strlen(MAI.getSeparatorString()) - 1; |
| 375 | return AsmToken(AsmToken::EndOfStatement, |
| 376 | StringRef(TokStart, strlen(MAI.getSeparatorString()))); |
| 377 | } |
Kevin Enderby | 9823ca9 | 2009-09-04 21:45:34 +0000 | [diff] [blame] | 378 | |
Jim Grosbach | 70796ca | 2011-09-15 16:52:06 +0000 | [diff] [blame] | 379 | // If we're missing a newline at EOF, make sure we still get an |
| 380 | // EndOfStatement token before the Eof token. |
| 381 | if (CurChar == EOF && !isAtStartOfLine) { |
| 382 | isAtStartOfLine = true; |
| 383 | return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); |
| 384 | } |
| 385 | |
| 386 | isAtStartOfLine = false; |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 387 | switch (CurChar) { |
| 388 | default: |
Daniel Dunbar | 5fe03c0 | 2010-05-06 14:46:38 +0000 | [diff] [blame] | 389 | // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* |
| 390 | if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 391 | return LexIdentifier(); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 392 | |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 393 | // Unknown character, emit an error. |
Chris Lattner | 27aa7d2 | 2009-06-21 20:16:42 +0000 | [diff] [blame] | 394 | return ReturnError(TokStart, "invalid character in input"); |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 395 | case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 396 | case 0: |
| 397 | case ' ': |
| 398 | case '\t': |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 399 | // Ignore whitespace. |
| 400 | return LexToken(); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 401 | case '\n': // FALL THROUGH. |
Jim Grosbach | d31d304 | 2011-03-24 18:46:34 +0000 | [diff] [blame] | 402 | case '\r': |
Kevin Enderby | f1c21a8 | 2011-09-13 23:45:18 +0000 | [diff] [blame] | 403 | isAtStartOfLine = true; |
Jim Grosbach | d31d304 | 2011-03-24 18:46:34 +0000 | [diff] [blame] | 404 | return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 405 | case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); |
| 406 | case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); |
| 407 | case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); |
| 408 | case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); |
| 409 | case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); |
| 410 | case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); |
Kevin Enderby | fb0f0de | 2009-09-04 22:40:31 +0000 | [diff] [blame] | 411 | case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); |
| 412 | case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); |
| 413 | case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); |
| 414 | case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 415 | case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); |
| 416 | case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); |
Daniel Dunbar | 5fe03c0 | 2010-05-06 14:46:38 +0000 | [diff] [blame] | 417 | case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); |
Matt Fleming | 924c5e5 | 2010-05-21 11:36:59 +0000 | [diff] [blame] | 418 | case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); |
Rafael Espindola | 6536644 | 2011-06-05 02:43:45 +0000 | [diff] [blame] | 419 | case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1)); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 420 | case '=': |
Daniel Dunbar | 475839e | 2009-06-29 20:37:27 +0000 | [diff] [blame] | 421 | if (*CurPtr == '=') |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 422 | return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); |
| 423 | return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 424 | case '|': |
Daniel Dunbar | 475839e | 2009-06-29 20:37:27 +0000 | [diff] [blame] | 425 | if (*CurPtr == '|') |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 426 | return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); |
| 427 | return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); |
| 428 | case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 429 | case '&': |
Daniel Dunbar | 475839e | 2009-06-29 20:37:27 +0000 | [diff] [blame] | 430 | if (*CurPtr == '&') |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 431 | return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); |
| 432 | return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 433 | case '!': |
Daniel Dunbar | 475839e | 2009-06-29 20:37:27 +0000 | [diff] [blame] | 434 | if (*CurPtr == '=') |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 435 | return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); |
| 436 | return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); |
Kevin Enderby | 7b4608d | 2009-09-03 17:15:07 +0000 | [diff] [blame] | 437 | case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 438 | case '/': return LexSlash(); |
Kevin Enderby | 9823ca9 | 2009-09-04 21:45:34 +0000 | [diff] [blame] | 439 | case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); |
Roman Divacky | 7529b16 | 2010-12-18 08:56:37 +0000 | [diff] [blame] | 440 | case '\'': return LexSingleQuote(); |
Chris Lattner | 10a907d | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 441 | case '"': return LexQuote(); |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 442 | case '0': case '1': case '2': case '3': case '4': |
| 443 | case '5': case '6': case '7': case '8': case '9': |
| 444 | return LexDigit(); |
Chris Lattner | 8dfbe6c | 2009-06-23 05:57:07 +0000 | [diff] [blame] | 445 | case '<': |
Daniel Dunbar | 475839e | 2009-06-29 20:37:27 +0000 | [diff] [blame] | 446 | switch (*CurPtr) { |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 447 | case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 448 | StringRef(TokStart, 2)); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 449 | case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 450 | StringRef(TokStart, 2)); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 451 | case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 452 | StringRef(TokStart, 2)); |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 453 | default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); |
Chris Lattner | 8dfbe6c | 2009-06-23 05:57:07 +0000 | [diff] [blame] | 454 | } |
Chris Lattner | 8dfbe6c | 2009-06-23 05:57:07 +0000 | [diff] [blame] | 455 | case '>': |
Daniel Dunbar | 475839e | 2009-06-29 20:37:27 +0000 | [diff] [blame] | 456 | switch (*CurPtr) { |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 457 | case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 458 | StringRef(TokStart, 2)); |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 459 | case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 460 | StringRef(TokStart, 2)); |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 461 | default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); |
Chris Lattner | 8dfbe6c | 2009-06-23 05:57:07 +0000 | [diff] [blame] | 462 | } |
Jim Grosbach | de2f5f4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 463 | |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 464 | // TODO: Quoted identifiers (objc methods etc) |
| 465 | // local labels: [0-9][:] |
| 466 | // Forward/backward labels: [0-9][fb] |
| 467 | // Integers, fp constants, character constants. |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 468 | } |
Duncan Sands | 66b9f29 | 2009-06-22 06:59:32 +0000 | [diff] [blame] | 469 | } |