Chris Lattner | c8dfbcb | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 1 | //===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This class implements the lexer for assembly files. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
Chandler Carruth | 6bda14b | 2017-06-06 11:49:48 +0000 | [diff] [blame] | 14 | #include "llvm/MC/MCParser/AsmLexer.h" |
Eugene Zelenko | 33d7b76 | 2016-08-23 17:14:32 +0000 | [diff] [blame] | 15 | #include "llvm/ADT/APInt.h" |
| 16 | #include "llvm/ADT/ArrayRef.h" |
Nirav Dave | e4c6153 | 2016-10-01 10:57:55 +0000 | [diff] [blame] | 17 | #include "llvm/ADT/StringRef.h" |
Nirav Dave | 157891c | 2016-10-03 13:48:27 +0000 | [diff] [blame] | 18 | #include "llvm/ADT/StringSwitch.h" |
Nirav Dave | e4c6153 | 2016-10-01 10:57:55 +0000 | [diff] [blame] | 19 | #include "llvm/MC/MCAsmInfo.h" |
Nirav Dave | 157891c | 2016-10-03 13:48:27 +0000 | [diff] [blame] | 20 | #include "llvm/MC/MCParser/MCAsmLexer.h" |
Chandler Carruth | ed0881b | 2012-12-03 16:50:05 +0000 | [diff] [blame] | 21 | #include "llvm/Support/SMLoc.h" |
Nirav Dave | 157891c | 2016-10-03 13:48:27 +0000 | [diff] [blame] | 22 | #include "llvm/Support/SaveAndRestore.h" |
Eugene Zelenko | 33d7b76 | 2016-08-23 17:14:32 +0000 | [diff] [blame] | 23 | #include <cassert> |
Nick Lewycky | 0de20af | 2010-12-19 20:43:38 +0000 | [diff] [blame] | 24 | #include <cctype> |
Duncan Sands | 376c6f1 | 2009-06-22 06:59:32 +0000 | [diff] [blame] | 25 | #include <cstdio> |
Eugene Zelenko | 33d7b76 | 2016-08-23 17:14:32 +0000 | [diff] [blame] | 26 | #include <cstring> |
Nirav Dave | e4c6153 | 2016-10-01 10:57:55 +0000 | [diff] [blame] | 27 | #include <string> |
Nirav Dave | 157891c | 2016-10-03 13:48:27 +0000 | [diff] [blame] | 28 | #include <tuple> |
Eugene Zelenko | 33d7b76 | 2016-08-23 17:14:32 +0000 | [diff] [blame] | 29 | #include <utility> |
Daniel Sanders | 3feeb9c | 2016-08-08 11:50:25 +0000 | [diff] [blame] | 30 | |
Chris Lattner | c8dfbcb | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 31 | using namespace llvm; |
| 32 | |
Eugene Zelenko | 4b6ff6b | 2017-02-10 01:33:54 +0000 | [diff] [blame] | 33 | AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) { |
David Peixotto | 27aa001 | 2013-12-06 23:05:33 +0000 | [diff] [blame] | 34 | AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); |
Chris Lattner | 4c501fc | 2009-06-24 00:33:19 +0000 | [diff] [blame] | 35 | } |
| 36 | |
// The destructor is defined here, out of line, with the compiler-generated
// behavior.
AsmLexer::~AsmLexer() = default;
Chris Lattner | c8dfbcb | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 38 | |
Rafael Espindola | 8026bd0 | 2014-07-06 14:17:29 +0000 | [diff] [blame] | 39 | void AsmLexer::setBuffer(StringRef Buf, const char *ptr) { |
| 40 | CurBuf = Buf; |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 41 | |
Sean Callanan | 7a77eae | 2010-01-21 00:19:58 +0000 | [diff] [blame] | 42 | if (ptr) |
| 43 | CurPtr = ptr; |
| 44 | else |
Rafael Espindola | 8026bd0 | 2014-07-06 14:17:29 +0000 | [diff] [blame] | 45 | CurPtr = CurBuf.begin(); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 46 | |
Craig Topper | 353eda4 | 2014-04-24 06:44:33 +0000 | [diff] [blame] | 47 | TokStart = nullptr; |
Sean Callanan | 7a77eae | 2010-01-21 00:19:58 +0000 | [diff] [blame] | 48 | } |
| 49 | |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 50 | /// ReturnError - Set the error to the specified string at the specified |
Daniel Dunbar | f2dcd77 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 51 | /// location. This is defined to always return AsmToken::Error. |
Daniel Dunbar | 8368f4e | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 52 | AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { |
Sean Callanan | 70855e4 | 2010-01-20 22:18:24 +0000 | [diff] [blame] | 53 | SetError(SMLoc::getFromPointer(Loc), Msg); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 54 | |
Nirav Dave | 1180e689 | 2016-06-02 17:15:05 +0000 | [diff] [blame] | 55 | return AsmToken(AsmToken::Error, StringRef(Loc, CurPtr - Loc)); |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 56 | } |
| 57 | |
Chris Lattner | c8dfbcb | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 58 | int AsmLexer::getNextChar() { |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 59 | if (CurPtr == CurBuf.end()) |
Chris Lattner | c8dfbcb | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 60 | return EOF; |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 61 | return (unsigned char)*CurPtr++; |
Chris Lattner | c8dfbcb | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 62 | } |
| 63 | |
Daniel Dunbar | d116d8a | 2010-09-27 20:12:52 +0000 | [diff] [blame] | 64 | /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)? |
| 65 | /// |
| 66 | /// The leading integral digit sequence and dot should have already been |
| 67 | /// consumed, some or all of the fractional digit sequence *can* have been |
| 68 | /// consumed. |
| 69 | AsmToken AsmLexer::LexFloatLiteral() { |
| 70 | // Skip the fractional digit sequence. |
| 71 | while (isdigit(*CurPtr)) |
| 72 | ++CurPtr; |
| 73 | |
| 74 | // Check for exponent; we intentionally accept a slighlty wider set of |
| 75 | // literals here and rely on the upstream client to reject invalid ones (e.g., |
| 76 | // "1e+"). |
| 77 | if (*CurPtr == 'e' || *CurPtr == 'E') { |
| 78 | ++CurPtr; |
| 79 | if (*CurPtr == '-' || *CurPtr == '+') |
| 80 | ++CurPtr; |
| 81 | while (isdigit(*CurPtr)) |
| 82 | ++CurPtr; |
| 83 | } |
| 84 | |
| 85 | return AsmToken(AsmToken::Real, |
| 86 | StringRef(TokStart, CurPtr - TokStart)); |
| 87 | } |
| 88 | |
Tim Northover | 1f25623 | 2013-08-14 14:23:31 +0000 | [diff] [blame] | 89 | /// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+ |
| 90 | /// while making sure there are enough actual digits around for the constant to |
| 91 | /// be valid. |
| 92 | /// |
| 93 | /// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed |
| 94 | /// before we get here. |
| 95 | AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { |
| 96 | assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') && |
| 97 | "unexpected parse state in floating hex"); |
| 98 | bool NoFracDigits = true; |
| 99 | |
| 100 | // Skip the fractional part if there is one |
| 101 | if (*CurPtr == '.') { |
| 102 | ++CurPtr; |
| 103 | |
| 104 | const char *FracStart = CurPtr; |
| 105 | while (isxdigit(*CurPtr)) |
| 106 | ++CurPtr; |
| 107 | |
| 108 | NoFracDigits = CurPtr == FracStart; |
| 109 | } |
| 110 | |
| 111 | if (NoIntDigits && NoFracDigits) |
| 112 | return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " |
| 113 | "expected at least one significand digit"); |
| 114 | |
| 115 | // Make sure we do have some kind of proper exponent part |
| 116 | if (*CurPtr != 'p' && *CurPtr != 'P') |
| 117 | return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " |
| 118 | "expected exponent part 'p'"); |
| 119 | ++CurPtr; |
| 120 | |
| 121 | if (*CurPtr == '+' || *CurPtr == '-') |
| 122 | ++CurPtr; |
| 123 | |
| 124 | // N.b. exponent digits are *not* hex |
| 125 | const char *ExpStart = CurPtr; |
| 126 | while (isdigit(*CurPtr)) |
| 127 | ++CurPtr; |
| 128 | |
| 129 | if (CurPtr == ExpStart) |
| 130 | return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " |
| 131 | "expected at least one exponent digit"); |
| 132 | |
| 133 | return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); |
| 134 | } |
| 135 | |
/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]*
///
/// Return true if \p c may appear inside an identifier: an alphanumeric
/// character, '_', '$', '.', '?', or '@' when \p AllowAt is set.
static bool IsIdentifierChar(char c, bool AllowAt) {
  // isalnum() is undefined for negative arguments, so the byte is converted to
  // unsigned char before it is classified.
  if (isalnum(static_cast<unsigned char>(c)))
    return true;

  switch (c) {
  case '_':
  case '$':
  case '.':
  case '?':
    return true;
  case '@':
    return AllowAt;
  default:
    return false;
  }
}
Eugene Zelenko | 33d7b76 | 2016-08-23 17:14:32 +0000 | [diff] [blame] | 141 | |
Daniel Dunbar | 8368f4e | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 142 | AsmToken AsmLexer::LexIdentifier() { |
Daniel Dunbar | 3068a93 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 143 | // Check for floating point literals. |
| 144 | if (CurPtr[-1] == '.' && isdigit(*CurPtr)) { |
Daniel Dunbar | d116d8a | 2010-09-27 20:12:52 +0000 | [diff] [blame] | 145 | // Disambiguate a .1243foo identifier from a floating literal. |
Daniel Dunbar | 3068a93 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 146 | while (isdigit(*CurPtr)) |
| 147 | ++CurPtr; |
David Peixotto | 2cdc56d | 2013-12-06 20:35:58 +0000 | [diff] [blame] | 148 | if (*CurPtr == 'e' || *CurPtr == 'E' || |
| 149 | !IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) |
Daniel Dunbar | d116d8a | 2010-09-27 20:12:52 +0000 | [diff] [blame] | 150 | return LexFloatLiteral(); |
Daniel Dunbar | 3068a93 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 151 | } |
| 152 | |
David Peixotto | 2cdc56d | 2013-12-06 20:35:58 +0000 | [diff] [blame] | 153 | while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 154 | ++CurPtr; |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 155 | |
Chris Lattner | 6b55cb9 | 2010-04-14 04:40:28 +0000 | [diff] [blame] | 156 | // Handle . as a special case. |
Daniel Dunbar | b0ceb76 | 2010-05-06 14:46:38 +0000 | [diff] [blame] | 157 | if (CurPtr == TokStart+1 && TokStart[0] == '.') |
| 158 | return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 159 | |
Daniel Dunbar | f2dcd77 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 160 | return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 161 | } |
| 162 | |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 163 | /// LexSlash: Slash: / |
| 164 | /// C-Style Comment: /* ... */ |
Daniel Dunbar | 8368f4e | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 165 | AsmToken AsmLexer::LexSlash() { |
Daniel Dunbar | 6b22f9c | 2009-06-29 21:58:22 +0000 | [diff] [blame] | 166 | switch (*CurPtr) { |
Richard Trieu | 7a08381 | 2016-02-18 22:09:30 +0000 | [diff] [blame] | 167 | case '*': |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 168 | IsAtStartOfStatement = false; |
Richard Trieu | 7a08381 | 2016-02-18 22:09:30 +0000 | [diff] [blame] | 169 | break; // C style comment. |
| 170 | case '/': |
| 171 | ++CurPtr; |
| 172 | return LexLineComment(); |
| 173 | default: |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 174 | IsAtStartOfStatement = false; |
| 175 | return AsmToken(AsmToken::Slash, StringRef(TokStart, 1)); |
Daniel Dunbar | 6b22f9c | 2009-06-29 21:58:22 +0000 | [diff] [blame] | 176 | } |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 177 | |
| 178 | // C Style comment. |
| 179 | ++CurPtr; // skip the star. |
Oliver Stannard | 68e7c21 | 2016-12-08 10:31:21 +0000 | [diff] [blame] | 180 | const char *CommentTextStart = CurPtr; |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 181 | while (CurPtr != CurBuf.end()) { |
| 182 | switch (*CurPtr++) { |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 183 | case '*': |
| 184 | // End of the comment? |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 185 | if (*CurPtr != '/') |
| 186 | break; |
Oliver Stannard | 68e7c21 | 2016-12-08 10:31:21 +0000 | [diff] [blame] | 187 | // If we have a CommentConsumer, notify it about the comment. |
| 188 | if (CommentConsumer) { |
| 189 | CommentConsumer->HandleComment( |
| 190 | SMLoc::getFromPointer(CommentTextStart), |
| 191 | StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart)); |
| 192 | } |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 193 | ++CurPtr; // End the */. |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 194 | return AsmToken(AsmToken::Comment, |
| 195 | StringRef(TokStart, CurPtr - TokStart)); |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 196 | } |
| 197 | } |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 198 | return ReturnError(TokStart, "unterminated comment"); |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 199 | } |
| 200 | |
Daniel Dunbar | 6b22f9c | 2009-06-29 21:58:22 +0000 | [diff] [blame] | 201 | /// LexLineComment: Comment: #[^\n]* |
| 202 | /// : //[^\n]* |
Daniel Dunbar | 8368f4e | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 203 | AsmToken AsmLexer::LexLineComment() { |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 204 | // Mark This as an end of statement with a body of the |
| 205 | // comment. While it would be nicer to leave this two tokens, |
| 206 | // backwards compatability with TargetParsers makes keeping this in this form |
| 207 | // better. |
Oliver Stannard | 68e7c21 | 2016-12-08 10:31:21 +0000 | [diff] [blame] | 208 | const char *CommentTextStart = CurPtr; |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 209 | int CurChar = getNextChar(); |
Chris Lattner | e8baa38 | 2011-08-04 19:31:26 +0000 | [diff] [blame] | 210 | while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 211 | CurChar = getNextChar(); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 212 | |
Oliver Stannard | 68e7c21 | 2016-12-08 10:31:21 +0000 | [diff] [blame] | 213 | // If we have a CommentConsumer, notify it about the comment. |
| 214 | if (CommentConsumer) { |
| 215 | CommentConsumer->HandleComment( |
| 216 | SMLoc::getFromPointer(CommentTextStart), |
| 217 | StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart)); |
| 218 | } |
| 219 | |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 220 | IsAtStartOfLine = true; |
Nirav Dave | 8b3dc87 | 2016-07-29 14:42:00 +0000 | [diff] [blame] | 221 | // This is a whole line comment. leave newline |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 222 | if (IsAtStartOfStatement) |
| 223 | return AsmToken(AsmToken::EndOfStatement, |
| 224 | StringRef(TokStart, CurPtr - TokStart)); |
| 225 | IsAtStartOfStatement = true; |
| 226 | |
| 227 | return AsmToken(AsmToken::EndOfStatement, |
| 228 | StringRef(TokStart, CurPtr - 1 - TokStart)); |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 229 | } |
| 230 | |
/// Advance \p CurPtr past an optional integer type suffix: an optional 'U'
/// followed by up to two 'L' characters (ULL, UL, U, L and LL).
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
  const char *Cursor = CurPtr;

  if (*Cursor == 'U')
    ++Cursor;

  for (int Remaining = 2; Remaining != 0 && *Cursor == 'L'; --Remaining)
    ++Cursor;

  CurPtr = Cursor;
}
| 240 | |
// Look ahead to search for first non-hex digit, if it's [hH], then we treat the
// integer as a hexadecimal, possibly with leading zeroes.
//
// On return CurPtr points at the [hH] suffix when one was found (and 16 is
// returned). Otherwise DefaultRadix is returned and CurPtr points just past
// the leading run of decimal digits.
static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
  const char *FirstHex = nullptr;
  const char *LookAhead = CurPtr;
  for (;; ++LookAhead) {
    // The <cctype> classifiers are undefined for negative arguments, so the
    // byte is converted to unsigned char before it is classified.
    const unsigned char C = static_cast<unsigned char>(*LookAhead);
    if (isdigit(C))
      continue;
    if (!isxdigit(C))
      break;
    // Remember where the decimal digits stopped in case this is not hex.
    if (!FirstHex)
      FirstHex = LookAhead;
  }
  const bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
  CurPtr = isHex || !FirstHex ? LookAhead : FirstHex;
  return isHex ? 16 : DefaultRadix;
}
| 263 | |
David Woodhouse | f42a666 | 2014-02-01 16:20:54 +0000 | [diff] [blame] | 264 | static AsmToken intToken(StringRef Ref, APInt &Value) |
| 265 | { |
| 266 | if (Value.isIntN(64)) |
| 267 | return AsmToken(AsmToken::Integer, Ref, Value); |
| 268 | return AsmToken(AsmToken::BigNum, Ref, Value); |
| 269 | } |
| 270 | |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 271 | /// LexDigit: First character is [0-9]. |
| 272 | /// Local Label: [0-9][:] |
Rafael Espindola | 86d5345 | 2013-02-14 16:23:08 +0000 | [diff] [blame] | 273 | /// Forward/Backward Label: [0-9][fb] |
| 274 | /// Binary integer: 0b[01]+ |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 275 | /// Octal integer: 0[0-7]+ |
Chad Rosier | 8bc6556 | 2013-02-12 01:00:01 +0000 | [diff] [blame] | 276 | /// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH] |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 277 | /// Decimal integer: [1-9][0-9]* |
Daniel Dunbar | 8368f4e | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 278 | AsmToken AsmLexer::LexDigit() { |
Yunzhong Gao | 27ea29b | 2016-09-02 23:15:29 +0000 | [diff] [blame] | 279 | // MASM-flavor binary integer: [01]+[bB] |
| 280 | // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH] |
| 281 | if (IsParsingMSInlineAsm && isdigit(CurPtr[-1])) { |
| 282 | const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? |
| 283 | CurPtr - 1 : nullptr; |
| 284 | const char *OldCurPtr = CurPtr; |
| 285 | while (isxdigit(*CurPtr)) { |
| 286 | if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary) |
| 287 | FirstNonBinary = CurPtr; |
| 288 | ++CurPtr; |
| 289 | } |
| 290 | |
| 291 | unsigned Radix = 0; |
| 292 | if (*CurPtr == 'h' || *CurPtr == 'H') { |
| 293 | // hexadecimal number |
| 294 | ++CurPtr; |
| 295 | Radix = 16; |
| 296 | } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr && |
| 297 | (*FirstNonBinary == 'b' || *FirstNonBinary == 'B')) |
| 298 | Radix = 2; |
| 299 | |
| 300 | if (Radix == 2 || Radix == 16) { |
| 301 | StringRef Result(TokStart, CurPtr - TokStart); |
| 302 | APInt Value(128, 0, true); |
| 303 | |
| 304 | if (Result.drop_back().getAsInteger(Radix, Value)) |
| 305 | return ReturnError(TokStart, Radix == 2 ? "invalid binary number" : |
| 306 | "invalid hexdecimal number"); |
| 307 | |
| 308 | // MSVC accepts and ignores type suffices on integer literals. |
| 309 | SkipIgnoredIntegerSuffix(CurPtr); |
| 310 | |
| 311 | return intToken(Result, Value); |
| 312 | } |
| 313 | |
| 314 | // octal/decimal integers, or floating point numbers, fall through |
| 315 | CurPtr = OldCurPtr; |
| 316 | } |
| 317 | |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 318 | // Decimal integer: [1-9][0-9]* |
Daniel Dunbar | ce17f72 | 2010-09-24 17:10:26 +0000 | [diff] [blame] | 319 | if (CurPtr[-1] != '0' || CurPtr[0] == '.') { |
Chad Rosier | 8bc6556 | 2013-02-12 01:00:01 +0000 | [diff] [blame] | 320 | unsigned Radix = doLookAhead(CurPtr, 10); |
Rafael Espindola | 86d5345 | 2013-02-14 16:23:08 +0000 | [diff] [blame] | 321 | bool isHex = Radix == 16; |
Daniel Dunbar | 3068a93 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 322 | // Check for floating point literals. |
Rafael Espindola | 86d5345 | 2013-02-14 16:23:08 +0000 | [diff] [blame] | 323 | if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) { |
Daniel Dunbar | 3068a93 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 324 | ++CurPtr; |
Daniel Dunbar | d116d8a | 2010-09-27 20:12:52 +0000 | [diff] [blame] | 325 | return LexFloatLiteral(); |
Daniel Dunbar | 3068a93 | 2010-09-24 01:59:31 +0000 | [diff] [blame] | 326 | } |
| 327 | |
Chris Lattner | e649401 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 328 | StringRef Result(TokStart, CurPtr - TokStart); |
Chris Lattner | 02db8f6 | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 329 | |
David Woodhouse | f42a666 | 2014-02-01 16:20:54 +0000 | [diff] [blame] | 330 | APInt Value(128, 0, true); |
| 331 | if (Result.getAsInteger(Radix, Value)) |
| 332 | return ReturnError(TokStart, !isHex ? "invalid decimal number" : |
Chad Rosier | 8bc6556 | 2013-02-12 01:00:01 +0000 | [diff] [blame] | 333 | "invalid hexdecimal number"); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 334 | |
Chad Rosier | 8bc6556 | 2013-02-12 01:00:01 +0000 | [diff] [blame] | 335 | // Consume the [bB][hH]. |
| 336 | if (Radix == 2 || Radix == 16) |
| 337 | ++CurPtr; |
| 338 | |
Jim Grosbach | 94a2260 | 2013-02-26 20:17:10 +0000 | [diff] [blame] | 339 | // The darwin/x86 (and x86-64) assembler accepts and ignores type |
| 340 | // suffices on integer literals. |
Chris Lattner | 02db8f6 | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 341 | SkipIgnoredIntegerSuffix(CurPtr); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 342 | |
David Woodhouse | f42a666 | 2014-02-01 16:20:54 +0000 | [diff] [blame] | 343 | return intToken(Result, Value); |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 344 | } |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 345 | |
Yunzhong Gao | 27ea29b | 2016-09-02 23:15:29 +0000 | [diff] [blame] | 346 | if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) { |
Rafael Espindola | 86d5345 | 2013-02-14 16:23:08 +0000 | [diff] [blame] | 347 | ++CurPtr; |
| 348 | // See if we actually have "0b" as part of something like "jmp 0b\n" |
| 349 | if (!isdigit(CurPtr[0])) { |
| 350 | --CurPtr; |
| 351 | StringRef Result(TokStart, CurPtr - TokStart); |
| 352 | return AsmToken(AsmToken::Integer, Result, 0); |
| 353 | } |
| 354 | const char *NumStart = CurPtr; |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 355 | while (CurPtr[0] == '0' || CurPtr[0] == '1') |
| 356 | ++CurPtr; |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 357 | |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 358 | // Requires at least one binary digit. |
| 359 | if (CurPtr == NumStart) |
Eric Christopher | ffc0e1f | 2011-04-12 00:18:03 +0000 | [diff] [blame] | 360 | return ReturnError(TokStart, "invalid binary number"); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 361 | |
Chris Lattner | e649401 | 2010-01-22 07:34:12 +0000 | [diff] [blame] | 362 | StringRef Result(TokStart, CurPtr - TokStart); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 363 | |
David Woodhouse | f42a666 | 2014-02-01 16:20:54 +0000 | [diff] [blame] | 364 | APInt Value(128, 0, true); |
Chris Lattner | 02db8f6 | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 365 | if (Result.substr(2).getAsInteger(2, Value)) |
Eric Christopher | ffc0e1f | 2011-04-12 00:18:03 +0000 | [diff] [blame] | 366 | return ReturnError(TokStart, "invalid binary number"); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 367 | |
Chris Lattner | 02db8f6 | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 368 | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
| 369 | // suffixes on integer literals. |
| 370 | SkipIgnoredIntegerSuffix(CurPtr); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 371 | |
David Woodhouse | f42a666 | 2014-02-01 16:20:54 +0000 | [diff] [blame] | 372 | return intToken(Result, Value); |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 373 | } |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 374 | |
Colin LeMahieu | 0143146 | 2016-03-18 18:22:07 +0000 | [diff] [blame] | 375 | if ((*CurPtr == 'x') || (*CurPtr == 'X')) { |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 376 | ++CurPtr; |
| 377 | const char *NumStart = CurPtr; |
| 378 | while (isxdigit(CurPtr[0])) |
| 379 | ++CurPtr; |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 380 | |
Tim Northover | 1f25623 | 2013-08-14 14:23:31 +0000 | [diff] [blame] | 381 | // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be |
| 382 | // diagnosed by LexHexFloatLiteral). |
| 383 | if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P') |
| 384 | return LexHexFloatLiteral(NumStart == CurPtr); |
| 385 | |
| 386 | // Otherwise requires at least one hex digit. |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 387 | if (CurPtr == NumStart) |
Eric Christopher | ffc0e1f | 2011-04-12 00:18:03 +0000 | [diff] [blame] | 388 | return ReturnError(CurPtr-2, "invalid hexadecimal number"); |
Chris Lattner | 6401c88 | 2010-01-22 01:17:12 +0000 | [diff] [blame] | 389 | |
David Woodhouse | f42a666 | 2014-02-01 16:20:54 +0000 | [diff] [blame] | 390 | APInt Result(128, 0); |
Chris Lattner | 6401c88 | 2010-01-22 01:17:12 +0000 | [diff] [blame] | 391 | if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) |
Eric Christopher | ffc0e1f | 2011-04-12 00:18:03 +0000 | [diff] [blame] | 392 | return ReturnError(TokStart, "invalid hexadecimal number"); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 393 | |
Chad Rosier | 8bc6556 | 2013-02-12 01:00:01 +0000 | [diff] [blame] | 394 | // Consume the optional [hH]. |
Yunzhong Gao | 27ea29b | 2016-09-02 23:15:29 +0000 | [diff] [blame] | 395 | if (!IsParsingMSInlineAsm && (*CurPtr == 'h' || *CurPtr == 'H')) |
Chad Rosier | 8bc6556 | 2013-02-12 01:00:01 +0000 | [diff] [blame] | 396 | ++CurPtr; |
| 397 | |
Chris Lattner | 02db8f6 | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 398 | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
| 399 | // suffixes on integer literals. |
| 400 | SkipIgnoredIntegerSuffix(CurPtr); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 401 | |
David Woodhouse | f42a666 | 2014-02-01 16:20:54 +0000 | [diff] [blame] | 402 | return intToken(StringRef(TokStart, CurPtr - TokStart), Result); |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 403 | } |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 404 | |
Matt Beaumont-Gay | 0e760da | 2013-02-25 18:11:18 +0000 | [diff] [blame] | 405 | // Either octal or hexadecimal. |
David Woodhouse | f42a666 | 2014-02-01 16:20:54 +0000 | [diff] [blame] | 406 | APInt Value(128, 0, true); |
Chad Rosier | 8bc6556 | 2013-02-12 01:00:01 +0000 | [diff] [blame] | 407 | unsigned Radix = doLookAhead(CurPtr, 8); |
Rafael Espindola | 86d5345 | 2013-02-14 16:23:08 +0000 | [diff] [blame] | 408 | bool isHex = Radix == 16; |
Chad Rosier | 8bc6556 | 2013-02-12 01:00:01 +0000 | [diff] [blame] | 409 | StringRef Result(TokStart, CurPtr - TokStart); |
| 410 | if (Result.getAsInteger(Radix, Value)) |
Rafael Espindola | 86d5345 | 2013-02-14 16:23:08 +0000 | [diff] [blame] | 411 | return ReturnError(TokStart, !isHex ? "invalid octal number" : |
Chad Rosier | 559cea4 | 2013-02-12 01:12:24 +0000 | [diff] [blame] | 412 | "invalid hexdecimal number"); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 413 | |
Rafael Espindola | 86d5345 | 2013-02-14 16:23:08 +0000 | [diff] [blame] | 414 | // Consume the [hH]. |
| 415 | if (Radix == 16) |
Chad Rosier | 8bc6556 | 2013-02-12 01:00:01 +0000 | [diff] [blame] | 416 | ++CurPtr; |
| 417 | |
Chris Lattner | 02db8f6 | 2010-08-24 00:43:25 +0000 | [diff] [blame] | 418 | // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL |
| 419 | // suffixes on integer literals. |
| 420 | SkipIgnoredIntegerSuffix(CurPtr); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 421 | |
David Woodhouse | f42a666 | 2014-02-01 16:20:54 +0000 | [diff] [blame] | 422 | return intToken(Result, Value); |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 423 | } |
| 424 | |
Roman Divacky | 71d2916 | 2010-12-18 08:56:37 +0000 | [diff] [blame] | 425 | /// LexSingleQuote: Integer: 'b' |
| 426 | AsmToken AsmLexer::LexSingleQuote() { |
| 427 | int CurChar = getNextChar(); |
| 428 | |
| 429 | if (CurChar == '\\') |
| 430 | CurChar = getNextChar(); |
| 431 | |
| 432 | if (CurChar == EOF) |
| 433 | return ReturnError(TokStart, "unterminated single quote"); |
| 434 | |
| 435 | CurChar = getNextChar(); |
| 436 | |
| 437 | if (CurChar != '\'') |
| 438 | return ReturnError(TokStart, "single quote way too long"); |
| 439 | |
| 440 | // The idea here being that 'c' is basically just an integral |
| 441 | // constant. |
| 442 | StringRef Res = StringRef(TokStart,CurPtr - TokStart); |
| 443 | long long Value; |
| 444 | |
| 445 | if (Res.startswith("\'\\")) { |
| 446 | char theChar = Res[2]; |
| 447 | switch (theChar) { |
| 448 | default: Value = theChar; break; |
| 449 | case '\'': Value = '\''; break; |
| 450 | case 't': Value = '\t'; break; |
| 451 | case 'n': Value = '\n'; break; |
| 452 | case 'b': Value = '\b'; break; |
| 453 | } |
| 454 | } else |
| 455 | Value = TokStart[1]; |
| 456 | |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 457 | return AsmToken(AsmToken::Integer, Res, Value); |
Roman Divacky | 71d2916 | 2010-12-18 08:56:37 +0000 | [diff] [blame] | 458 | } |
| 459 | |
Chris Lattner | 419a974 | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 460 | /// LexQuote: String: "..." |
Daniel Dunbar | 8368f4e | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 461 | AsmToken AsmLexer::LexQuote() { |
Chris Lattner | 419a974 | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 462 | int CurChar = getNextChar(); |
| 463 | // TODO: does gas allow multiline string constants? |
| 464 | while (CurChar != '"') { |
| 465 | if (CurChar == '\\') { |
| 466 | // Allow \", etc. |
| 467 | CurChar = getNextChar(); |
| 468 | } |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 469 | |
Chris Lattner | 2adc9e7 | 2009-06-21 21:22:11 +0000 | [diff] [blame] | 470 | if (CurChar == EOF) |
| 471 | return ReturnError(TokStart, "unterminated string constant"); |
Chris Lattner | 419a974 | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 472 | |
| 473 | CurChar = getNextChar(); |
| 474 | } |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 475 | |
Daniel Dunbar | f2dcd77 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 476 | return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); |
Chris Lattner | 419a974 | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 477 | } |
| 478 | |
Chris Lattner | cb307a27 | 2009-08-10 01:39:42 +0000 | [diff] [blame] | 479 | StringRef AsmLexer::LexUntilEndOfStatement() { |
| 480 | TokStart = CurPtr; |
| 481 | |
Saleem Abdulrasool | bb67af4 | 2014-08-14 02:51:43 +0000 | [diff] [blame] | 482 | while (!isAtStartOfComment(CurPtr) && // Start of line comment. |
Jim Grosbach | a3df87f | 2011-03-24 18:46:34 +0000 | [diff] [blame] | 483 | !isAtStatementSeparator(CurPtr) && // End of statement marker. |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 484 | *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { |
Chris Lattner | cb307a27 | 2009-08-10 01:39:42 +0000 | [diff] [blame] | 485 | ++CurPtr; |
Kevin Enderby | f92f990 | 2009-09-04 21:45:34 +0000 | [diff] [blame] | 486 | } |
Chris Lattner | cb307a27 | 2009-08-10 01:39:42 +0000 | [diff] [blame] | 487 | return StringRef(TokStart, CurPtr-TokStart); |
| 488 | } |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 489 | |
Kevin Enderby | 7255361 | 2011-09-13 23:45:18 +0000 | [diff] [blame] | 490 | StringRef AsmLexer::LexUntilEndOfLine() { |
| 491 | TokStart = CurPtr; |
| 492 | |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 493 | while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) { |
Kevin Enderby | 7255361 | 2011-09-13 23:45:18 +0000 | [diff] [blame] | 494 | ++CurPtr; |
| 495 | } |
| 496 | return StringRef(TokStart, CurPtr-TokStart); |
| 497 | } |
| 498 | |
Benjamin Kramer | 1ee99a8 | 2015-08-17 14:35:25 +0000 | [diff] [blame] | 499 | size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf, |
| 500 | bool ShouldSkipSpace) { |
Nirav Dave | 157891c | 2016-10-03 13:48:27 +0000 | [diff] [blame] | 501 | SaveAndRestore<const char *> SavedTokenStart(TokStart); |
| 502 | SaveAndRestore<const char *> SavedCurPtr(CurPtr); |
| 503 | SaveAndRestore<bool> SavedAtStartOfLine(IsAtStartOfLine); |
| 504 | SaveAndRestore<bool> SavedAtStartOfStatement(IsAtStartOfStatement); |
| 505 | SaveAndRestore<bool> SavedSkipSpace(SkipSpace, ShouldSkipSpace); |
| 506 | SaveAndRestore<bool> SavedIsPeeking(IsPeeking, true); |
Saleem Abdulrasool | a879fab | 2014-02-09 23:29:24 +0000 | [diff] [blame] | 507 | std::string SavedErr = getErr(); |
| 508 | SMLoc SavedErrLoc = getErrLoc(); |
| 509 | |
Benjamin Kramer | 1ee99a8 | 2015-08-17 14:35:25 +0000 | [diff] [blame] | 510 | size_t ReadCount; |
| 511 | for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) { |
| 512 | AsmToken Token = LexToken(); |
| 513 | |
| 514 | Buf[ReadCount] = Token; |
| 515 | |
| 516 | if (Token.is(AsmToken::Eof)) |
| 517 | break; |
| 518 | } |
Saleem Abdulrasool | a879fab | 2014-02-09 23:29:24 +0000 | [diff] [blame] | 519 | |
| 520 | SetError(SavedErrLoc, SavedErr); |
Benjamin Kramer | 1ee99a8 | 2015-08-17 14:35:25 +0000 | [diff] [blame] | 521 | return ReadCount; |
Saleem Abdulrasool | a879fab | 2014-02-09 23:29:24 +0000 | [diff] [blame] | 522 | } |
| 523 | |
Saleem Abdulrasool | bb67af4 | 2014-08-14 02:51:43 +0000 | [diff] [blame] | 524 | bool AsmLexer::isAtStartOfComment(const char *Ptr) { |
Mehdi Amini | 36d33fc | 2016-10-01 06:46:33 +0000 | [diff] [blame] | 525 | StringRef CommentString = MAI.getCommentString(); |
Saleem Abdulrasool | bb67af4 | 2014-08-14 02:51:43 +0000 | [diff] [blame] | 526 | |
Mehdi Amini | 36d33fc | 2016-10-01 06:46:33 +0000 | [diff] [blame] | 527 | if (CommentString.size() == 1) |
Saleem Abdulrasool | bb67af4 | 2014-08-14 02:51:43 +0000 | [diff] [blame] | 528 | return CommentString[0] == Ptr[0]; |
| 529 | |
Nirav Dave | 157891c | 2016-10-03 13:48:27 +0000 | [diff] [blame] | 530 | // Allow # preprocessor commments also be counted as comments for "##" cases |
Saleem Abdulrasool | bb67af4 | 2014-08-14 02:51:43 +0000 | [diff] [blame] | 531 | if (CommentString[1] == '#') |
| 532 | return CommentString[0] == Ptr[0]; |
| 533 | |
Mehdi Amini | 36d33fc | 2016-10-01 06:46:33 +0000 | [diff] [blame] | 534 | return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0; |
Kevin Enderby | ecd879a | 2009-09-16 18:08:00 +0000 | [diff] [blame] | 535 | } |
| 536 | |
Jim Grosbach | a3df87f | 2011-03-24 18:46:34 +0000 | [diff] [blame] | 537 | bool AsmLexer::isAtStatementSeparator(const char *Ptr) { |
| 538 | return strncmp(Ptr, MAI.getSeparatorString(), |
| 539 | strlen(MAI.getSeparatorString())) == 0; |
| 540 | } |
| 541 | |
Daniel Dunbar | 8368f4e | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 542 | AsmToken AsmLexer::LexToken() { |
Chris Lattner | c8dfbcb | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 543 | TokStart = CurPtr; |
| 544 | // This always consumes at least one character. |
| 545 | int CurChar = getNextChar(); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 546 | |
Nirav Dave | 157891c | 2016-10-03 13:48:27 +0000 | [diff] [blame] | 547 | if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) { |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 548 | // If this starts with a '#', this may be a cpp |
| 549 | // hash directive and otherwise a line comment. |
| 550 | AsmToken TokenBuf[2]; |
| 551 | MutableArrayRef<AsmToken> Buf(TokenBuf, 2); |
| 552 | size_t num = peekTokens(Buf, true); |
| 553 | // There cannot be a space preceeding this |
| 554 | if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) && |
| 555 | TokenBuf[1].is(AsmToken::String)) { |
| 556 | CurPtr = TokStart; // reset curPtr; |
| 557 | StringRef s = LexUntilEndOfLine(); |
| 558 | UnLex(TokenBuf[1]); |
| 559 | UnLex(TokenBuf[0]); |
| 560 | return AsmToken(AsmToken::HashDirective, s); |
| 561 | } |
Kevin Enderby | ecd879a | 2009-09-16 18:08:00 +0000 | [diff] [blame] | 562 | return LexLineComment(); |
Kevin Enderby | 7255361 | 2011-09-13 23:45:18 +0000 | [diff] [blame] | 563 | } |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 564 | |
| 565 | if (isAtStartOfComment(TokStart)) |
| 566 | return LexLineComment(); |
| 567 | |
Jim Grosbach | a3df87f | 2011-03-24 18:46:34 +0000 | [diff] [blame] | 568 | if (isAtStatementSeparator(TokStart)) { |
| 569 | CurPtr += strlen(MAI.getSeparatorString()) - 1; |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 570 | IsAtStartOfLine = true; |
| 571 | IsAtStartOfStatement = true; |
Jim Grosbach | a3df87f | 2011-03-24 18:46:34 +0000 | [diff] [blame] | 572 | return AsmToken(AsmToken::EndOfStatement, |
| 573 | StringRef(TokStart, strlen(MAI.getSeparatorString()))); |
| 574 | } |
Kevin Enderby | f92f990 | 2009-09-04 21:45:34 +0000 | [diff] [blame] | 575 | |
Jim Grosbach | a9aa3c1 | 2011-09-15 16:52:06 +0000 | [diff] [blame] | 576 | // If we're missing a newline at EOF, make sure we still get an |
| 577 | // EndOfStatement token before the Eof token. |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 578 | if (CurChar == EOF && !IsAtStartOfStatement) { |
| 579 | IsAtStartOfLine = true; |
| 580 | IsAtStartOfStatement = true; |
Jim Grosbach | a9aa3c1 | 2011-09-15 16:52:06 +0000 | [diff] [blame] | 581 | return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); |
| 582 | } |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 583 | IsAtStartOfLine = false; |
| 584 | bool OldIsAtStartOfStatement = IsAtStartOfStatement; |
| 585 | IsAtStartOfStatement = false; |
Chris Lattner | c8dfbcb | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 586 | switch (CurChar) { |
| 587 | default: |
Daniel Dunbar | b0ceb76 | 2010-05-06 14:46:38 +0000 | [diff] [blame] | 588 | // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* |
| 589 | if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 590 | return LexIdentifier(); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 591 | |
Chris Lattner | c8dfbcb | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 592 | // Unknown character, emit an error. |
Chris Lattner | b013345 | 2009-06-21 20:16:42 +0000 | [diff] [blame] | 593 | return ReturnError(TokStart, "invalid character in input"); |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 594 | case EOF: |
| 595 | IsAtStartOfLine = true; |
| 596 | IsAtStartOfStatement = true; |
| 597 | return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); |
Chris Lattner | c8dfbcb | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 598 | case 0: |
| 599 | case ' ': |
| 600 | case '\t': |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 601 | IsAtStartOfStatement = OldIsAtStartOfStatement; |
| 602 | while (*CurPtr == ' ' || *CurPtr == '\t') |
| 603 | CurPtr++; |
| 604 | if (SkipSpace) |
| 605 | return LexToken(); // Ignore whitespace. |
| 606 | else |
| 607 | return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart)); |
| 608 | case '\n': |
Jim Grosbach | a3df87f | 2011-03-24 18:46:34 +0000 | [diff] [blame] | 609 | case '\r': |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 610 | IsAtStartOfLine = true; |
| 611 | IsAtStartOfStatement = true; |
Jim Grosbach | a3df87f | 2011-03-24 18:46:34 +0000 | [diff] [blame] | 612 | return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); |
Daniel Dunbar | f2dcd77 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 613 | case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); |
| 614 | case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); |
| 615 | case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); |
| 616 | case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); |
| 617 | case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); |
| 618 | case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); |
Kevin Enderby | 9c0f7fc | 2009-09-04 22:40:31 +0000 | [diff] [blame] | 619 | case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); |
| 620 | case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); |
| 621 | case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); |
| 622 | case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); |
Daniel Dunbar | f2dcd77 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 623 | case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); |
| 624 | case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); |
Daniel Dunbar | b0ceb76 | 2010-05-06 14:46:38 +0000 | [diff] [blame] | 625 | case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); |
Matt Fleming | ec9d6fa | 2010-05-21 11:36:59 +0000 | [diff] [blame] | 626 | case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); |
Rafael Espindola | 1134ab23 | 2011-06-05 02:43:45 +0000 | [diff] [blame] | 627 | case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1)); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 628 | case '=': |
Richard Trieu | 7a08381 | 2016-02-18 22:09:30 +0000 | [diff] [blame] | 629 | if (*CurPtr == '=') { |
| 630 | ++CurPtr; |
| 631 | return AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); |
| 632 | } |
Daniel Dunbar | f2dcd77 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 633 | return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 634 | case '|': |
Richard Trieu | 7a08381 | 2016-02-18 22:09:30 +0000 | [diff] [blame] | 635 | if (*CurPtr == '|') { |
| 636 | ++CurPtr; |
| 637 | return AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); |
| 638 | } |
Daniel Dunbar | f2dcd77 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 639 | return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); |
| 640 | case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 641 | case '&': |
Richard Trieu | 7a08381 | 2016-02-18 22:09:30 +0000 | [diff] [blame] | 642 | if (*CurPtr == '&') { |
| 643 | ++CurPtr; |
| 644 | return AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); |
| 645 | } |
Daniel Dunbar | f2dcd77 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 646 | return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 647 | case '!': |
Richard Trieu | 7a08381 | 2016-02-18 22:09:30 +0000 | [diff] [blame] | 648 | if (*CurPtr == '=') { |
| 649 | ++CurPtr; |
| 650 | return AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); |
| 651 | } |
Daniel Dunbar | f2dcd77 | 2009-07-28 16:08:33 +0000 | [diff] [blame] | 652 | return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); |
Daniel Sanders | 3feeb9c | 2016-08-08 11:50:25 +0000 | [diff] [blame] | 653 | case '%': |
| 654 | if (MAI.hasMipsExpressions()) { |
| 655 | AsmToken::TokenKind Operator; |
| 656 | unsigned OperatorLength; |
| 657 | |
| 658 | std::tie(Operator, OperatorLength) = |
| 659 | StringSwitch<std::pair<AsmToken::TokenKind, unsigned>>( |
| 660 | StringRef(CurPtr)) |
| 661 | .StartsWith("call16", {AsmToken::PercentCall16, 7}) |
| 662 | .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8}) |
| 663 | .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8}) |
| 664 | .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10}) |
| 665 | .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10}) |
| 666 | .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9}) |
| 667 | .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7}) |
| 668 | .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7}) |
| 669 | .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9}) |
| 670 | .StartsWith("got_page", {AsmToken::PercentGot_Page, 9}) |
| 671 | .StartsWith("gottprel", {AsmToken::PercentGottprel, 9}) |
| 672 | .StartsWith("got", {AsmToken::PercentGot, 4}) |
| 673 | .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7}) |
| 674 | .StartsWith("higher", {AsmToken::PercentHigher, 7}) |
| 675 | .StartsWith("highest", {AsmToken::PercentHighest, 8}) |
| 676 | .StartsWith("hi", {AsmToken::PercentHi, 3}) |
| 677 | .StartsWith("lo", {AsmToken::PercentLo, 3}) |
| 678 | .StartsWith("neg", {AsmToken::PercentNeg, 4}) |
| 679 | .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9}) |
| 680 | .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9}) |
| 681 | .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6}) |
| 682 | .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7}) |
| 683 | .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9}) |
| 684 | .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9}) |
| 685 | .Default({AsmToken::Percent, 1}); |
| 686 | |
| 687 | if (Operator != AsmToken::Percent) { |
| 688 | CurPtr += OperatorLength - 1; |
| 689 | return AsmToken(Operator, StringRef(TokStart, OperatorLength)); |
| 690 | } |
| 691 | } |
| 692 | return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); |
Nirav Dave | fd91041 | 2016-06-17 16:06:17 +0000 | [diff] [blame] | 693 | case '/': |
| 694 | IsAtStartOfStatement = OldIsAtStartOfStatement; |
| 695 | return LexSlash(); |
Kevin Enderby | f92f990 | 2009-09-04 21:45:34 +0000 | [diff] [blame] | 696 | case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); |
Roman Divacky | 71d2916 | 2010-12-18 08:56:37 +0000 | [diff] [blame] | 697 | case '\'': return LexSingleQuote(); |
Chris Lattner | 419a974 | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 698 | case '"': return LexQuote(); |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 699 | case '0': case '1': case '2': case '3': case '4': |
| 700 | case '5': case '6': case '7': case '8': case '9': |
| 701 | return LexDigit(); |
Chris Lattner | f97d8bb | 2009-06-23 05:57:07 +0000 | [diff] [blame] | 702 | case '<': |
Daniel Dunbar | 7e8d6c7 | 2009-06-29 20:37:27 +0000 | [diff] [blame] | 703 | switch (*CurPtr) { |
Richard Trieu | 7a08381 | 2016-02-18 22:09:30 +0000 | [diff] [blame] | 704 | case '<': |
| 705 | ++CurPtr; |
| 706 | return AsmToken(AsmToken::LessLess, StringRef(TokStart, 2)); |
| 707 | case '=': |
| 708 | ++CurPtr; |
| 709 | return AsmToken(AsmToken::LessEqual, StringRef(TokStart, 2)); |
| 710 | case '>': |
| 711 | ++CurPtr; |
| 712 | return AsmToken(AsmToken::LessGreater, StringRef(TokStart, 2)); |
| 713 | default: |
| 714 | return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); |
Chris Lattner | f97d8bb | 2009-06-23 05:57:07 +0000 | [diff] [blame] | 715 | } |
Chris Lattner | f97d8bb | 2009-06-23 05:57:07 +0000 | [diff] [blame] | 716 | case '>': |
Daniel Dunbar | 7e8d6c7 | 2009-06-29 20:37:27 +0000 | [diff] [blame] | 717 | switch (*CurPtr) { |
Richard Trieu | 7a08381 | 2016-02-18 22:09:30 +0000 | [diff] [blame] | 718 | case '>': |
| 719 | ++CurPtr; |
| 720 | return AsmToken(AsmToken::GreaterGreater, StringRef(TokStart, 2)); |
| 721 | case '=': |
| 722 | ++CurPtr; |
| 723 | return AsmToken(AsmToken::GreaterEqual, StringRef(TokStart, 2)); |
| 724 | default: |
| 725 | return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); |
Chris Lattner | f97d8bb | 2009-06-23 05:57:07 +0000 | [diff] [blame] | 726 | } |
Jim Grosbach | 01af6c4 | 2011-02-11 19:05:56 +0000 | [diff] [blame] | 727 | |
Chris Lattner | d076561 | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 728 | // TODO: Quoted identifiers (objc methods etc) |
| 729 | // local labels: [0-9][:] |
| 730 | // Forward/backward labels: [0-9][fb] |
| 731 | // Integers, fp constants, character constants. |
Chris Lattner | c8dfbcb | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 732 | } |
Duncan Sands | 376c6f1 | 2009-06-22 06:59:32 +0000 | [diff] [blame] | 733 | } |