Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 1 | //===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file is distributed under the University of Illinois Open Source |
| 6 | // License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This class declares the lexer for assembly files. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #ifndef ASMLEXER_H |
| 15 | #define ASMLEXER_H |
| 16 | |
Daniel Dunbar | 9a7e2cc | 2009-07-27 21:49:56 +0000 | [diff] [blame] | 17 | #include "llvm/ADT/StringRef.h" |
Daniel Dunbar | dbd692a | 2009-07-20 20:01:54 +0000 | [diff] [blame] | 18 | #include "llvm/MC/MCAsmLexer.h" |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 19 | #include "llvm/Support/DataTypes.h" |
| 20 | #include <string> |
| 21 | #include <cassert> |
| 22 | |
| 23 | namespace llvm { |
| 24 | class MemoryBuffer; |
| 25 | class SourceMgr; |
| 26 | class SMLoc; |
| 27 | |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame^] | 28 | /// AsmToken - Target independent representation for an assembler token. |
| 29 | struct AsmToken { |
| 30 | enum TokenKind { |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 31 | // Markers |
| 32 | Eof, Error, |
| 33 | |
Chris Lattner | 10a907d | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 34 | // String values. |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 35 | Identifier, |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 36 | Register, |
Chris Lattner | 10a907d | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 37 | String, |
| 38 | |
| 39 | // Integer values. |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame^] | 40 | Integer, |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 41 | |
Chris Lattner | 10a907d | 2009-06-21 19:56:35 +0000 | [diff] [blame] | 42 | // No-value. |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 43 | EndOfStatement, |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 44 | Colon, |
Chris Lattner | 74ec1a3 | 2009-06-22 06:32:03 +0000 | [diff] [blame] | 45 | Plus, Minus, Tilde, |
Chris Lattner | 4651bca | 2009-06-21 19:21:25 +0000 | [diff] [blame] | 46 | Slash, // '/' |
| 47 | LParen, RParen, |
Daniel Dunbar | 475839e | 2009-06-29 20:37:27 +0000 | [diff] [blame] | 48 | Star, Comma, Dollar, Equal, EqualEqual, |
Chris Lattner | 8dfbe6c | 2009-06-23 05:57:07 +0000 | [diff] [blame] | 49 | |
Daniel Dunbar | 475839e | 2009-06-29 20:37:27 +0000 | [diff] [blame] | 50 | Pipe, PipePipe, Caret, |
| 51 | Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, |
| 52 | Less, LessEqual, LessLess, LessGreater, |
| 53 | Greater, GreaterEqual, GreaterGreater |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 54 | }; |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 55 | |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame^] | 56 | TokenKind Kind; |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 57 | |
| 58 | /// A reference to the entire token contents; this is always a pointer into |
| 59 | /// a memory buffer owned by the source manager. |
| 60 | StringRef Str; |
| 61 | |
| 62 | int64_t IntVal; |
| 63 | |
| 64 | public: |
| 65 | AsmToken() {} |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame^] | 66 | AsmToken(TokenKind _Kind, const StringRef &_Str, int64_t _IntVal = 0) |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 67 | : Kind(_Kind), Str(_Str), IntVal(_IntVal) {} |
| 68 | |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame^] | 69 | TokenKind getKind() const { return Kind; } |
| 70 | bool is(TokenKind K) const { return Kind == K; } |
| 71 | bool isNot(TokenKind K) const { return Kind != K; } |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 72 | |
| 73 | SMLoc getLoc() const; |
| 74 | |
| 75 | StringRef getString() const { return Str; } |
| 76 | |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame^] | 77 | // FIXME: Don't compute this in advance, it makes every token larger, and is |
| 78 | // also not generally what we want (it is nicer for recovery etc. to lex 123br |
| 79 | // as a single token, then diagnose as an invalid number). |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 80 | int64_t getIntVal() const { |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame^] | 81 | assert(Kind == Integer && "This token isn't an integer"); |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 82 | return IntVal; |
| 83 | } |
| 84 | }; |
| 85 | |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 86 | /// AsmLexer - Lexer class for assembly files. |
Daniel Dunbar | dbd692a | 2009-07-20 20:01:54 +0000 | [diff] [blame] | 87 | class AsmLexer : public MCAsmLexer { |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 88 | SourceMgr &SrcMgr; |
| 89 | |
| 90 | const char *CurPtr; |
| 91 | const MemoryBuffer *CurBuf; |
| 92 | |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 93 | const char *TokStart; |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 94 | |
| 95 | /// The current token. |
| 96 | AsmToken CurTok; |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 97 | |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 98 | /// This is the current buffer index we're lexing from as managed by the |
| 99 | /// SourceMgr object. |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 100 | int CurBuffer; |
| 101 | |
Chris Lattner | faf32c1 | 2009-06-24 00:33:19 +0000 | [diff] [blame] | 102 | void operator=(const AsmLexer&); // DO NOT IMPLEMENT |
| 103 | AsmLexer(const AsmLexer&); // DO NOT IMPLEMENT |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 104 | public: |
| 105 | AsmLexer(SourceMgr &SrcMgr); |
Chris Lattner | faf32c1 | 2009-06-24 00:33:19 +0000 | [diff] [blame] | 106 | ~AsmLexer(); |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 107 | |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame^] | 108 | AsmToken::TokenKind Lex() { |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 109 | return CurTok = LexToken(), getKind(); |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 110 | } |
| 111 | |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame^] | 112 | AsmToken::TokenKind getKind() const { return CurTok.getKind(); } |
| 113 | bool is(AsmToken::TokenKind K) const { return CurTok.is(K); } |
| 114 | bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); } |
Daniel Dunbar | 9a7e2cc | 2009-07-27 21:49:56 +0000 | [diff] [blame] | 115 | |
| 116 | /// getCurStrVal - Get the string for the current token, this includes all |
| 117 | /// characters (for example, the quotes on strings) in the token. |
| 118 | /// |
| 119 | /// The returned StringRef points into the source manager's memory buffer, and |
| 120 | /// is safe to store across calls to Lex(). |
| 121 | StringRef getCurStrVal() const { |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 122 | return CurTok.getString(); |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 123 | } |
| 124 | int64_t getCurIntVal() const { |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 125 | return CurTok.getIntVal(); |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 126 | } |
| 127 | |
| 128 | SMLoc getLoc() const; |
Daniel Dunbar | 3f87233 | 2009-07-28 16:08:33 +0000 | [diff] [blame^] | 129 | |
| 130 | const AsmToken &getTok() const; |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 131 | |
Chris Lattner | 8e25e2d | 2009-07-16 06:14:39 +0000 | [diff] [blame] | 132 | /// EnterIncludeFile - Enter the specified file. This returns true on failure. |
| 133 | bool EnterIncludeFile(const std::string &Filename); |
| 134 | |
Daniel Dunbar | 3fb7683 | 2009-06-30 00:49:23 +0000 | [diff] [blame] | 135 | void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const; |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 136 | |
| 137 | private: |
| 138 | int getNextChar(); |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 139 | AsmToken ReturnError(const char *Loc, const std::string &Msg); |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 140 | |
| 141 | /// LexToken - Read the next token and return its code. |
Daniel Dunbar | cb358b6 | 2009-07-28 03:00:54 +0000 | [diff] [blame] | 142 | AsmToken LexToken(); |
| 143 | AsmToken LexIdentifier(); |
| 144 | AsmToken LexPercent(); |
| 145 | AsmToken LexSlash(); |
| 146 | AsmToken LexLineComment(); |
| 147 | AsmToken LexDigit(); |
| 148 | AsmToken LexQuote(); |
Chris Lattner | a59e877 | 2009-06-21 07:19:10 +0000 | [diff] [blame] | 149 | }; |
| 150 | |
| 151 | } // end namespace llvm |
| 152 | |
| 153 | #endif |