//===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the lexer for assembly files.
//
//===----------------------------------------------------------------------===//
13
14#ifndef ASMLEXER_H
15#define ASMLEXER_H
16
Daniel Dunbar9a7e2cc2009-07-27 21:49:56 +000017#include "llvm/ADT/StringRef.h"
Daniel Dunbardbd692a2009-07-20 20:01:54 +000018#include "llvm/MC/MCAsmLexer.h"
Chris Lattnera59e8772009-06-21 07:19:10 +000019#include "llvm/Support/DataTypes.h"
20#include <string>
21#include <cassert>
22
23namespace llvm {
24class MemoryBuffer;
25class SourceMgr;
26class SMLoc;
27
Daniel Dunbar3f872332009-07-28 16:08:33 +000028/// AsmToken - Target independent representation for an assembler token.
29struct AsmToken {
30 enum TokenKind {
Chris Lattnera59e8772009-06-21 07:19:10 +000031 // Markers
32 Eof, Error,
33
Chris Lattner10a907d2009-06-21 19:56:35 +000034 // String values.
Chris Lattnera59e8772009-06-21 07:19:10 +000035 Identifier,
Chris Lattner4651bca2009-06-21 19:21:25 +000036 Register,
Chris Lattner10a907d2009-06-21 19:56:35 +000037 String,
38
39 // Integer values.
Daniel Dunbar3f872332009-07-28 16:08:33 +000040 Integer,
Chris Lattnera59e8772009-06-21 07:19:10 +000041
Chris Lattner10a907d2009-06-21 19:56:35 +000042 // No-value.
Chris Lattner4651bca2009-06-21 19:21:25 +000043 EndOfStatement,
Chris Lattnera59e8772009-06-21 07:19:10 +000044 Colon,
Chris Lattner74ec1a32009-06-22 06:32:03 +000045 Plus, Minus, Tilde,
Chris Lattner4651bca2009-06-21 19:21:25 +000046 Slash, // '/'
47 LParen, RParen,
Daniel Dunbar475839e2009-06-29 20:37:27 +000048 Star, Comma, Dollar, Equal, EqualEqual,
Chris Lattner8dfbe6c2009-06-23 05:57:07 +000049
Daniel Dunbar475839e2009-06-29 20:37:27 +000050 Pipe, PipePipe, Caret,
51 Amp, AmpAmp, Exclaim, ExclaimEqual, Percent,
52 Less, LessEqual, LessLess, LessGreater,
53 Greater, GreaterEqual, GreaterGreater
Chris Lattnera59e8772009-06-21 07:19:10 +000054 };
Chris Lattnera59e8772009-06-21 07:19:10 +000055
Daniel Dunbar3f872332009-07-28 16:08:33 +000056 TokenKind Kind;
Daniel Dunbarcb358b62009-07-28 03:00:54 +000057
58 /// A reference to the entire token contents; this is always a pointer into
59 /// a memory buffer owned by the source manager.
60 StringRef Str;
61
62 int64_t IntVal;
63
64public:
65 AsmToken() {}
Daniel Dunbar3f872332009-07-28 16:08:33 +000066 AsmToken(TokenKind _Kind, const StringRef &_Str, int64_t _IntVal = 0)
Daniel Dunbarcb358b62009-07-28 03:00:54 +000067 : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
68
Daniel Dunbar3f872332009-07-28 16:08:33 +000069 TokenKind getKind() const { return Kind; }
70 bool is(TokenKind K) const { return Kind == K; }
71 bool isNot(TokenKind K) const { return Kind != K; }
Daniel Dunbarcb358b62009-07-28 03:00:54 +000072
73 SMLoc getLoc() const;
74
75 StringRef getString() const { return Str; }
76
Daniel Dunbar3f872332009-07-28 16:08:33 +000077 // FIXME: Don't compute this in advance, it makes every token larger, and is
78 // also not generally what we want (it is nicer for recovery etc. to lex 123br
79 // as a single token, then diagnose as an invalid number).
Daniel Dunbarcb358b62009-07-28 03:00:54 +000080 int64_t getIntVal() const {
Daniel Dunbar3f872332009-07-28 16:08:33 +000081 assert(Kind == Integer && "This token isn't an integer");
Daniel Dunbarcb358b62009-07-28 03:00:54 +000082 return IntVal;
83 }
84};
85
Chris Lattnera59e8772009-06-21 07:19:10 +000086/// AsmLexer - Lexer class for assembly files.
Daniel Dunbardbd692a2009-07-20 20:01:54 +000087class AsmLexer : public MCAsmLexer {
Chris Lattnera59e8772009-06-21 07:19:10 +000088 SourceMgr &SrcMgr;
89
90 const char *CurPtr;
91 const MemoryBuffer *CurBuf;
92
Chris Lattnera59e8772009-06-21 07:19:10 +000093 const char *TokStart;
Daniel Dunbarcb358b62009-07-28 03:00:54 +000094
95 /// The current token.
96 AsmToken CurTok;
Chris Lattnera59e8772009-06-21 07:19:10 +000097
Daniel Dunbarcb358b62009-07-28 03:00:54 +000098 /// This is the current buffer index we're lexing from as managed by the
99 /// SourceMgr object.
Chris Lattnera59e8772009-06-21 07:19:10 +0000100 int CurBuffer;
101
Chris Lattnerfaf32c12009-06-24 00:33:19 +0000102 void operator=(const AsmLexer&); // DO NOT IMPLEMENT
103 AsmLexer(const AsmLexer&); // DO NOT IMPLEMENT
Chris Lattnera59e8772009-06-21 07:19:10 +0000104public:
105 AsmLexer(SourceMgr &SrcMgr);
Chris Lattnerfaf32c12009-06-24 00:33:19 +0000106 ~AsmLexer();
Chris Lattnera59e8772009-06-21 07:19:10 +0000107
Daniel Dunbar3f872332009-07-28 16:08:33 +0000108 AsmToken::TokenKind Lex() {
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000109 return CurTok = LexToken(), getKind();
Chris Lattnera59e8772009-06-21 07:19:10 +0000110 }
111
Daniel Dunbar3f872332009-07-28 16:08:33 +0000112 AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
113 bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
114 bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
Daniel Dunbar9a7e2cc2009-07-27 21:49:56 +0000115
116 /// getCurStrVal - Get the string for the current token, this includes all
117 /// characters (for example, the quotes on strings) in the token.
118 ///
119 /// The returned StringRef points into the source manager's memory buffer, and
120 /// is safe to store across calls to Lex().
121 StringRef getCurStrVal() const {
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000122 return CurTok.getString();
Chris Lattnera59e8772009-06-21 07:19:10 +0000123 }
124 int64_t getCurIntVal() const {
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000125 return CurTok.getIntVal();
Chris Lattnera59e8772009-06-21 07:19:10 +0000126 }
127
128 SMLoc getLoc() const;
Daniel Dunbar3f872332009-07-28 16:08:33 +0000129
130 const AsmToken &getTok() const;
Chris Lattnera59e8772009-06-21 07:19:10 +0000131
Chris Lattner8e25e2d2009-07-16 06:14:39 +0000132 /// EnterIncludeFile - Enter the specified file. This returns true on failure.
133 bool EnterIncludeFile(const std::string &Filename);
134
Daniel Dunbar3fb76832009-06-30 00:49:23 +0000135 void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const;
Chris Lattnera59e8772009-06-21 07:19:10 +0000136
137private:
138 int getNextChar();
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000139 AsmToken ReturnError(const char *Loc, const std::string &Msg);
Chris Lattnera59e8772009-06-21 07:19:10 +0000140
141 /// LexToken - Read the next token and return its code.
Daniel Dunbarcb358b62009-07-28 03:00:54 +0000142 AsmToken LexToken();
143 AsmToken LexIdentifier();
144 AsmToken LexPercent();
145 AsmToken LexSlash();
146 AsmToken LexLineComment();
147 AsmToken LexDigit();
148 AsmToken LexQuote();
Chris Lattnera59e8772009-06-21 07:19:10 +0000149};
150
151} // end namespace llvm
152
153#endif