blob: 6146499e16c514706885bdb27116146549470568 [file] [log] [blame]
//===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the lexer for assembly files.
//
//===----------------------------------------------------------------------===//
13
14#ifndef ASMLEXER_H
15#define ASMLEXER_H
16
Daniel Dunbar9a7b61d2009-07-27 21:49:56 +000017#include "llvm/ADT/StringRef.h"
Daniel Dunbar78929e52009-07-20 20:01:54 +000018#include "llvm/MC/MCAsmLexer.h"
Chris Lattner22b67fb2009-06-21 07:19:10 +000019#include "llvm/Support/DataTypes.h"
20#include <string>
21#include <cassert>
22
23namespace llvm {
24class MemoryBuffer;
25class SourceMgr;
26class SMLoc;
27
Daniel Dunbarc479a542009-07-28 16:08:33 +000028/// AsmToken - Target independent representation for an assembler token.
29struct AsmToken {
30 enum TokenKind {
Chris Lattner22b67fb2009-06-21 07:19:10 +000031 // Markers
32 Eof, Error,
33
Chris Lattnerba605b72009-06-21 19:56:35 +000034 // String values.
Chris Lattner22b67fb2009-06-21 07:19:10 +000035 Identifier,
Chris Lattnerc688c232009-06-21 19:21:25 +000036 Register,
Chris Lattnerba605b72009-06-21 19:56:35 +000037 String,
38
39 // Integer values.
Daniel Dunbarc479a542009-07-28 16:08:33 +000040 Integer,
Chris Lattner22b67fb2009-06-21 07:19:10 +000041
Chris Lattnerba605b72009-06-21 19:56:35 +000042 // No-value.
Chris Lattnerc688c232009-06-21 19:21:25 +000043 EndOfStatement,
Chris Lattner22b67fb2009-06-21 07:19:10 +000044 Colon,
Chris Lattnere8164362009-06-22 06:32:03 +000045 Plus, Minus, Tilde,
Chris Lattnerc688c232009-06-21 19:21:25 +000046 Slash, // '/'
47 LParen, RParen,
Daniel Dunbarc3b8a4b2009-06-29 20:37:27 +000048 Star, Comma, Dollar, Equal, EqualEqual,
Chris Lattnerd8817ef2009-06-23 05:57:07 +000049
Daniel Dunbarc3b8a4b2009-06-29 20:37:27 +000050 Pipe, PipePipe, Caret,
51 Amp, AmpAmp, Exclaim, ExclaimEqual, Percent,
52 Less, LessEqual, LessLess, LessGreater,
53 Greater, GreaterEqual, GreaterGreater
Chris Lattner22b67fb2009-06-21 07:19:10 +000054 };
Chris Lattner22b67fb2009-06-21 07:19:10 +000055
Daniel Dunbarc479a542009-07-28 16:08:33 +000056 TokenKind Kind;
Daniel Dunbar8ef32962009-07-28 03:00:54 +000057
58 /// A reference to the entire token contents; this is always a pointer into
59 /// a memory buffer owned by the source manager.
60 StringRef Str;
61
62 int64_t IntVal;
63
64public:
65 AsmToken() {}
Daniel Dunbarc479a542009-07-28 16:08:33 +000066 AsmToken(TokenKind _Kind, const StringRef &_Str, int64_t _IntVal = 0)
Daniel Dunbar8ef32962009-07-28 03:00:54 +000067 : Kind(_Kind), Str(_Str), IntVal(_IntVal) {}
68
Daniel Dunbarc479a542009-07-28 16:08:33 +000069 TokenKind getKind() const { return Kind; }
70 bool is(TokenKind K) const { return Kind == K; }
71 bool isNot(TokenKind K) const { return Kind != K; }
Daniel Dunbar8ef32962009-07-28 03:00:54 +000072
73 SMLoc getLoc() const;
74
75 StringRef getString() const { return Str; }
76
Daniel Dunbarc479a542009-07-28 16:08:33 +000077 // FIXME: Don't compute this in advance, it makes every token larger, and is
78 // also not generally what we want (it is nicer for recovery etc. to lex 123br
79 // as a single token, then diagnose as an invalid number).
Daniel Dunbar8ef32962009-07-28 03:00:54 +000080 int64_t getIntVal() const {
Daniel Dunbarc479a542009-07-28 16:08:33 +000081 assert(Kind == Integer && "This token isn't an integer");
Daniel Dunbar8ef32962009-07-28 03:00:54 +000082 return IntVal;
83 }
84};
85
Chris Lattner22b67fb2009-06-21 07:19:10 +000086/// AsmLexer - Lexer class for assembly files.
Daniel Dunbar78929e52009-07-20 20:01:54 +000087class AsmLexer : public MCAsmLexer {
Chris Lattner22b67fb2009-06-21 07:19:10 +000088 SourceMgr &SrcMgr;
89
90 const char *CurPtr;
91 const MemoryBuffer *CurBuf;
92
Chris Lattner22b67fb2009-06-21 07:19:10 +000093 const char *TokStart;
Daniel Dunbar8ef32962009-07-28 03:00:54 +000094
95 /// The current token.
96 AsmToken CurTok;
Chris Lattner22b67fb2009-06-21 07:19:10 +000097
Daniel Dunbar8ef32962009-07-28 03:00:54 +000098 /// This is the current buffer index we're lexing from as managed by the
99 /// SourceMgr object.
Chris Lattner22b67fb2009-06-21 07:19:10 +0000100 int CurBuffer;
101
Chris Lattner592e3bb2009-06-24 00:33:19 +0000102 void operator=(const AsmLexer&); // DO NOT IMPLEMENT
103 AsmLexer(const AsmLexer&); // DO NOT IMPLEMENT
Chris Lattner22b67fb2009-06-21 07:19:10 +0000104public:
105 AsmLexer(SourceMgr &SrcMgr);
Chris Lattner592e3bb2009-06-24 00:33:19 +0000106 ~AsmLexer();
Chris Lattner22b67fb2009-06-21 07:19:10 +0000107
Daniel Dunbarc479a542009-07-28 16:08:33 +0000108 AsmToken::TokenKind Lex() {
Daniel Dunbar8ef32962009-07-28 03:00:54 +0000109 return CurTok = LexToken(), getKind();
Chris Lattner22b67fb2009-06-21 07:19:10 +0000110 }
111
Daniel Dunbarc479a542009-07-28 16:08:33 +0000112 AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
113 bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
114 bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
Daniel Dunbar9a7b61d2009-07-27 21:49:56 +0000115
116 /// getCurStrVal - Get the string for the current token, this includes all
117 /// characters (for example, the quotes on strings) in the token.
118 ///
119 /// The returned StringRef points into the source manager's memory buffer, and
120 /// is safe to store across calls to Lex().
121 StringRef getCurStrVal() const {
Daniel Dunbar8ef32962009-07-28 03:00:54 +0000122 return CurTok.getString();
Chris Lattner22b67fb2009-06-21 07:19:10 +0000123 }
124 int64_t getCurIntVal() const {
Daniel Dunbar8ef32962009-07-28 03:00:54 +0000125 return CurTok.getIntVal();
Chris Lattner22b67fb2009-06-21 07:19:10 +0000126 }
127
128 SMLoc getLoc() const;
Daniel Dunbarc479a542009-07-28 16:08:33 +0000129
130 const AsmToken &getTok() const;
Chris Lattner22b67fb2009-06-21 07:19:10 +0000131
Chris Lattner7aca1522009-07-16 06:14:39 +0000132 /// EnterIncludeFile - Enter the specified file. This returns true on failure.
133 bool EnterIncludeFile(const std::string &Filename);
134
Daniel Dunbarb5aec992009-06-30 00:49:23 +0000135 void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const;
Chris Lattner22b67fb2009-06-21 07:19:10 +0000136
137private:
138 int getNextChar();
Daniel Dunbar8ef32962009-07-28 03:00:54 +0000139 AsmToken ReturnError(const char *Loc, const std::string &Msg);
Chris Lattner22b67fb2009-06-21 07:19:10 +0000140
141 /// LexToken - Read the next token and return its code.
Daniel Dunbar8ef32962009-07-28 03:00:54 +0000142 AsmToken LexToken();
143 AsmToken LexIdentifier();
144 AsmToken LexPercent();
145 AsmToken LexSlash();
146 AsmToken LexLineComment();
147 AsmToken LexDigit();
148 AsmToken LexQuote();
Chris Lattner22b67fb2009-06-21 07:19:10 +0000149};
150
151} // end namespace llvm
152
153#endif