//===--- Token.h - Token interface ------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file defines the Token interface.
//
//===----------------------------------------------------------------------===//
13
#ifndef LLVM_CLANG_LEX_TOKEN_H
#define LLVM_CLANG_LEX_TOKEN_H

#include "clang/Basic/OperatorKinds.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/TemplateKinds.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <cstdlib>
Reid Spencer5f016e22007-07-11 17:01:13 +000023
24namespace clang {
25
Sean Hunt6cf75022010-08-30 17:47:05 +000026class IdentifierInfo;
27
Chris Lattnerd2177732007-07-20 16:59:19 +000028/// Token - This structure provides full information about a lexed token.
Reid Spencer5f016e22007-07-11 17:01:13 +000029/// It is not intended to be space efficient, it is intended to return as much
30/// information as possible about each returned token. This is expected to be
31/// compressed into a smaller form if memory footprint is important.
Argyrios Kyrtzidis3604e382008-11-08 16:17:04 +000032///
33/// The parser can create a special "annotation token" representing a stream of
34/// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>"
35/// can be represented by a single typename annotation token that carries
36/// information about the SourceRange of the tokens and the type object.
Chris Lattnerd2177732007-07-20 16:59:19 +000037class Token {
Argyrios Kyrtzidis3604e382008-11-08 16:17:04 +000038 /// The location of the token.
Reid Spencer5f016e22007-07-11 17:01:13 +000039 SourceLocation Loc;
Argyrios Kyrtzidis3604e382008-11-08 16:17:04 +000040
Chris Lattner47246be2009-01-26 19:29:26 +000041 // Conceptually these next two fields could be in a union. However, this
42 // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
43 // routine. Keeping as separate members with casts until a more beautiful fix
44 // presents itself.
Argyrios Kyrtzidis3604e382008-11-08 16:17:04 +000045
Daniel Dunbarb28d6de2008-11-20 08:01:39 +000046 /// UintData - This holds either the length of the token text, when
47 /// a normal token, or the end of the SourceRange when an annotation
48 /// token.
49 unsigned UintData;
Argyrios Kyrtzidis3604e382008-11-08 16:17:04 +000050
Sean Hunt6cf75022010-08-30 17:47:05 +000051 /// PtrData - This is a union of four different pointer types, which depends
Chris Lattner47246be2009-01-26 19:29:26 +000052 /// on what type of token this is:
53 /// Identifiers, keywords, etc:
54 /// This is an IdentifierInfo*, which contains the uniqued identifier
55 /// spelling.
56 /// Literals: isLiteral() returns true.
57 /// This is a pointer to the start of the token in a text buffer, which
58 /// may be dirty (have trigraphs / escaped newlines).
59 /// Annotations (resolved type names, C++ scopes, etc): isAnnotation().
60 /// This is a pointer to sema-specific data for the annotation token.
61 /// Other:
62 /// This is null.
Daniel Dunbarb28d6de2008-11-20 08:01:39 +000063 void *PtrData;
Reid Spencer5f016e22007-07-11 17:01:13 +000064
65 /// Kind - The actual flavor of token this is.
Stephen Hines651f13c2014-04-23 16:59:28 -070066 tok::TokenKind Kind;
Mike Stump1eb44332009-09-09 15:08:12 +000067
Reid Spencer5f016e22007-07-11 17:01:13 +000068 /// Flags - Bits we track about this token, members of the TokenFlags enum.
Daniel Dunbar082fb9a2009-11-04 00:34:40 +000069 unsigned char Flags;
Reid Spencer5f016e22007-07-11 17:01:13 +000070public:
Mike Stump1eb44332009-09-09 15:08:12 +000071
Sean Hunt6cf75022010-08-30 17:47:05 +000072 // Various flags set per token:
Reid Spencer5f016e22007-07-11 17:01:13 +000073 enum TokenFlags {
Eli Friedmand2f93082013-09-19 00:41:32 +000074 StartOfLine = 0x01, // At start of line or only after whitespace
75 // (considering the line after macro expansion).
76 LeadingSpace = 0x02, // Whitespace exists before this token (considering
77 // whitespace after macro expansion).
Sean Hunt6cf75022010-08-30 17:47:05 +000078 DisableExpand = 0x04, // This identifier may never be macro expanded.
Jordan Rosec7629d92013-01-24 20:50:46 +000079 NeedsCleaning = 0x08, // Contained an escaped newline or trigraph.
Richard Smith99831e42012-03-06 03:21:47 +000080 LeadingEmptyMacro = 0x10, // Empty macro exists before this token.
Jordan Rosec7629d92013-01-24 20:50:46 +000081 HasUDSuffix = 0x20, // This string or character literal has a ud-suffix.
Reid Kleckner11be0642013-06-26 17:16:08 +000082 HasUCN = 0x40, // This identifier contains a UCN.
Nico Weber2ccf6982013-06-26 17:31:55 +000083 IgnoredComma = 0x80 // This comma is not a macro argument separator (MS).
Reid Spencer5f016e22007-07-11 17:01:13 +000084 };
85
Stephen Hines651f13c2014-04-23 16:59:28 -070086 tok::TokenKind getKind() const { return Kind; }
Reid Spencer5f016e22007-07-11 17:01:13 +000087 void setKind(tok::TokenKind K) { Kind = K; }
Mike Stump1eb44332009-09-09 15:08:12 +000088
Chris Lattner00073222007-10-09 17:23:58 +000089 /// is/isNot - Predicates to check if this token is a specific kind, as in
90 /// "if (Tok.is(tok::l_brace)) {...}".
Stephen Hines651f13c2014-04-23 16:59:28 -070091 bool is(tok::TokenKind K) const { return Kind == K; }
92 bool isNot(tok::TokenKind K) const { return Kind != K; }
Reid Spencer5f016e22007-07-11 17:01:13 +000093
James Dennetta1e92ac2012-06-17 05:49:13 +000094 /// \brief Return true if this is a raw identifier (when lexing
Abramo Bagnarac4bf2b92010-12-22 08:23:18 +000095 /// in raw mode) or a non-keyword identifier (when lexing in non-raw mode).
96 bool isAnyIdentifier() const {
Alexander Kornienko66da0ab2012-09-28 22:24:03 +000097 return tok::isAnyIdentifier(getKind());
Abramo Bagnarac4bf2b92010-12-22 08:23:18 +000098 }
99
Alexander Kornienko66da0ab2012-09-28 22:24:03 +0000100 /// \brief Return true if this is a "literal", like a numeric
Chris Lattner47246be2009-01-26 19:29:26 +0000101 /// constant, string, etc.
102 bool isLiteral() const {
Alexander Kornienko66da0ab2012-09-28 22:24:03 +0000103 return tok::isLiteral(getKind());
Chris Lattner47246be2009-01-26 19:29:26 +0000104 }
105
Alexander Kornienko66da0ab2012-09-28 22:24:03 +0000106 /// \brief Return true if this is any of tok::annot_* kind tokens.
Mike Stump1eb44332009-09-09 15:08:12 +0000107 bool isAnnotation() const {
Alexander Kornienko66da0ab2012-09-28 22:24:03 +0000108 return tok::isAnnotation(getKind());
Argyrios Kyrtzidis3604e382008-11-08 16:17:04 +0000109 }
Mike Stump1eb44332009-09-09 15:08:12 +0000110
James Dennetta1e92ac2012-06-17 05:49:13 +0000111 /// \brief Return a source location identifier for the specified
Reid Spencer5f016e22007-07-11 17:01:13 +0000112 /// offset in the current file.
113 SourceLocation getLocation() const { return Loc; }
Argyrios Kyrtzidis3604e382008-11-08 16:17:04 +0000114 unsigned getLength() const {
Chris Lattner47246be2009-01-26 19:29:26 +0000115 assert(!isAnnotation() && "Annotation tokens have no length field");
Daniel Dunbarb28d6de2008-11-20 08:01:39 +0000116 return UintData;
Argyrios Kyrtzidis3604e382008-11-08 16:17:04 +0000117 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000118
119 void setLocation(SourceLocation L) { Loc = L; }
Chris Lattner79ed16e2009-01-06 05:25:04 +0000120 void setLength(unsigned Len) {
Chris Lattner47246be2009-01-26 19:29:26 +0000121 assert(!isAnnotation() && "Annotation tokens have no length field");
Chris Lattner79ed16e2009-01-06 05:25:04 +0000122 UintData = Len;
123 }
Argyrios Kyrtzidis3604e382008-11-08 16:17:04 +0000124
125 SourceLocation getAnnotationEndLoc() const {
Chris Lattner47246be2009-01-26 19:29:26 +0000126 assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
Daniel Dunbarb28d6de2008-11-20 08:01:39 +0000127 return SourceLocation::getFromRawEncoding(UintData);
Argyrios Kyrtzidis3604e382008-11-08 16:17:04 +0000128 }
129 void setAnnotationEndLoc(SourceLocation L) {
Chris Lattner47246be2009-01-26 19:29:26 +0000130 assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
Daniel Dunbarb28d6de2008-11-20 08:01:39 +0000131 UintData = L.getRawEncoding();
Argyrios Kyrtzidis3604e382008-11-08 16:17:04 +0000132 }
133
Sebastian Redl39d67112010-02-08 19:35:18 +0000134 SourceLocation getLastLoc() const {
135 return isAnnotation() ? getAnnotationEndLoc() : getLocation();
136 }
137
James Dennetta1e92ac2012-06-17 05:49:13 +0000138 /// \brief SourceRange of the group of tokens that this annotation token
139 /// represents.
Argyrios Kyrtzidis3604e382008-11-08 16:17:04 +0000140 SourceRange getAnnotationRange() const {
141 return SourceRange(getLocation(), getAnnotationEndLoc());
142 }
143 void setAnnotationRange(SourceRange R) {
144 setLocation(R.getBegin());
145 setAnnotationEndLoc(R.getEnd());
146 }
Mike Stump1eb44332009-09-09 15:08:12 +0000147
Stephen Hines651f13c2014-04-23 16:59:28 -0700148 const char *getName() const { return tok::getTokenName(Kind); }
Mike Stump1eb44332009-09-09 15:08:12 +0000149
James Dennetta1e92ac2012-06-17 05:49:13 +0000150 /// \brief Reset all flags to cleared.
Reid Spencer5f016e22007-07-11 17:01:13 +0000151 void startToken() {
Chris Lattner79ed16e2009-01-06 05:25:04 +0000152 Kind = tok::unknown;
Reid Spencer5f016e22007-07-11 17:01:13 +0000153 Flags = 0;
Stephen Hines6bcf27b2014-05-29 04:14:42 -0700154 PtrData = nullptr;
Chris Lattnerc54539c2010-06-15 21:06:38 +0000155 UintData = 0;
Reid Spencer5f016e22007-07-11 17:01:13 +0000156 Loc = SourceLocation();
157 }
Mike Stump1eb44332009-09-09 15:08:12 +0000158
Argyrios Kyrtzidis3604e382008-11-08 16:17:04 +0000159 IdentifierInfo *getIdentifierInfo() const {
Abramo Bagnarac4bf2b92010-12-22 08:23:18 +0000160 assert(isNot(tok::raw_identifier) &&
161 "getIdentifierInfo() on a tok::raw_identifier token!");
162 assert(!isAnnotation() &&
163 "getIdentifierInfo() on an annotation token!");
Stephen Hines6bcf27b2014-05-29 04:14:42 -0700164 if (isLiteral()) return nullptr;
Sean Hunt6cf75022010-08-30 17:47:05 +0000165 return (IdentifierInfo*) PtrData;
Argyrios Kyrtzidis3604e382008-11-08 16:17:04 +0000166 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000167 void setIdentifierInfo(IdentifierInfo *II) {
Sean Hunt6cf75022010-08-30 17:47:05 +0000168 PtrData = (void*) II;
Reid Spencer5f016e22007-07-11 17:01:13 +0000169 }
Mike Stump1eb44332009-09-09 15:08:12 +0000170
Stephen Hines6bcf27b2014-05-29 04:14:42 -0700171 /// getRawIdentifier - For a raw identifier token (i.e., an identifier
172 /// lexed in raw mode), returns a reference to the text substring in the
173 /// buffer if known.
174 StringRef getRawIdentifier() const {
Abramo Bagnarac4bf2b92010-12-22 08:23:18 +0000175 assert(is(tok::raw_identifier));
Stephen Hines6bcf27b2014-05-29 04:14:42 -0700176 return StringRef(reinterpret_cast<const char *>(PtrData), getLength());
Abramo Bagnarac4bf2b92010-12-22 08:23:18 +0000177 }
178 void setRawIdentifierData(const char *Ptr) {
179 assert(is(tok::raw_identifier));
180 PtrData = const_cast<char*>(Ptr);
181 }
182
Chris Lattner47246be2009-01-26 19:29:26 +0000183 /// getLiteralData - For a literal token (numeric constant, string, etc), this
184 /// returns a pointer to the start of it in the text buffer if known, null
185 /// otherwise.
186 const char *getLiteralData() const {
187 assert(isLiteral() && "Cannot get literal data of non-literal");
Sean Hunt6cf75022010-08-30 17:47:05 +0000188 return reinterpret_cast<const char*>(PtrData);
Chris Lattner47246be2009-01-26 19:29:26 +0000189 }
190 void setLiteralData(const char *Ptr) {
191 assert(isLiteral() && "Cannot set literal data of non-literal");
Sean Hunt6cf75022010-08-30 17:47:05 +0000192 PtrData = const_cast<char*>(Ptr);
Chris Lattner47246be2009-01-26 19:29:26 +0000193 }
Mike Stump1eb44332009-09-09 15:08:12 +0000194
Argyrios Kyrtzidis3604e382008-11-08 16:17:04 +0000195 void *getAnnotationValue() const {
Chris Lattner47246be2009-01-26 19:29:26 +0000196 assert(isAnnotation() && "Used AnnotVal on non-annotation token");
Daniel Dunbarb28d6de2008-11-20 08:01:39 +0000197 return PtrData;
Argyrios Kyrtzidis3604e382008-11-08 16:17:04 +0000198 }
199 void setAnnotationValue(void *val) {
Chris Lattner47246be2009-01-26 19:29:26 +0000200 assert(isAnnotation() && "Used AnnotVal on non-annotation token");
Daniel Dunbarb28d6de2008-11-20 08:01:39 +0000201 PtrData = val;
Argyrios Kyrtzidis3604e382008-11-08 16:17:04 +0000202 }
Mike Stump1eb44332009-09-09 15:08:12 +0000203
James Dennetta1e92ac2012-06-17 05:49:13 +0000204 /// \brief Set the specified flag.
Reid Spencer5f016e22007-07-11 17:01:13 +0000205 void setFlag(TokenFlags Flag) {
206 Flags |= Flag;
207 }
Mike Stump1eb44332009-09-09 15:08:12 +0000208
James Dennetta1e92ac2012-06-17 05:49:13 +0000209 /// \brief Unset the specified flag.
Reid Spencer5f016e22007-07-11 17:01:13 +0000210 void clearFlag(TokenFlags Flag) {
211 Flags &= ~Flag;
212 }
Mike Stump1eb44332009-09-09 15:08:12 +0000213
James Dennetta1e92ac2012-06-17 05:49:13 +0000214 /// \brief Return the internal represtation of the flags.
215 ///
216 /// This is only intended for low-level operations such as writing tokens to
217 /// disk.
Ted Kremenekef3b2152008-10-21 03:32:15 +0000218 unsigned getFlags() const {
219 return Flags;
220 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000221
James Dennetta1e92ac2012-06-17 05:49:13 +0000222 /// \brief Set a flag to either true or false.
Reid Spencer5f016e22007-07-11 17:01:13 +0000223 void setFlagValue(TokenFlags Flag, bool Val) {
Mike Stump1eb44332009-09-09 15:08:12 +0000224 if (Val)
Reid Spencer5f016e22007-07-11 17:01:13 +0000225 setFlag(Flag);
226 else
227 clearFlag(Flag);
228 }
Mike Stump1eb44332009-09-09 15:08:12 +0000229
Reid Spencer5f016e22007-07-11 17:01:13 +0000230 /// isAtStartOfLine - Return true if this token is at the start of a line.
231 ///
Chris Lattnerb8128142007-09-03 18:28:41 +0000232 bool isAtStartOfLine() const { return (Flags & StartOfLine) ? true : false; }
Mike Stump1eb44332009-09-09 15:08:12 +0000233
James Dennetta1e92ac2012-06-17 05:49:13 +0000234 /// \brief Return true if this token has whitespace before it.
Reid Spencer5f016e22007-07-11 17:01:13 +0000235 ///
Chris Lattnerb8128142007-09-03 18:28:41 +0000236 bool hasLeadingSpace() const { return (Flags & LeadingSpace) ? true : false; }
Mike Stump1eb44332009-09-09 15:08:12 +0000237
James Dennetta1e92ac2012-06-17 05:49:13 +0000238 /// \brief Return true if this identifier token should never
Reid Spencer5f016e22007-07-11 17:01:13 +0000239 /// be expanded in the future, due to C99 6.10.3.4p2.
Ted Kremenek8e748ab2008-02-23 01:05:54 +0000240 bool isExpandDisabled() const {
241 return (Flags & DisableExpand) ? true : false;
242 }
Mike Stump1eb44332009-09-09 15:08:12 +0000243
James Dennetta1e92ac2012-06-17 05:49:13 +0000244 /// \brief Return true if we have an ObjC keyword identifier.
Steve Naroff861cf3e2007-08-23 18:16:40 +0000245 bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const;
Mike Stump1eb44332009-09-09 15:08:12 +0000246
James Dennetta1e92ac2012-06-17 05:49:13 +0000247 /// \brief Return the ObjC keyword kind.
Steve Naroff861cf3e2007-08-23 18:16:40 +0000248 tok::ObjCKeywordKind getObjCKeywordID() const;
Mike Stump1eb44332009-09-09 15:08:12 +0000249
James Dennetta1e92ac2012-06-17 05:49:13 +0000250 /// \brief Return true if this token has trigraphs or escaped newlines in it.
Sean Hunt6cf75022010-08-30 17:47:05 +0000251 bool needsCleaning() const { return (Flags & NeedsCleaning) ? true : false; }
Argyrios Kyrtzidis44aa1f32010-11-20 02:04:01 +0000252
253 /// \brief Return true if this token has an empty macro before it.
254 ///
255 bool hasLeadingEmptyMacro() const {
256 return (Flags & LeadingEmptyMacro) ? true : false;
257 }
258
Richard Smith99831e42012-03-06 03:21:47 +0000259 /// \brief Return true if this token is a string or character literal which
260 /// has a ud-suffix.
261 bool hasUDSuffix() const { return (Flags & HasUDSuffix) ? true : false; }
Jordan Rosec7629d92013-01-24 20:50:46 +0000262
263 /// Returns true if this token contains a universal character name.
264 bool hasUCN() const { return (Flags & HasUCN) ? true : false; }
Reid Spencer5f016e22007-07-11 17:01:13 +0000265};
266
James Dennetta1e92ac2012-06-17 05:49:13 +0000267/// \brief Information about the conditional stack (\#if directives)
Reid Spencer5f016e22007-07-11 17:01:13 +0000268/// currently active.
269struct PPConditionalInfo {
James Dennetta1e92ac2012-06-17 05:49:13 +0000270 /// \brief Location where the conditional started.
Reid Spencer5f016e22007-07-11 17:01:13 +0000271 SourceLocation IfLoc;
Mike Stump1eb44332009-09-09 15:08:12 +0000272
James Dennetta1e92ac2012-06-17 05:49:13 +0000273 /// \brief True if this was contained in a skipping directive, e.g.,
274 /// in a "\#if 0" block.
Reid Spencer5f016e22007-07-11 17:01:13 +0000275 bool WasSkipping;
Mike Stump1eb44332009-09-09 15:08:12 +0000276
James Dennetta1e92ac2012-06-17 05:49:13 +0000277 /// \brief True if we have emitted tokens already, and now we're in
278 /// an \#else block or something. Only useful in Skipping blocks.
Reid Spencer5f016e22007-07-11 17:01:13 +0000279 bool FoundNonSkip;
Mike Stump1eb44332009-09-09 15:08:12 +0000280
James Dennetta1e92ac2012-06-17 05:49:13 +0000281 /// \brief True if we've seen a \#else in this block. If so,
282 /// \#elif/\#else directives are not allowed.
Reid Spencer5f016e22007-07-11 17:01:13 +0000283 bool FoundElse;
284};
285
Reid Spencer5f016e22007-07-11 17:01:13 +0000286} // end namespace clang
287
Benjamin Kramer1c573cb2010-06-08 11:23:26 +0000288namespace llvm {
289 template <>
290 struct isPodLike<clang::Token> { static const bool value = true; };
291} // end namespace llvm
292
Reid Spencer5f016e22007-07-11 17:01:13 +0000293#endif