blob: 92edca7be4361cc95750d99d419a84792c55dbc8 [file] [log] [blame]
Chris Lattnerd7038e12009-02-13 00:46:04 +00001//===--- TokenConcatenation.cpp - Token Concatenation Avoidance -----------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the TokenConcatenation class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Lex/TokenConcatenation.h"
15#include "clang/Lex/Preprocessor.h"
16using namespace clang;
17
18
19/// StartsWithL - Return true if the spelling of this token starts with 'L'.
20bool TokenConcatenation::StartsWithL(const Token &Tok) const {
21 if (!Tok.needsCleaning()) {
22 SourceManager &SM = PP.getSourceManager();
23 return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';
24 }
25
26 if (Tok.getLength() < 256) {
27 char Buffer[256];
28 const char *TokPtr = Buffer;
29 PP.getSpelling(Tok, TokPtr);
30 return TokPtr[0] == 'L';
31 }
32
33 return PP.getSpelling(Tok)[0] == 'L';
34}
35
36/// IsIdentifierL - Return true if the spelling of this token is literally
37/// 'L'.
38bool TokenConcatenation::IsIdentifierL(const Token &Tok) const {
39 if (!Tok.needsCleaning()) {
40 if (Tok.getLength() != 1)
41 return false;
42 SourceManager &SM = PP.getSourceManager();
43 return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';
44 }
45
46 if (Tok.getLength() < 256) {
47 char Buffer[256];
48 const char *TokPtr = Buffer;
49 if (PP.getSpelling(Tok, TokPtr) != 1)
50 return false;
51 return TokPtr[0] == 'L';
52 }
53
54 return PP.getSpelling(Tok) == "L";
55}
56
57TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
58 memset(TokenInfo, 0, sizeof(TokenInfo));
59
60 // These tokens have custom code in AvoidConcat.
61 TokenInfo[tok::identifier ] |= aci_custom;
62 TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
63 TokenInfo[tok::period ] |= aci_custom_firstchar;
64 TokenInfo[tok::amp ] |= aci_custom_firstchar;
65 TokenInfo[tok::plus ] |= aci_custom_firstchar;
66 TokenInfo[tok::minus ] |= aci_custom_firstchar;
67 TokenInfo[tok::slash ] |= aci_custom_firstchar;
68 TokenInfo[tok::less ] |= aci_custom_firstchar;
69 TokenInfo[tok::greater ] |= aci_custom_firstchar;
70 TokenInfo[tok::pipe ] |= aci_custom_firstchar;
71 TokenInfo[tok::percent ] |= aci_custom_firstchar;
72 TokenInfo[tok::colon ] |= aci_custom_firstchar;
73 TokenInfo[tok::hash ] |= aci_custom_firstchar;
74 TokenInfo[tok::arrow ] |= aci_custom_firstchar;
75
76 // These tokens change behavior if followed by an '='.
77 TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
78 TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
79 TokenInfo[tok::minus ] |= aci_avoid_equal; // -=
80 TokenInfo[tok::slash ] |= aci_avoid_equal; // /=
81 TokenInfo[tok::less ] |= aci_avoid_equal; // <=
82 TokenInfo[tok::greater ] |= aci_avoid_equal; // >=
83 TokenInfo[tok::pipe ] |= aci_avoid_equal; // |=
84 TokenInfo[tok::percent ] |= aci_avoid_equal; // %=
85 TokenInfo[tok::star ] |= aci_avoid_equal; // *=
86 TokenInfo[tok::exclaim ] |= aci_avoid_equal; // !=
87 TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<=
88 TokenInfo[tok::greaterequal] |= aci_avoid_equal; // >>=
89 TokenInfo[tok::caret ] |= aci_avoid_equal; // ^=
90 TokenInfo[tok::equal ] |= aci_avoid_equal; // ==
91}
92
Daniel Dunbar99c76222009-03-18 03:32:24 +000093/// GetFirstChar - Get the first character of the token \arg Tok,
94/// avoiding calls to getSpelling where possible.
95static char GetFirstChar(Preprocessor &PP, const Token &Tok) {
96 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
97 // Avoid spelling identifiers, the most common form of token.
98 return II->getName()[0];
99 } else if (!Tok.needsCleaning()) {
100 if (Tok.isLiteral() && Tok.getLiteralData()) {
101 return *Tok.getLiteralData();
102 } else {
103 SourceManager &SM = PP.getSourceManager();
104 return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
105 }
106 } else if (Tok.getLength() < 256) {
107 char Buffer[256];
108 const char *TokPtr = Buffer;
109 PP.getSpelling(Tok, TokPtr);
110 return TokPtr[0];
111 } else {
112 return PP.getSpelling(Tok)[0];
113 }
114}
115
Chris Lattnerd7038e12009-02-13 00:46:04 +0000116/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
117/// the two individual tokens to be lexed as a single token, return true
118/// (which causes a space to be printed between them). This allows the output
119/// of -E mode to be lexed to the same token stream as lexing the input
120/// directly would.
121///
122/// This code must conservatively return true if it doesn't want to be 100%
123/// accurate. This will cause the output to include extra space characters,
124/// but the resulting output won't have incorrect concatenations going on.
125/// Examples include "..", which we print with a space between, because we
126/// don't want to track enough to tell "x.." from "...".
127bool TokenConcatenation::AvoidConcat(const Token &PrevTok,
128 const Token &Tok) const {
Chris Lattnerd7038e12009-02-13 00:46:04 +0000129 tok::TokenKind PrevKind = PrevTok.getKind();
130 if (PrevTok.getIdentifierInfo()) // Language keyword or named operator.
131 PrevKind = tok::identifier;
132
133 // Look up information on when we should avoid concatenation with prevtok.
134 unsigned ConcatInfo = TokenInfo[PrevKind];
135
136 // If prevtok never causes a problem for anything after it, return quickly.
137 if (ConcatInfo == 0) return false;
138
139 if (ConcatInfo & aci_avoid_equal) {
140 // If the next token is '=' or '==', avoid concatenation.
141 if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
142 return true;
143 ConcatInfo &= ~aci_avoid_equal;
144 }
145
146 if (ConcatInfo == 0) return false;
147
148 // Basic algorithm: we look at the first character of the second token, and
149 // determine whether it, if appended to the first token, would form (or
150 // would contribute) to a larger token if concatenated.
151 char FirstChar = 0;
152 if (ConcatInfo & aci_custom) {
153 // If the token does not need to know the first character, don't get it.
Chris Lattnerd7038e12009-02-13 00:46:04 +0000154 } else {
Daniel Dunbar99c76222009-03-18 03:32:24 +0000155 FirstChar = GetFirstChar(PP, Tok);
Chris Lattnerd7038e12009-02-13 00:46:04 +0000156 }
Daniel Dunbar99c76222009-03-18 03:32:24 +0000157
Chris Lattnerd7038e12009-02-13 00:46:04 +0000158 switch (PrevKind) {
159 default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
Daniel Dunbar99c76222009-03-18 03:32:24 +0000160 case tok::identifier: // id+id or id+number or id+L"foo".
161 // id+'.'... will not append.
162 if (Tok.is(tok::numeric_constant))
163 return GetFirstChar(PP, Tok) != '.';
164
165 if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) /* ||
Chris Lattnerd7038e12009-02-13 00:46:04 +0000166 Tok.is(tok::wide_char_literal)*/)
167 return true;
168
169 // If this isn't identifier + string, we're done.
170 if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
171 return false;
172
173 // FIXME: need a wide_char_constant!
174
175 // If the string was a wide string L"foo" or wide char L'f', it would
176 // concat with the previous identifier into fooL"bar". Avoid this.
177 if (StartsWithL(Tok))
178 return true;
179
180 // Otherwise, this is a narrow character or string. If the *identifier*
181 // is a literal 'L', avoid pasting L "foo" -> L"foo".
182 return IsIdentifierL(PrevTok);
183 case tok::numeric_constant:
184 return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
185 FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
186 case tok::period: // ..., .*, .1234
187 return FirstChar == '.' || isdigit(FirstChar) ||
188 (FirstChar == '*' && PP.getLangOptions().CPlusPlus);
189 case tok::amp: // &&
190 return FirstChar == '&';
191 case tok::plus: // ++
192 return FirstChar == '+';
193 case tok::minus: // --, ->, ->*
194 return FirstChar == '-' || FirstChar == '>';
195 case tok::slash: //, /*, //
196 return FirstChar == '*' || FirstChar == '/';
197 case tok::less: // <<, <<=, <:, <%
198 return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
199 case tok::greater: // >>, >>=
200 return FirstChar == '>';
201 case tok::pipe: // ||
202 return FirstChar == '|';
203 case tok::percent: // %>, %:
204 return (FirstChar == '>' || FirstChar == ':') &&
205 PP.getLangOptions().Digraphs;
206 case tok::colon: // ::, :>
207 return (FirstChar == ':' && PP.getLangOptions().CPlusPlus) ||
208 (FirstChar == '>' && PP.getLangOptions().Digraphs);
209 case tok::hash: // ##, #@, %:%:
210 return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
211 case tok::arrow: // ->*
212 return FirstChar == '*';
213 }
214}