//===--- TokenConcatenation.cpp - Token Concatenation Avoidance -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the TokenConcatenation class.
//
//===----------------------------------------------------------------------===//
13
14#include "clang/Lex/TokenConcatenation.h"
15#include "clang/Lex/Preprocessor.h"
Mike Stump1eb44332009-09-09 15:08:12 +000016using namespace clang;
Chris Lattnerd7038e12009-02-13 00:46:04 +000017
18
19/// StartsWithL - Return true if the spelling of this token starts with 'L'.
20bool TokenConcatenation::StartsWithL(const Token &Tok) const {
21 if (!Tok.needsCleaning()) {
22 SourceManager &SM = PP.getSourceManager();
23 return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';
24 }
Mike Stump1eb44332009-09-09 15:08:12 +000025
Chris Lattnerd7038e12009-02-13 00:46:04 +000026 if (Tok.getLength() < 256) {
27 char Buffer[256];
28 const char *TokPtr = Buffer;
29 PP.getSpelling(Tok, TokPtr);
30 return TokPtr[0] == 'L';
31 }
Mike Stump1eb44332009-09-09 15:08:12 +000032
Chris Lattnerd7038e12009-02-13 00:46:04 +000033 return PP.getSpelling(Tok)[0] == 'L';
34}
35
36/// IsIdentifierL - Return true if the spelling of this token is literally
37/// 'L'.
38bool TokenConcatenation::IsIdentifierL(const Token &Tok) const {
39 if (!Tok.needsCleaning()) {
40 if (Tok.getLength() != 1)
41 return false;
42 SourceManager &SM = PP.getSourceManager();
43 return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation())) == 'L';
44 }
Mike Stump1eb44332009-09-09 15:08:12 +000045
Chris Lattnerd7038e12009-02-13 00:46:04 +000046 if (Tok.getLength() < 256) {
47 char Buffer[256];
48 const char *TokPtr = Buffer;
Mike Stump1eb44332009-09-09 15:08:12 +000049 if (PP.getSpelling(Tok, TokPtr) != 1)
Chris Lattnerd7038e12009-02-13 00:46:04 +000050 return false;
51 return TokPtr[0] == 'L';
52 }
Mike Stump1eb44332009-09-09 15:08:12 +000053
Chris Lattnerd7038e12009-02-13 00:46:04 +000054 return PP.getSpelling(Tok) == "L";
55}
56
57TokenConcatenation::TokenConcatenation(Preprocessor &pp) : PP(pp) {
58 memset(TokenInfo, 0, sizeof(TokenInfo));
Mike Stump1eb44332009-09-09 15:08:12 +000059
Chris Lattnerd7038e12009-02-13 00:46:04 +000060 // These tokens have custom code in AvoidConcat.
61 TokenInfo[tok::identifier ] |= aci_custom;
62 TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
63 TokenInfo[tok::period ] |= aci_custom_firstchar;
64 TokenInfo[tok::amp ] |= aci_custom_firstchar;
65 TokenInfo[tok::plus ] |= aci_custom_firstchar;
66 TokenInfo[tok::minus ] |= aci_custom_firstchar;
67 TokenInfo[tok::slash ] |= aci_custom_firstchar;
68 TokenInfo[tok::less ] |= aci_custom_firstchar;
69 TokenInfo[tok::greater ] |= aci_custom_firstchar;
70 TokenInfo[tok::pipe ] |= aci_custom_firstchar;
71 TokenInfo[tok::percent ] |= aci_custom_firstchar;
72 TokenInfo[tok::colon ] |= aci_custom_firstchar;
73 TokenInfo[tok::hash ] |= aci_custom_firstchar;
74 TokenInfo[tok::arrow ] |= aci_custom_firstchar;
Mike Stump1eb44332009-09-09 15:08:12 +000075
Chris Lattnerd7038e12009-02-13 00:46:04 +000076 // These tokens change behavior if followed by an '='.
77 TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
78 TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
79 TokenInfo[tok::minus ] |= aci_avoid_equal; // -=
80 TokenInfo[tok::slash ] |= aci_avoid_equal; // /=
81 TokenInfo[tok::less ] |= aci_avoid_equal; // <=
82 TokenInfo[tok::greater ] |= aci_avoid_equal; // >=
83 TokenInfo[tok::pipe ] |= aci_avoid_equal; // |=
84 TokenInfo[tok::percent ] |= aci_avoid_equal; // %=
85 TokenInfo[tok::star ] |= aci_avoid_equal; // *=
86 TokenInfo[tok::exclaim ] |= aci_avoid_equal; // !=
87 TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<=
Chris Lattner86851102010-03-26 17:10:02 +000088 TokenInfo[tok::greatergreater] |= aci_avoid_equal; // >>=
Chris Lattnerd7038e12009-02-13 00:46:04 +000089 TokenInfo[tok::caret ] |= aci_avoid_equal; // ^=
90 TokenInfo[tok::equal ] |= aci_avoid_equal; // ==
91}
92
Daniel Dunbar99c76222009-03-18 03:32:24 +000093/// GetFirstChar - Get the first character of the token \arg Tok,
94/// avoiding calls to getSpelling where possible.
95static char GetFirstChar(Preprocessor &PP, const Token &Tok) {
96 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
97 // Avoid spelling identifiers, the most common form of token.
Daniel Dunbare013d682009-10-18 20:26:12 +000098 return II->getNameStart()[0];
Daniel Dunbar99c76222009-03-18 03:32:24 +000099 } else if (!Tok.needsCleaning()) {
100 if (Tok.isLiteral() && Tok.getLiteralData()) {
101 return *Tok.getLiteralData();
102 } else {
103 SourceManager &SM = PP.getSourceManager();
104 return *SM.getCharacterData(SM.getSpellingLoc(Tok.getLocation()));
105 }
106 } else if (Tok.getLength() < 256) {
107 char Buffer[256];
108 const char *TokPtr = Buffer;
109 PP.getSpelling(Tok, TokPtr);
110 return TokPtr[0];
111 } else {
112 return PP.getSpelling(Tok)[0];
113 }
114}
115
Chris Lattnerd7038e12009-02-13 00:46:04 +0000116/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
117/// the two individual tokens to be lexed as a single token, return true
118/// (which causes a space to be printed between them). This allows the output
119/// of -E mode to be lexed to the same token stream as lexing the input
120/// directly would.
121///
122/// This code must conservatively return true if it doesn't want to be 100%
123/// accurate. This will cause the output to include extra space characters,
124/// but the resulting output won't have incorrect concatenations going on.
125/// Examples include "..", which we print with a space between, because we
126/// don't want to track enough to tell "x.." from "...".
Chris Lattner88773212010-04-14 03:57:19 +0000127bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
128 const Token &PrevTok,
Chris Lattnerd7038e12009-02-13 00:46:04 +0000129 const Token &Tok) const {
Chris Lattnere1614bb2009-04-21 23:28:41 +0000130 // First, check to see if the tokens were directly adjacent in the original
131 // source. If they were, it must be okay to stick them together: if there
132 // were an issue, the tokens would have been lexed differently.
133 if (PrevTok.getLocation().isFileID() && Tok.getLocation().isFileID() &&
Mike Stump1eb44332009-09-09 15:08:12 +0000134 PrevTok.getLocation().getFileLocWithOffset(PrevTok.getLength()) ==
Chris Lattnere1614bb2009-04-21 23:28:41 +0000135 Tok.getLocation())
136 return false;
Mike Stump1eb44332009-09-09 15:08:12 +0000137
Chris Lattnerd7038e12009-02-13 00:46:04 +0000138 tok::TokenKind PrevKind = PrevTok.getKind();
139 if (PrevTok.getIdentifierInfo()) // Language keyword or named operator.
140 PrevKind = tok::identifier;
Mike Stump1eb44332009-09-09 15:08:12 +0000141
Chris Lattnerd7038e12009-02-13 00:46:04 +0000142 // Look up information on when we should avoid concatenation with prevtok.
143 unsigned ConcatInfo = TokenInfo[PrevKind];
Mike Stump1eb44332009-09-09 15:08:12 +0000144
Chris Lattnerd7038e12009-02-13 00:46:04 +0000145 // If prevtok never causes a problem for anything after it, return quickly.
146 if (ConcatInfo == 0) return false;
Mike Stump1eb44332009-09-09 15:08:12 +0000147
Chris Lattnerd7038e12009-02-13 00:46:04 +0000148 if (ConcatInfo & aci_avoid_equal) {
149 // If the next token is '=' or '==', avoid concatenation.
150 if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
151 return true;
152 ConcatInfo &= ~aci_avoid_equal;
153 }
Mike Stump1eb44332009-09-09 15:08:12 +0000154
Chris Lattnerd7038e12009-02-13 00:46:04 +0000155 if (ConcatInfo == 0) return false;
Mike Stump1eb44332009-09-09 15:08:12 +0000156
Chris Lattnerd7038e12009-02-13 00:46:04 +0000157 // Basic algorithm: we look at the first character of the second token, and
158 // determine whether it, if appended to the first token, would form (or
159 // would contribute) to a larger token if concatenated.
160 char FirstChar = 0;
161 if (ConcatInfo & aci_custom) {
162 // If the token does not need to know the first character, don't get it.
Chris Lattnerd7038e12009-02-13 00:46:04 +0000163 } else {
Daniel Dunbar99c76222009-03-18 03:32:24 +0000164 FirstChar = GetFirstChar(PP, Tok);
Chris Lattnerd7038e12009-02-13 00:46:04 +0000165 }
Mike Stump1eb44332009-09-09 15:08:12 +0000166
Chris Lattnerd7038e12009-02-13 00:46:04 +0000167 switch (PrevKind) {
168 default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
Mike Stump1eb44332009-09-09 15:08:12 +0000169 case tok::identifier: // id+id or id+number or id+L"foo".
Daniel Dunbar99c76222009-03-18 03:32:24 +0000170 // id+'.'... will not append.
171 if (Tok.is(tok::numeric_constant))
172 return GetFirstChar(PP, Tok) != '.';
173
174 if (Tok.getIdentifierInfo() || Tok.is(tok::wide_string_literal) /* ||
Chris Lattnerd7038e12009-02-13 00:46:04 +0000175 Tok.is(tok::wide_char_literal)*/)
176 return true;
Mike Stump1eb44332009-09-09 15:08:12 +0000177
Chris Lattnerd7038e12009-02-13 00:46:04 +0000178 // If this isn't identifier + string, we're done.
179 if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
180 return false;
Mike Stump1eb44332009-09-09 15:08:12 +0000181
Chris Lattnerd7038e12009-02-13 00:46:04 +0000182 // FIXME: need a wide_char_constant!
Mike Stump1eb44332009-09-09 15:08:12 +0000183
Chris Lattnerd7038e12009-02-13 00:46:04 +0000184 // If the string was a wide string L"foo" or wide char L'f', it would
185 // concat with the previous identifier into fooL"bar". Avoid this.
186 if (StartsWithL(Tok))
187 return true;
Mike Stump1eb44332009-09-09 15:08:12 +0000188
Chris Lattnerd7038e12009-02-13 00:46:04 +0000189 // Otherwise, this is a narrow character or string. If the *identifier*
190 // is a literal 'L', avoid pasting L "foo" -> L"foo".
191 return IsIdentifierL(PrevTok);
192 case tok::numeric_constant:
193 return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
194 FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
195 case tok::period: // ..., .*, .1234
Chris Lattner88773212010-04-14 03:57:19 +0000196 return (FirstChar == '.' && PrevPrevTok.is(tok::period)) ||
197 isdigit(FirstChar) ||
Eli Friedman8849f112009-06-15 19:48:50 +0000198 (PP.getLangOptions().CPlusPlus && FirstChar == '*');
Chris Lattnerd7038e12009-02-13 00:46:04 +0000199 case tok::amp: // &&
200 return FirstChar == '&';
201 case tok::plus: // ++
202 return FirstChar == '+';
203 case tok::minus: // --, ->, ->*
204 return FirstChar == '-' || FirstChar == '>';
205 case tok::slash: //, /*, //
206 return FirstChar == '*' || FirstChar == '/';
207 case tok::less: // <<, <<=, <:, <%
208 return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
209 case tok::greater: // >>, >>=
210 return FirstChar == '>';
211 case tok::pipe: // ||
212 return FirstChar == '|';
213 case tok::percent: // %>, %:
Eli Friedman896ccf82009-05-27 22:33:06 +0000214 return FirstChar == '>' || FirstChar == ':';
Chris Lattnerd7038e12009-02-13 00:46:04 +0000215 case tok::colon: // ::, :>
Eli Friedman8849f112009-06-15 19:48:50 +0000216 return FirstChar == '>' ||
217 (PP.getLangOptions().CPlusPlus && FirstChar == ':');
Chris Lattnerd7038e12009-02-13 00:46:04 +0000218 case tok::hash: // ##, #@, %:%:
219 return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
220 case tok::arrow: // ->*
Eli Friedman8849f112009-06-15 19:48:50 +0000221 return PP.getLangOptions().CPlusPlus && FirstChar == '*';
Chris Lattnerd7038e12009-02-13 00:46:04 +0000222 }
223}