//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This code simply runs the preprocessor on the input file and prints out the
// result.  This is the traditional behavior of the -E option.
//
//===----------------------------------------------------------------------===//

#include "clang.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Pragma.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Diagnostic.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/System/Path.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Config/config.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>
using namespace clang;

Chris Lattner4b009652007-07-25 00:24:17 +000030//===----------------------------------------------------------------------===//
31// Preprocessed token printer
32//===----------------------------------------------------------------------===//
33
34static llvm::cl::opt<bool>
35DisableLineMarkers("P", llvm::cl::desc("Disable linemarker output in -E mode"));
36static llvm::cl::opt<bool>
37EnableCommentOutput("C", llvm::cl::desc("Enable comment output in -E mode"));
38static llvm::cl::opt<bool>
39EnableMacroCommentOutput("CC",
40 llvm::cl::desc("Enable comment output in -E mode, "
41 "even from macro expansions"));
42
43namespace {
44class PrintPPOutputPPCallbacks : public PPCallbacks {
45 Preprocessor &PP;
Chris Lattner21494222008-08-17 03:12:02 +000046public:
47 llvm::raw_ostream &OS;
48private:
Chris Lattner4b009652007-07-25 00:24:17 +000049 unsigned CurLine;
50 bool EmittedTokensOnThisLine;
Chris Lattner7a4864e2008-10-27 01:19:25 +000051 SrcMgr::CharacteristicKind FileType;
Chris Lattner4b009652007-07-25 00:24:17 +000052 llvm::SmallString<512> CurFilename;
Daniel Dunbare0d59462008-09-05 03:22:57 +000053 bool Initialized;
Chris Lattner4b009652007-07-25 00:24:17 +000054public:
Chris Lattner21494222008-08-17 03:12:02 +000055 PrintPPOutputPPCallbacks(Preprocessor &pp, llvm::raw_ostream &os)
56 : PP(pp), OS(os) {
Chris Lattner4b009652007-07-25 00:24:17 +000057 CurLine = 0;
58 CurFilename += "<uninit>";
59 EmittedTokensOnThisLine = false;
Chris Lattner6f044062008-09-26 21:18:42 +000060 FileType = SrcMgr::C_User;
Daniel Dunbare0d59462008-09-05 03:22:57 +000061 Initialized = false;
Chris Lattner4b009652007-07-25 00:24:17 +000062 }
63
64 void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
65 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
66
67 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
Chris Lattner7a4864e2008-10-27 01:19:25 +000068 SrcMgr::CharacteristicKind FileType);
Chris Lattner4b009652007-07-25 00:24:17 +000069 virtual void Ident(SourceLocation Loc, const std::string &str);
Chris Lattner183b8392009-01-16 19:25:54 +000070 virtual void PragmaComment(SourceLocation Loc, const IdentifierInfo *Kind,
71 const std::string &Str);
72
Chris Lattner4b009652007-07-25 00:24:17 +000073
Chris Lattner6c451292007-12-09 21:11:08 +000074 bool HandleFirstTokOnLine(Token &Tok);
75 bool MoveToLine(SourceLocation Loc);
Chris Lattner4b009652007-07-25 00:24:17 +000076 bool AvoidConcat(const Token &PrevTok, const Token &Tok);
Daniel Dunbare0d59462008-09-05 03:22:57 +000077 void WriteLineInfo(unsigned LineNo, const char *Extra=0, unsigned ExtraLen=0);
Chris Lattner4b009652007-07-25 00:24:17 +000078};
Chris Lattner6619f662008-04-08 04:16:20 +000079} // end anonymous namespace
Chris Lattner4b009652007-07-25 00:24:17 +000080
Daniel Dunbare0d59462008-09-05 03:22:57 +000081void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
82 const char *Extra,
83 unsigned ExtraLen) {
84 if (EmittedTokensOnThisLine) {
85 OS << '\n';
86 EmittedTokensOnThisLine = false;
87 }
88
89 OS << '#' << ' ' << LineNo << ' ' << '"';
90 OS.write(&CurFilename[0], CurFilename.size());
91 OS << '"';
92
93 if (ExtraLen)
94 OS.write(Extra, ExtraLen);
95
Chris Lattner6f044062008-09-26 21:18:42 +000096 if (FileType == SrcMgr::C_System)
Daniel Dunbare0d59462008-09-05 03:22:57 +000097 OS.write(" 3", 2);
Chris Lattner6f044062008-09-26 21:18:42 +000098 else if (FileType == SrcMgr::C_ExternCSystem)
Daniel Dunbare0d59462008-09-05 03:22:57 +000099 OS.write(" 3 4", 4);
100 OS << '\n';
101}
102
Chris Lattner4b009652007-07-25 00:24:17 +0000103/// MoveToLine - Move the output to the source line specified by the location
104/// object. We can do this by emitting some number of \n's, or be emitting a
Chris Lattner6c451292007-12-09 21:11:08 +0000105/// #line directive. This returns false if already at the specified line, true
106/// if some newlines were emitted.
107bool PrintPPOutputPPCallbacks::MoveToLine(SourceLocation Loc) {
Chris Lattner18c8dc02009-01-16 07:36:28 +0000108 unsigned LineNo = PP.getSourceManager().getInstantiationLineNumber(Loc);
Daniel Dunbare0d59462008-09-05 03:22:57 +0000109
Chris Lattner4b009652007-07-25 00:24:17 +0000110 if (DisableLineMarkers) {
Chris Lattner6c451292007-12-09 21:11:08 +0000111 if (LineNo == CurLine) return false;
112
113 CurLine = LineNo;
114
115 if (!EmittedTokensOnThisLine)
116 return true;
117
Chris Lattner21494222008-08-17 03:12:02 +0000118 OS << '\n';
Chris Lattner6c451292007-12-09 21:11:08 +0000119 EmittedTokensOnThisLine = false;
120 return true;
Chris Lattner4b009652007-07-25 00:24:17 +0000121 }
Daniel Dunbare0d59462008-09-05 03:22:57 +0000122
Chris Lattner4b009652007-07-25 00:24:17 +0000123 // If this line is "close enough" to the original line, just print newlines,
124 // otherwise print a #line directive.
Daniel Dunbar6fa81e72008-09-26 01:13:35 +0000125 if (LineNo-CurLine <= 8) {
Chris Lattner4b009652007-07-25 00:24:17 +0000126 if (LineNo-CurLine == 1)
Chris Lattner21494222008-08-17 03:12:02 +0000127 OS << '\n';
Chris Lattner6c451292007-12-09 21:11:08 +0000128 else if (LineNo == CurLine)
Chris Lattner18c8dc02009-01-16 07:36:28 +0000129 return false; // Spelling line moved, but instantiation line didn't.
Chris Lattner4b009652007-07-25 00:24:17 +0000130 else {
131 const char *NewLines = "\n\n\n\n\n\n\n\n";
Chris Lattner21494222008-08-17 03:12:02 +0000132 OS.write(NewLines, LineNo-CurLine);
Chris Lattner4b009652007-07-25 00:24:17 +0000133 }
134 } else {
Daniel Dunbare0d59462008-09-05 03:22:57 +0000135 WriteLineInfo(LineNo, 0, 0);
Chris Lattner4b009652007-07-25 00:24:17 +0000136 }
Daniel Dunbare0d59462008-09-05 03:22:57 +0000137
138 CurLine = LineNo;
Chris Lattner6c451292007-12-09 21:11:08 +0000139 return true;
Chris Lattner4b009652007-07-25 00:24:17 +0000140}
141
142
143/// FileChanged - Whenever the preprocessor enters or exits a #include file
144/// it invokes this handler. Update our conception of the current source
145/// position.
146void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
147 FileChangeReason Reason,
Chris Lattner7a4864e2008-10-27 01:19:25 +0000148 SrcMgr::CharacteristicKind NewFileType) {
Chris Lattner4b009652007-07-25 00:24:17 +0000149 // Unless we are exiting a #include, make sure to skip ahead to the line the
150 // #include directive was at.
151 SourceManager &SourceMgr = PP.getSourceManager();
152 if (Reason == PPCallbacks::EnterFile) {
Chris Lattner092fc402009-01-30 18:44:17 +0000153 SourceLocation IncludeLoc = SourceMgr.getPresumedLoc(Loc).getIncludeLoc();
154 if (IncludeLoc.isValid())
155 MoveToLine(IncludeLoc);
Chris Lattner4b009652007-07-25 00:24:17 +0000156 } else if (Reason == PPCallbacks::SystemHeaderPragma) {
157 MoveToLine(Loc);
158
159 // TODO GCC emits the # directive for this directive on the line AFTER the
160 // directive and emits a bunch of spaces that aren't needed. Emulate this
161 // strange behavior.
162 }
163
Chris Lattner18c8dc02009-01-16 07:36:28 +0000164 Loc = SourceMgr.getInstantiationLoc(Loc);
Chris Lattner2d89c562009-02-04 01:06:56 +0000165 CurLine = SourceMgr.getInstantiationLineNumber(Loc);
Daniel Dunbare0d59462008-09-05 03:22:57 +0000166
Chris Lattner6c451292007-12-09 21:11:08 +0000167 if (DisableLineMarkers) return;
168
Chris Lattner4b009652007-07-25 00:24:17 +0000169 CurFilename.clear();
Chris Lattner836774b2009-01-27 07:57:44 +0000170 CurFilename += SourceMgr.getPresumedLoc(Loc).getFilename();
Chris Lattner4b009652007-07-25 00:24:17 +0000171 Lexer::Stringify(CurFilename);
Daniel Dunbare0d59462008-09-05 03:22:57 +0000172 FileType = NewFileType;
173
174 if (!Initialized) {
175 WriteLineInfo(CurLine);
176 Initialized = true;
Chris Lattner4b009652007-07-25 00:24:17 +0000177 }
Daniel Dunbare0d59462008-09-05 03:22:57 +0000178
Chris Lattner4b009652007-07-25 00:24:17 +0000179 switch (Reason) {
180 case PPCallbacks::EnterFile:
Daniel Dunbare0d59462008-09-05 03:22:57 +0000181 WriteLineInfo(CurLine, " 1", 2);
Chris Lattner4b009652007-07-25 00:24:17 +0000182 break;
183 case PPCallbacks::ExitFile:
Daniel Dunbare0d59462008-09-05 03:22:57 +0000184 WriteLineInfo(CurLine, " 2", 2);
Chris Lattner4b009652007-07-25 00:24:17 +0000185 break;
Daniel Dunbare0d59462008-09-05 03:22:57 +0000186 case PPCallbacks::SystemHeaderPragma:
187 case PPCallbacks::RenameFile:
188 WriteLineInfo(CurLine);
189 break;
Chris Lattner4b009652007-07-25 00:24:17 +0000190 }
Chris Lattner4b009652007-07-25 00:24:17 +0000191}
192
Chris Lattner183b8392009-01-16 19:25:54 +0000193/// Ident - Handle #ident directives when read by the preprocessor.
Chris Lattner4b009652007-07-25 00:24:17 +0000194///
195void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
196 MoveToLine(Loc);
197
Chris Lattner21494222008-08-17 03:12:02 +0000198 OS.write("#ident ", strlen("#ident "));
199 OS.write(&S[0], S.size());
Chris Lattner4b009652007-07-25 00:24:17 +0000200 EmittedTokensOnThisLine = true;
201}
202
Chris Lattner183b8392009-01-16 19:25:54 +0000203void PrintPPOutputPPCallbacks::PragmaComment(SourceLocation Loc,
204 const IdentifierInfo *Kind,
205 const std::string &Str) {
206 MoveToLine(Loc);
207 OS << "#pragma comment(" << Kind->getName();
208
209 if (!Str.empty()) {
210 OS << ", \"";
211
212 for (unsigned i = 0, e = Str.size(); i != e; ++i) {
213 unsigned char Char = Str[i];
Chris Lattnere1a7e712009-01-16 22:13:37 +0000214 if (isprint(Char) && Char != '\\' && Char != '"')
Chris Lattner183b8392009-01-16 19:25:54 +0000215 OS << (char)Char;
216 else // Output anything hard as an octal escape.
217 OS << '\\'
218 << (char)('0'+ ((Char >> 6) & 7))
219 << (char)('0'+ ((Char >> 3) & 7))
220 << (char)('0'+ ((Char >> 0) & 7));
221 }
222 OS << '"';
223 }
224
225 OS << ')';
226 EmittedTokensOnThisLine = true;
227}
228
229
Chris Lattner4b009652007-07-25 00:24:17 +0000230/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
Chris Lattner6c451292007-12-09 21:11:08 +0000231/// is called for the first token on each new line. If this really is the start
232/// of a new logical line, handle it and return true, otherwise return false.
233/// This may not be the start of a logical line because the "start of line"
Chris Lattner18c8dc02009-01-16 07:36:28 +0000234/// marker is set for spelling lines, not instantiation ones.
Chris Lattner6c451292007-12-09 21:11:08 +0000235bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
Chris Lattner4b009652007-07-25 00:24:17 +0000236 // Figure out what line we went to and insert the appropriate number of
237 // newline characters.
Chris Lattner6c451292007-12-09 21:11:08 +0000238 if (!MoveToLine(Tok.getLocation()))
239 return false;
Chris Lattner4b009652007-07-25 00:24:17 +0000240
241 // Print out space characters so that the first token on a line is
242 // indented for easy reading.
243 const SourceManager &SourceMgr = PP.getSourceManager();
Chris Lattner18c8dc02009-01-16 07:36:28 +0000244 unsigned ColNo = SourceMgr.getInstantiationColumnNumber(Tok.getLocation());
Chris Lattner4b009652007-07-25 00:24:17 +0000245
246 // This hack prevents stuff like:
247 // #define HASH #
248 // HASH define foo bar
249 // From having the # character end up at column 1, which makes it so it
250 // is not handled as a #define next time through the preprocessor if in
251 // -fpreprocessed mode.
Chris Lattner3b494152007-10-09 18:03:42 +0000252 if (ColNo <= 1 && Tok.is(tok::hash))
Chris Lattner21494222008-08-17 03:12:02 +0000253 OS << ' ';
Chris Lattner4b009652007-07-25 00:24:17 +0000254
255 // Otherwise, indent the appropriate number of spaces.
256 for (; ColNo > 1; --ColNo)
Chris Lattner21494222008-08-17 03:12:02 +0000257 OS << ' ';
Chris Lattner6c451292007-12-09 21:11:08 +0000258
259 return true;
Chris Lattner4b009652007-07-25 00:24:17 +0000260}
261
262namespace {
263struct UnknownPragmaHandler : public PragmaHandler {
264 const char *Prefix;
265 PrintPPOutputPPCallbacks *Callbacks;
266
267 UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks)
268 : PragmaHandler(0), Prefix(prefix), Callbacks(callbacks) {}
269 virtual void HandlePragma(Preprocessor &PP, Token &PragmaTok) {
270 // Figure out what line we went to and insert the appropriate number of
271 // newline characters.
272 Callbacks->MoveToLine(PragmaTok.getLocation());
Chris Lattner21494222008-08-17 03:12:02 +0000273 Callbacks->OS.write(Prefix, strlen(Prefix));
Chris Lattner4b009652007-07-25 00:24:17 +0000274
275 // Read and print all of the pragma tokens.
Chris Lattner3b494152007-10-09 18:03:42 +0000276 while (PragmaTok.isNot(tok::eom)) {
Chris Lattner4b009652007-07-25 00:24:17 +0000277 if (PragmaTok.hasLeadingSpace())
Chris Lattner21494222008-08-17 03:12:02 +0000278 Callbacks->OS << ' ';
Chris Lattner4b009652007-07-25 00:24:17 +0000279 std::string TokSpell = PP.getSpelling(PragmaTok);
Chris Lattner21494222008-08-17 03:12:02 +0000280 Callbacks->OS.write(&TokSpell[0], TokSpell.size());
Chris Lattner4b009652007-07-25 00:24:17 +0000281 PP.LexUnexpandedToken(PragmaTok);
282 }
Chris Lattner21494222008-08-17 03:12:02 +0000283 Callbacks->OS << '\n';
Chris Lattner4b009652007-07-25 00:24:17 +0000284 }
285};
286} // end anonymous namespace
287
288
/// AvoidConcatInfo - Per-token flags stored in the TokenInfo table and
/// interpreted by AvoidConcat.  The non-zero values are distinct bits, so they
/// can be or'ed together for a single token kind.
enum AvoidConcatInfo {
  /// By default, a token never needs to avoid concatenation.  Most tokens (e.g.
  /// ',', ')', etc) don't cause a problem when concatenated.
  aci_never_avoid_concat = 0,

  /// aci_custom_firstchar - AvoidConcat contains custom code to handle this
  /// token's requirements, and it needs to know the first character of the
  /// token.
  aci_custom_firstchar = 1,

  /// aci_custom - AvoidConcat contains custom code to handle this token's
  /// requirements, but it doesn't need to know the first character of the
  /// token.
  aci_custom = 2,

  /// aci_avoid_equal - Many tokens cannot be safely followed by an '='
  /// character.  For example, "<<" turns into "<<=" when followed by an =.
  aci_avoid_equal = 4
};

309/// This array contains information for each token on what action to take when
310/// avoiding concatenation of tokens in the AvoidConcat method.
311static char TokenInfo[tok::NUM_TOKENS];
312
313/// InitAvoidConcatTokenInfo - Tokens that must avoid concatenation should be
314/// marked by this function.
315static void InitAvoidConcatTokenInfo() {
316 // These tokens have custom code in AvoidConcat.
317 TokenInfo[tok::identifier ] |= aci_custom;
318 TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
319 TokenInfo[tok::period ] |= aci_custom_firstchar;
320 TokenInfo[tok::amp ] |= aci_custom_firstchar;
321 TokenInfo[tok::plus ] |= aci_custom_firstchar;
322 TokenInfo[tok::minus ] |= aci_custom_firstchar;
323 TokenInfo[tok::slash ] |= aci_custom_firstchar;
324 TokenInfo[tok::less ] |= aci_custom_firstchar;
325 TokenInfo[tok::greater ] |= aci_custom_firstchar;
326 TokenInfo[tok::pipe ] |= aci_custom_firstchar;
327 TokenInfo[tok::percent ] |= aci_custom_firstchar;
328 TokenInfo[tok::colon ] |= aci_custom_firstchar;
329 TokenInfo[tok::hash ] |= aci_custom_firstchar;
330 TokenInfo[tok::arrow ] |= aci_custom_firstchar;
331
332 // These tokens change behavior if followed by an '='.
333 TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
334 TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
335 TokenInfo[tok::minus ] |= aci_avoid_equal; // -=
336 TokenInfo[tok::slash ] |= aci_avoid_equal; // /=
337 TokenInfo[tok::less ] |= aci_avoid_equal; // <=
338 TokenInfo[tok::greater ] |= aci_avoid_equal; // >=
339 TokenInfo[tok::pipe ] |= aci_avoid_equal; // |=
340 TokenInfo[tok::percent ] |= aci_avoid_equal; // %=
341 TokenInfo[tok::star ] |= aci_avoid_equal; // *=
342 TokenInfo[tok::exclaim ] |= aci_avoid_equal; // !=
343 TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<=
344 TokenInfo[tok::greaterequal] |= aci_avoid_equal; // >>=
345 TokenInfo[tok::caret ] |= aci_avoid_equal; // ^=
346 TokenInfo[tok::equal ] |= aci_avoid_equal; // ==
347}
348
Chris Lattnerafa40122008-01-15 05:22:14 +0000349/// StartsWithL - Return true if the spelling of this token starts with 'L'.
Chris Lattner400f0242008-01-15 05:14:19 +0000350static bool StartsWithL(const Token &Tok, Preprocessor &PP) {
Chris Lattner400f0242008-01-15 05:14:19 +0000351 if (!Tok.needsCleaning()) {
352 SourceManager &SrcMgr = PP.getSourceManager();
Chris Lattnercdf600e2009-01-16 07:00:02 +0000353 return *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()))
Chris Lattner400f0242008-01-15 05:14:19 +0000354 == 'L';
355 }
356
357 if (Tok.getLength() < 256) {
Chris Lattnerafa40122008-01-15 05:22:14 +0000358 char Buffer[256];
Chris Lattner400f0242008-01-15 05:14:19 +0000359 const char *TokPtr = Buffer;
360 PP.getSpelling(Tok, TokPtr);
361 return TokPtr[0] == 'L';
362 }
363
364 return PP.getSpelling(Tok)[0] == 'L';
365}
366
Chris Lattnerafa40122008-01-15 05:22:14 +0000367/// IsIdentifierL - Return true if the spelling of this token is literally 'L'.
368static bool IsIdentifierL(const Token &Tok, Preprocessor &PP) {
369 if (!Tok.needsCleaning()) {
370 if (Tok.getLength() != 1)
371 return false;
372 SourceManager &SrcMgr = PP.getSourceManager();
Chris Lattnercdf600e2009-01-16 07:00:02 +0000373 return *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()))
Chris Lattnerafa40122008-01-15 05:22:14 +0000374 == 'L';
375 }
376
377 if (Tok.getLength() < 256) {
378 char Buffer[256];
379 const char *TokPtr = Buffer;
380 if (PP.getSpelling(Tok, TokPtr) != 1)
381 return false;
382 return TokPtr[0] == 'L';
383 }
384
385 return PP.getSpelling(Tok) == "L";
386}
387
388
Chris Lattner4b009652007-07-25 00:24:17 +0000389/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
390/// the two individual tokens to be lexed as a single token, return true (which
391/// causes a space to be printed between them). This allows the output of -E
392/// mode to be lexed to the same token stream as lexing the input directly
393/// would.
394///
395/// This code must conservatively return true if it doesn't want to be 100%
396/// accurate. This will cause the output to include extra space characters, but
397/// the resulting output won't have incorrect concatenations going on. Examples
398/// include "..", which we print with a space between, because we don't want to
399/// track enough to tell "x.." from "...".
400bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
401 const Token &Tok) {
402 char Buffer[256];
403
404 tok::TokenKind PrevKind = PrevTok.getKind();
405 if (PrevTok.getIdentifierInfo()) // Language keyword or named operator.
406 PrevKind = tok::identifier;
407
408 // Look up information on when we should avoid concatenation with prevtok.
409 unsigned ConcatInfo = TokenInfo[PrevKind];
410
411 // If prevtok never causes a problem for anything after it, return quickly.
412 if (ConcatInfo == 0) return false;
413
414 if (ConcatInfo & aci_avoid_equal) {
415 // If the next token is '=' or '==', avoid concatenation.
Chris Lattner3b494152007-10-09 18:03:42 +0000416 if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
Chris Lattner4b009652007-07-25 00:24:17 +0000417 return true;
418 ConcatInfo &= ~aci_avoid_equal;
419 }
420
421 if (ConcatInfo == 0) return false;
422
423
424
425 // Basic algorithm: we look at the first character of the second token, and
426 // determine whether it, if appended to the first token, would form (or would
427 // contribute) to a larger token if concatenated.
428 char FirstChar = 0;
429 if (ConcatInfo & aci_custom) {
430 // If the token does not need to know the first character, don't get it.
431 } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
432 // Avoid spelling identifiers, the most common form of token.
433 FirstChar = II->getName()[0];
434 } else if (!Tok.needsCleaning()) {
Chris Lattnerdb8f9572009-01-26 19:33:54 +0000435 if (Tok.isLiteral() && Tok.getLiteralData()) {
436 FirstChar = *Tok.getLiteralData();
437 } else {
438 SourceManager &SrcMgr = PP.getSourceManager();
439 FirstChar =
440 *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()));
441 }
Chris Lattner4b009652007-07-25 00:24:17 +0000442 } else if (Tok.getLength() < 256) {
443 const char *TokPtr = Buffer;
444 PP.getSpelling(Tok, TokPtr);
445 FirstChar = TokPtr[0];
446 } else {
447 FirstChar = PP.getSpelling(Tok)[0];
448 }
449
450 switch (PrevKind) {
451 default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
452 case tok::identifier: // id+id or id+number or id+L"foo".
Chris Lattner3b494152007-10-09 18:03:42 +0000453 if (Tok.is(tok::numeric_constant) || Tok.getIdentifierInfo() ||
454 Tok.is(tok::wide_string_literal) /* ||
455 Tok.is(tok::wide_char_literal)*/)
Chris Lattner4b009652007-07-25 00:24:17 +0000456 return true;
Chris Lattner400f0242008-01-15 05:14:19 +0000457
458 // If this isn't identifier + string, we're done.
459 if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
Chris Lattner4b009652007-07-25 00:24:17 +0000460 return false;
461
462 // FIXME: need a wide_char_constant!
Chris Lattner400f0242008-01-15 05:14:19 +0000463
464 // If the string was a wide string L"foo" or wide char L'f', it would concat
465 // with the previous identifier into fooL"bar". Avoid this.
466 if (StartsWithL(Tok, PP))
467 return true;
468
Chris Lattnerafa40122008-01-15 05:22:14 +0000469 // Otherwise, this is a narrow character or string. If the *identifier* is
470 // a literal 'L', avoid pasting L "foo" -> L"foo".
471 return IsIdentifierL(PrevTok, PP);
Chris Lattner4b009652007-07-25 00:24:17 +0000472 case tok::numeric_constant:
Chris Lattner3b494152007-10-09 18:03:42 +0000473 return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
Chris Lattner4b009652007-07-25 00:24:17 +0000474 FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
475 case tok::period: // ..., .*, .1234
Chris Lattner6a316812009-01-11 19:48:19 +0000476 return FirstChar == '.' || isdigit(FirstChar) ||
477 (FirstChar == '*' && PP.getLangOptions().CPlusPlus);
Chris Lattner4b009652007-07-25 00:24:17 +0000478 case tok::amp: // &&
479 return FirstChar == '&';
480 case tok::plus: // ++
481 return FirstChar == '+';
482 case tok::minus: // --, ->, ->*
483 return FirstChar == '-' || FirstChar == '>';
484 case tok::slash: //, /*, //
485 return FirstChar == '*' || FirstChar == '/';
486 case tok::less: // <<, <<=, <:, <%
487 return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
488 case tok::greater: // >>, >>=
489 return FirstChar == '>';
490 case tok::pipe: // ||
491 return FirstChar == '|';
492 case tok::percent: // %>, %:
Chris Lattner6a316812009-01-11 19:48:19 +0000493 return (FirstChar == '>' || FirstChar == ':') &&
494 PP.getLangOptions().Digraphs;
Chris Lattner4b009652007-07-25 00:24:17 +0000495 case tok::colon: // ::, :>
Chris Lattner6a316812009-01-11 19:48:19 +0000496 return (FirstChar == ':' && PP.getLangOptions().CPlusPlus) ||
497 (FirstChar == '>' && PP.getLangOptions().Digraphs);
Chris Lattner4b009652007-07-25 00:24:17 +0000498 case tok::hash: // ##, #@, %:%:
499 return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
500 case tok::arrow: // ->*
501 return FirstChar == '*';
502 }
503}
504
505/// DoPrintPreprocessedInput - This implements -E mode.
506///
Chris Lattner6619f662008-04-08 04:16:20 +0000507void clang::DoPrintPreprocessedInput(Preprocessor &PP,
508 const std::string &OutFile) {
Chris Lattner4b009652007-07-25 00:24:17 +0000509 // Inform the preprocessor whether we want it to retain comments or not, due
510 // to -C or -CC.
511 PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput);
Chris Lattner4b009652007-07-25 00:24:17 +0000512 InitAvoidConcatTokenInfo();
Chris Lattner21494222008-08-17 03:12:02 +0000513
514
515 // Open the output buffer.
Chris Lattner1be96902008-08-17 03:54:39 +0000516 std::string Err;
Daniel Dunbar8fc9ba62008-11-13 05:09:21 +0000517 llvm::raw_fd_ostream OS(OutFile.empty() ? "-" : OutFile.c_str(), false, Err);
Chris Lattner1be96902008-08-17 03:54:39 +0000518 if (!Err.empty()) {
519 fprintf(stderr, "%s\n", Err.c_str());
520 exit(1);
Chris Lattner21494222008-08-17 03:12:02 +0000521 }
Chris Lattner21494222008-08-17 03:12:02 +0000522
Chris Lattner1be96902008-08-17 03:54:39 +0000523 OS.SetBufferSize(64*1024);
524
Chris Lattner4b009652007-07-25 00:24:17 +0000525
526 Token Tok, PrevTok;
527 char Buffer[256];
Chris Lattner21494222008-08-17 03:12:02 +0000528 PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(PP, OS);
Chris Lattner4b009652007-07-25 00:24:17 +0000529 PP.setPPCallbacks(Callbacks);
530
531 PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks));
532 PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",Callbacks));
533
534 // After we have configured the preprocessor, enter the main file.
535
536 // Start parsing the specified input file.
Ted Kremenek17861c52007-12-19 22:51:13 +0000537 PP.EnterMainSourceFile();
Chris Lattner3eddc862007-10-10 20:45:16 +0000538
539 // Consume all of the tokens that come from the predefines buffer. Those
540 // should not be emitted into the output and are guaranteed to be at the
541 // start.
542 const SourceManager &SourceMgr = PP.getSourceManager();
543 do PP.Lex(Tok);
Chris Lattner890c5932007-10-10 23:31:03 +0000544 while (Tok.isNot(tok::eof) && Tok.getLocation().isFileID() &&
Chris Lattner836774b2009-01-27 07:57:44 +0000545 !strcmp(SourceMgr.getPresumedLoc(Tok.getLocation()).getFilename(),
546 "<predefines>"));
Chris Lattner3eddc862007-10-10 20:45:16 +0000547
548 while (1) {
Chris Lattner4b009652007-07-25 00:24:17 +0000549
550 // If this token is at the start of a line, emit newlines if needed.
Chris Lattner6c451292007-12-09 21:11:08 +0000551 if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
552 // done.
Chris Lattner4b009652007-07-25 00:24:17 +0000553 } else if (Tok.hasLeadingSpace() ||
554 // If we haven't emitted a token on this line yet, PrevTok isn't
555 // useful to look at and no concatenation could happen anyway.
556 (Callbacks->hasEmittedTokensOnThisLine() &&
557 // Don't print "-" next to "-", it would form "--".
558 Callbacks->AvoidConcat(PrevTok, Tok))) {
Chris Lattner21494222008-08-17 03:12:02 +0000559 OS << ' ';
Chris Lattner4b009652007-07-25 00:24:17 +0000560 }
561
562 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
Chris Lattnerdb8f9572009-01-26 19:33:54 +0000563 OS.write(II->getName(), II->getLength());
564 } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
565 Tok.getLiteralData()) {
566 OS.write(Tok.getLiteralData(), Tok.getLength());
Chris Lattner4b009652007-07-25 00:24:17 +0000567 } else if (Tok.getLength() < 256) {
568 const char *TokPtr = Buffer;
569 unsigned Len = PP.getSpelling(Tok, TokPtr);
Chris Lattner21494222008-08-17 03:12:02 +0000570 OS.write(TokPtr, Len);
Chris Lattner4b009652007-07-25 00:24:17 +0000571 } else {
572 std::string S = PP.getSpelling(Tok);
Chris Lattner21494222008-08-17 03:12:02 +0000573 OS.write(&S[0], S.size());
Chris Lattner4b009652007-07-25 00:24:17 +0000574 }
575 Callbacks->SetEmittedTokensOnThisLine();
Chris Lattner3eddc862007-10-10 20:45:16 +0000576
577 if (Tok.is(tok::eof)) break;
578
579 PrevTok = Tok;
580 PP.Lex(Tok);
581 }
Chris Lattner21494222008-08-17 03:12:02 +0000582 OS << '\n';
Chris Lattner4b009652007-07-25 00:24:17 +0000583
Chris Lattnercb7f1802008-08-17 07:07:01 +0000584 // Flush the ostream.
585 OS.flush();
Chris Lattner21494222008-08-17 03:12:02 +0000586
587 // If an error occurred, remove the output file.
588 if (PP.getDiagnostics().hasErrorOccurred() && !OutFile.empty())
589 llvm::sys::Path(OutFile).eraseFromDisk();
Chris Lattner4b009652007-07-25 00:24:17 +0000590}
591