blob: 64cc4c8be65bdcbaba8f39ac99f23cf8f69334c8 [file] [log] [blame]
//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This code simply runs the preprocessor on the input file and prints out the
// result.  This is the traditional behavior of the -E option.
//
//===----------------------------------------------------------------------===//
14
15#include "clang.h"
16#include "clang/Lex/PPCallbacks.h"
17#include "clang/Lex/Preprocessor.h"
18#include "clang/Lex/Pragma.h"
19#include "clang/Basic/SourceManager.h"
Chris Lattner6619f662008-04-08 04:16:20 +000020#include "clang/Basic/Diagnostic.h"
Chris Lattner4b009652007-07-25 00:24:17 +000021#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/StringExtras.h"
Chris Lattner6619f662008-04-08 04:16:20 +000023#include "llvm/System/Path.h"
24#include "llvm/Support/CommandLine.h"
Chris Lattner4b009652007-07-25 00:24:17 +000025#include "llvm/Config/config.h"
Chris Lattner93b4f302008-08-17 01:47:12 +000026#include "llvm/Support/raw_ostream.h"
Chris Lattner4b009652007-07-25 00:24:17 +000027#include <cstdio>
28using namespace clang;
29
//===----------------------------------------------------------------------===//
// Preprocessed token printer
//===----------------------------------------------------------------------===//
33
34static llvm::cl::opt<bool>
35DisableLineMarkers("P", llvm::cl::desc("Disable linemarker output in -E mode"));
36static llvm::cl::opt<bool>
37EnableCommentOutput("C", llvm::cl::desc("Enable comment output in -E mode"));
38static llvm::cl::opt<bool>
39EnableMacroCommentOutput("CC",
40 llvm::cl::desc("Enable comment output in -E mode, "
41 "even from macro expansions"));
42
43namespace {
44class PrintPPOutputPPCallbacks : public PPCallbacks {
45 Preprocessor &PP;
Chris Lattner21494222008-08-17 03:12:02 +000046public:
47 llvm::raw_ostream &OS;
48private:
Chris Lattner4b009652007-07-25 00:24:17 +000049 unsigned CurLine;
50 bool EmittedTokensOnThisLine;
Chris Lattner7a4864e2008-10-27 01:19:25 +000051 SrcMgr::CharacteristicKind FileType;
Chris Lattner4b009652007-07-25 00:24:17 +000052 llvm::SmallString<512> CurFilename;
Daniel Dunbare0d59462008-09-05 03:22:57 +000053 bool Initialized;
Chris Lattner4b009652007-07-25 00:24:17 +000054public:
Chris Lattner21494222008-08-17 03:12:02 +000055 PrintPPOutputPPCallbacks(Preprocessor &pp, llvm::raw_ostream &os)
56 : PP(pp), OS(os) {
Chris Lattner4b009652007-07-25 00:24:17 +000057 CurLine = 0;
58 CurFilename += "<uninit>";
59 EmittedTokensOnThisLine = false;
Chris Lattner6f044062008-09-26 21:18:42 +000060 FileType = SrcMgr::C_User;
Daniel Dunbare0d59462008-09-05 03:22:57 +000061 Initialized = false;
Chris Lattner4b009652007-07-25 00:24:17 +000062 }
63
64 void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
65 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
66
67 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
Chris Lattner7a4864e2008-10-27 01:19:25 +000068 SrcMgr::CharacteristicKind FileType);
Chris Lattner4b009652007-07-25 00:24:17 +000069 virtual void Ident(SourceLocation Loc, const std::string &str);
Chris Lattner183b8392009-01-16 19:25:54 +000070 virtual void PragmaComment(SourceLocation Loc, const IdentifierInfo *Kind,
71 const std::string &Str);
72
Chris Lattner4b009652007-07-25 00:24:17 +000073
Chris Lattner6c451292007-12-09 21:11:08 +000074 bool HandleFirstTokOnLine(Token &Tok);
75 bool MoveToLine(SourceLocation Loc);
Chris Lattner4b009652007-07-25 00:24:17 +000076 bool AvoidConcat(const Token &PrevTok, const Token &Tok);
Daniel Dunbare0d59462008-09-05 03:22:57 +000077 void WriteLineInfo(unsigned LineNo, const char *Extra=0, unsigned ExtraLen=0);
Chris Lattner4b009652007-07-25 00:24:17 +000078};
Chris Lattner6619f662008-04-08 04:16:20 +000079} // end anonymous namespace
Chris Lattner4b009652007-07-25 00:24:17 +000080
Daniel Dunbare0d59462008-09-05 03:22:57 +000081void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
82 const char *Extra,
83 unsigned ExtraLen) {
84 if (EmittedTokensOnThisLine) {
85 OS << '\n';
86 EmittedTokensOnThisLine = false;
87 }
88
89 OS << '#' << ' ' << LineNo << ' ' << '"';
90 OS.write(&CurFilename[0], CurFilename.size());
91 OS << '"';
92
93 if (ExtraLen)
94 OS.write(Extra, ExtraLen);
95
Chris Lattner6f044062008-09-26 21:18:42 +000096 if (FileType == SrcMgr::C_System)
Daniel Dunbare0d59462008-09-05 03:22:57 +000097 OS.write(" 3", 2);
Chris Lattner6f044062008-09-26 21:18:42 +000098 else if (FileType == SrcMgr::C_ExternCSystem)
Daniel Dunbare0d59462008-09-05 03:22:57 +000099 OS.write(" 3 4", 4);
100 OS << '\n';
101}
102
Chris Lattner4b009652007-07-25 00:24:17 +0000103/// MoveToLine - Move the output to the source line specified by the location
104/// object. We can do this by emitting some number of \n's, or be emitting a
Chris Lattner6c451292007-12-09 21:11:08 +0000105/// #line directive. This returns false if already at the specified line, true
106/// if some newlines were emitted.
107bool PrintPPOutputPPCallbacks::MoveToLine(SourceLocation Loc) {
Chris Lattner18c8dc02009-01-16 07:36:28 +0000108 unsigned LineNo = PP.getSourceManager().getInstantiationLineNumber(Loc);
Daniel Dunbare0d59462008-09-05 03:22:57 +0000109
Chris Lattner4b009652007-07-25 00:24:17 +0000110 if (DisableLineMarkers) {
Chris Lattner6c451292007-12-09 21:11:08 +0000111 if (LineNo == CurLine) return false;
112
113 CurLine = LineNo;
114
115 if (!EmittedTokensOnThisLine)
116 return true;
117
Chris Lattner21494222008-08-17 03:12:02 +0000118 OS << '\n';
Chris Lattner6c451292007-12-09 21:11:08 +0000119 EmittedTokensOnThisLine = false;
120 return true;
Chris Lattner4b009652007-07-25 00:24:17 +0000121 }
Daniel Dunbare0d59462008-09-05 03:22:57 +0000122
Chris Lattner4b009652007-07-25 00:24:17 +0000123 // If this line is "close enough" to the original line, just print newlines,
124 // otherwise print a #line directive.
Daniel Dunbar6fa81e72008-09-26 01:13:35 +0000125 if (LineNo-CurLine <= 8) {
Chris Lattner4b009652007-07-25 00:24:17 +0000126 if (LineNo-CurLine == 1)
Chris Lattner21494222008-08-17 03:12:02 +0000127 OS << '\n';
Chris Lattner6c451292007-12-09 21:11:08 +0000128 else if (LineNo == CurLine)
Chris Lattner18c8dc02009-01-16 07:36:28 +0000129 return false; // Spelling line moved, but instantiation line didn't.
Chris Lattner4b009652007-07-25 00:24:17 +0000130 else {
131 const char *NewLines = "\n\n\n\n\n\n\n\n";
Chris Lattner21494222008-08-17 03:12:02 +0000132 OS.write(NewLines, LineNo-CurLine);
Chris Lattner4b009652007-07-25 00:24:17 +0000133 }
134 } else {
Daniel Dunbare0d59462008-09-05 03:22:57 +0000135 WriteLineInfo(LineNo, 0, 0);
Chris Lattner4b009652007-07-25 00:24:17 +0000136 }
Daniel Dunbare0d59462008-09-05 03:22:57 +0000137
138 CurLine = LineNo;
Chris Lattner6c451292007-12-09 21:11:08 +0000139 return true;
Chris Lattner4b009652007-07-25 00:24:17 +0000140}
141
142
143/// FileChanged - Whenever the preprocessor enters or exits a #include file
144/// it invokes this handler. Update our conception of the current source
145/// position.
146void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
147 FileChangeReason Reason,
Chris Lattner7a4864e2008-10-27 01:19:25 +0000148 SrcMgr::CharacteristicKind NewFileType) {
Chris Lattner4b009652007-07-25 00:24:17 +0000149 // Unless we are exiting a #include, make sure to skip ahead to the line the
150 // #include directive was at.
151 SourceManager &SourceMgr = PP.getSourceManager();
152 if (Reason == PPCallbacks::EnterFile) {
153 MoveToLine(SourceMgr.getIncludeLoc(Loc));
154 } else if (Reason == PPCallbacks::SystemHeaderPragma) {
155 MoveToLine(Loc);
156
157 // TODO GCC emits the # directive for this directive on the line AFTER the
158 // directive and emits a bunch of spaces that aren't needed. Emulate this
159 // strange behavior.
160 }
161
Chris Lattner18c8dc02009-01-16 07:36:28 +0000162 Loc = SourceMgr.getInstantiationLoc(Loc);
Chris Lattner4b009652007-07-25 00:24:17 +0000163 CurLine = SourceMgr.getLineNumber(Loc);
Daniel Dunbare0d59462008-09-05 03:22:57 +0000164
Chris Lattner6c451292007-12-09 21:11:08 +0000165 if (DisableLineMarkers) return;
166
Chris Lattner4b009652007-07-25 00:24:17 +0000167 CurFilename.clear();
168 CurFilename += SourceMgr.getSourceName(Loc);
169 Lexer::Stringify(CurFilename);
Daniel Dunbare0d59462008-09-05 03:22:57 +0000170 FileType = NewFileType;
171
172 if (!Initialized) {
173 WriteLineInfo(CurLine);
174 Initialized = true;
Chris Lattner4b009652007-07-25 00:24:17 +0000175 }
Daniel Dunbare0d59462008-09-05 03:22:57 +0000176
Chris Lattner4b009652007-07-25 00:24:17 +0000177 switch (Reason) {
178 case PPCallbacks::EnterFile:
Daniel Dunbare0d59462008-09-05 03:22:57 +0000179 WriteLineInfo(CurLine, " 1", 2);
Chris Lattner4b009652007-07-25 00:24:17 +0000180 break;
181 case PPCallbacks::ExitFile:
Daniel Dunbare0d59462008-09-05 03:22:57 +0000182 WriteLineInfo(CurLine, " 2", 2);
Chris Lattner4b009652007-07-25 00:24:17 +0000183 break;
Daniel Dunbare0d59462008-09-05 03:22:57 +0000184 case PPCallbacks::SystemHeaderPragma:
185 case PPCallbacks::RenameFile:
186 WriteLineInfo(CurLine);
187 break;
Chris Lattner4b009652007-07-25 00:24:17 +0000188 }
Chris Lattner4b009652007-07-25 00:24:17 +0000189}
190
Chris Lattner183b8392009-01-16 19:25:54 +0000191/// Ident - Handle #ident directives when read by the preprocessor.
Chris Lattner4b009652007-07-25 00:24:17 +0000192///
193void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
194 MoveToLine(Loc);
195
Chris Lattner21494222008-08-17 03:12:02 +0000196 OS.write("#ident ", strlen("#ident "));
197 OS.write(&S[0], S.size());
Chris Lattner4b009652007-07-25 00:24:17 +0000198 EmittedTokensOnThisLine = true;
199}
200
Chris Lattner183b8392009-01-16 19:25:54 +0000201void PrintPPOutputPPCallbacks::PragmaComment(SourceLocation Loc,
202 const IdentifierInfo *Kind,
203 const std::string &Str) {
204 MoveToLine(Loc);
205 OS << "#pragma comment(" << Kind->getName();
206
207 if (!Str.empty()) {
208 OS << ", \"";
209
210 for (unsigned i = 0, e = Str.size(); i != e; ++i) {
211 unsigned char Char = Str[i];
Chris Lattnere1a7e712009-01-16 22:13:37 +0000212 if (isprint(Char) && Char != '\\' && Char != '"')
Chris Lattner183b8392009-01-16 19:25:54 +0000213 OS << (char)Char;
214 else // Output anything hard as an octal escape.
215 OS << '\\'
216 << (char)('0'+ ((Char >> 6) & 7))
217 << (char)('0'+ ((Char >> 3) & 7))
218 << (char)('0'+ ((Char >> 0) & 7));
219 }
220 OS << '"';
221 }
222
223 OS << ')';
224 EmittedTokensOnThisLine = true;
225}
226
227
Chris Lattner4b009652007-07-25 00:24:17 +0000228/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
Chris Lattner6c451292007-12-09 21:11:08 +0000229/// is called for the first token on each new line. If this really is the start
230/// of a new logical line, handle it and return true, otherwise return false.
231/// This may not be the start of a logical line because the "start of line"
Chris Lattner18c8dc02009-01-16 07:36:28 +0000232/// marker is set for spelling lines, not instantiation ones.
Chris Lattner6c451292007-12-09 21:11:08 +0000233bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
Chris Lattner4b009652007-07-25 00:24:17 +0000234 // Figure out what line we went to and insert the appropriate number of
235 // newline characters.
Chris Lattner6c451292007-12-09 21:11:08 +0000236 if (!MoveToLine(Tok.getLocation()))
237 return false;
Chris Lattner4b009652007-07-25 00:24:17 +0000238
239 // Print out space characters so that the first token on a line is
240 // indented for easy reading.
241 const SourceManager &SourceMgr = PP.getSourceManager();
Chris Lattner18c8dc02009-01-16 07:36:28 +0000242 unsigned ColNo = SourceMgr.getInstantiationColumnNumber(Tok.getLocation());
Chris Lattner4b009652007-07-25 00:24:17 +0000243
244 // This hack prevents stuff like:
245 // #define HASH #
246 // HASH define foo bar
247 // From having the # character end up at column 1, which makes it so it
248 // is not handled as a #define next time through the preprocessor if in
249 // -fpreprocessed mode.
Chris Lattner3b494152007-10-09 18:03:42 +0000250 if (ColNo <= 1 && Tok.is(tok::hash))
Chris Lattner21494222008-08-17 03:12:02 +0000251 OS << ' ';
Chris Lattner4b009652007-07-25 00:24:17 +0000252
253 // Otherwise, indent the appropriate number of spaces.
254 for (; ColNo > 1; --ColNo)
Chris Lattner21494222008-08-17 03:12:02 +0000255 OS << ' ';
Chris Lattner6c451292007-12-09 21:11:08 +0000256
257 return true;
Chris Lattner4b009652007-07-25 00:24:17 +0000258}
259
260namespace {
261struct UnknownPragmaHandler : public PragmaHandler {
262 const char *Prefix;
263 PrintPPOutputPPCallbacks *Callbacks;
264
265 UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks)
266 : PragmaHandler(0), Prefix(prefix), Callbacks(callbacks) {}
267 virtual void HandlePragma(Preprocessor &PP, Token &PragmaTok) {
268 // Figure out what line we went to and insert the appropriate number of
269 // newline characters.
270 Callbacks->MoveToLine(PragmaTok.getLocation());
Chris Lattner21494222008-08-17 03:12:02 +0000271 Callbacks->OS.write(Prefix, strlen(Prefix));
Chris Lattner4b009652007-07-25 00:24:17 +0000272
273 // Read and print all of the pragma tokens.
Chris Lattner3b494152007-10-09 18:03:42 +0000274 while (PragmaTok.isNot(tok::eom)) {
Chris Lattner4b009652007-07-25 00:24:17 +0000275 if (PragmaTok.hasLeadingSpace())
Chris Lattner21494222008-08-17 03:12:02 +0000276 Callbacks->OS << ' ';
Chris Lattner4b009652007-07-25 00:24:17 +0000277 std::string TokSpell = PP.getSpelling(PragmaTok);
Chris Lattner21494222008-08-17 03:12:02 +0000278 Callbacks->OS.write(&TokSpell[0], TokSpell.size());
Chris Lattner4b009652007-07-25 00:24:17 +0000279 PP.LexUnexpandedToken(PragmaTok);
280 }
Chris Lattner21494222008-08-17 03:12:02 +0000281 Callbacks->OS << '\n';
Chris Lattner4b009652007-07-25 00:24:17 +0000282 }
283};
284} // end anonymous namespace
285
286
/// AvoidConcatInfo - Bit flags stored per token kind that tell AvoidConcat
/// what a token requires of the token printed right after it.
enum AvoidConcatInfo {
  /// Default: the token never needs to avoid concatenation.  Most tokens
  /// (e.g. ',', ')', etc) don't cause a problem when concatenated.
  aci_never_avoid_concat = 0,

  /// AvoidConcat has custom code for this token, and that code needs to know
  /// the first character of the following token.
  aci_custom_firstchar = 1 << 0,

  /// AvoidConcat has custom code for this token, but that code does not need
  /// the first character of the following token.
  aci_custom = 1 << 1,

  /// The token cannot be safely followed by an '=' character.  For example,
  /// "<<" turns into "<<=" when followed by an =.
  aci_avoid_equal = 1 << 2
};
306
307/// This array contains information for each token on what action to take when
308/// avoiding concatenation of tokens in the AvoidConcat method.
309static char TokenInfo[tok::NUM_TOKENS];
310
311/// InitAvoidConcatTokenInfo - Tokens that must avoid concatenation should be
312/// marked by this function.
313static void InitAvoidConcatTokenInfo() {
314 // These tokens have custom code in AvoidConcat.
315 TokenInfo[tok::identifier ] |= aci_custom;
316 TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
317 TokenInfo[tok::period ] |= aci_custom_firstchar;
318 TokenInfo[tok::amp ] |= aci_custom_firstchar;
319 TokenInfo[tok::plus ] |= aci_custom_firstchar;
320 TokenInfo[tok::minus ] |= aci_custom_firstchar;
321 TokenInfo[tok::slash ] |= aci_custom_firstchar;
322 TokenInfo[tok::less ] |= aci_custom_firstchar;
323 TokenInfo[tok::greater ] |= aci_custom_firstchar;
324 TokenInfo[tok::pipe ] |= aci_custom_firstchar;
325 TokenInfo[tok::percent ] |= aci_custom_firstchar;
326 TokenInfo[tok::colon ] |= aci_custom_firstchar;
327 TokenInfo[tok::hash ] |= aci_custom_firstchar;
328 TokenInfo[tok::arrow ] |= aci_custom_firstchar;
329
330 // These tokens change behavior if followed by an '='.
331 TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
332 TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
333 TokenInfo[tok::minus ] |= aci_avoid_equal; // -=
334 TokenInfo[tok::slash ] |= aci_avoid_equal; // /=
335 TokenInfo[tok::less ] |= aci_avoid_equal; // <=
336 TokenInfo[tok::greater ] |= aci_avoid_equal; // >=
337 TokenInfo[tok::pipe ] |= aci_avoid_equal; // |=
338 TokenInfo[tok::percent ] |= aci_avoid_equal; // %=
339 TokenInfo[tok::star ] |= aci_avoid_equal; // *=
340 TokenInfo[tok::exclaim ] |= aci_avoid_equal; // !=
341 TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<=
342 TokenInfo[tok::greaterequal] |= aci_avoid_equal; // >>=
343 TokenInfo[tok::caret ] |= aci_avoid_equal; // ^=
344 TokenInfo[tok::equal ] |= aci_avoid_equal; // ==
345}
346
Chris Lattnerafa40122008-01-15 05:22:14 +0000347/// StartsWithL - Return true if the spelling of this token starts with 'L'.
Chris Lattner400f0242008-01-15 05:14:19 +0000348static bool StartsWithL(const Token &Tok, Preprocessor &PP) {
Chris Lattner400f0242008-01-15 05:14:19 +0000349 if (!Tok.needsCleaning()) {
350 SourceManager &SrcMgr = PP.getSourceManager();
Chris Lattnercdf600e2009-01-16 07:00:02 +0000351 return *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()))
Chris Lattner400f0242008-01-15 05:14:19 +0000352 == 'L';
353 }
354
355 if (Tok.getLength() < 256) {
Chris Lattnerafa40122008-01-15 05:22:14 +0000356 char Buffer[256];
Chris Lattner400f0242008-01-15 05:14:19 +0000357 const char *TokPtr = Buffer;
358 PP.getSpelling(Tok, TokPtr);
359 return TokPtr[0] == 'L';
360 }
361
362 return PP.getSpelling(Tok)[0] == 'L';
363}
364
Chris Lattnerafa40122008-01-15 05:22:14 +0000365/// IsIdentifierL - Return true if the spelling of this token is literally 'L'.
366static bool IsIdentifierL(const Token &Tok, Preprocessor &PP) {
367 if (!Tok.needsCleaning()) {
368 if (Tok.getLength() != 1)
369 return false;
370 SourceManager &SrcMgr = PP.getSourceManager();
Chris Lattnercdf600e2009-01-16 07:00:02 +0000371 return *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()))
Chris Lattnerafa40122008-01-15 05:22:14 +0000372 == 'L';
373 }
374
375 if (Tok.getLength() < 256) {
376 char Buffer[256];
377 const char *TokPtr = Buffer;
378 if (PP.getSpelling(Tok, TokPtr) != 1)
379 return false;
380 return TokPtr[0] == 'L';
381 }
382
383 return PP.getSpelling(Tok) == "L";
384}
385
386
Chris Lattner4b009652007-07-25 00:24:17 +0000387/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
388/// the two individual tokens to be lexed as a single token, return true (which
389/// causes a space to be printed between them). This allows the output of -E
390/// mode to be lexed to the same token stream as lexing the input directly
391/// would.
392///
393/// This code must conservatively return true if it doesn't want to be 100%
394/// accurate. This will cause the output to include extra space characters, but
395/// the resulting output won't have incorrect concatenations going on. Examples
396/// include "..", which we print with a space between, because we don't want to
397/// track enough to tell "x.." from "...".
398bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
399 const Token &Tok) {
400 char Buffer[256];
401
402 tok::TokenKind PrevKind = PrevTok.getKind();
403 if (PrevTok.getIdentifierInfo()) // Language keyword or named operator.
404 PrevKind = tok::identifier;
405
406 // Look up information on when we should avoid concatenation with prevtok.
407 unsigned ConcatInfo = TokenInfo[PrevKind];
408
409 // If prevtok never causes a problem for anything after it, return quickly.
410 if (ConcatInfo == 0) return false;
411
412 if (ConcatInfo & aci_avoid_equal) {
413 // If the next token is '=' or '==', avoid concatenation.
Chris Lattner3b494152007-10-09 18:03:42 +0000414 if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
Chris Lattner4b009652007-07-25 00:24:17 +0000415 return true;
416 ConcatInfo &= ~aci_avoid_equal;
417 }
418
419 if (ConcatInfo == 0) return false;
420
421
422
423 // Basic algorithm: we look at the first character of the second token, and
424 // determine whether it, if appended to the first token, would form (or would
425 // contribute) to a larger token if concatenated.
426 char FirstChar = 0;
427 if (ConcatInfo & aci_custom) {
428 // If the token does not need to know the first character, don't get it.
429 } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
430 // Avoid spelling identifiers, the most common form of token.
431 FirstChar = II->getName()[0];
432 } else if (!Tok.needsCleaning()) {
433 SourceManager &SrcMgr = PP.getSourceManager();
434 FirstChar =
Chris Lattnercdf600e2009-01-16 07:00:02 +0000435 *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()));
Chris Lattner4b009652007-07-25 00:24:17 +0000436 } else if (Tok.getLength() < 256) {
437 const char *TokPtr = Buffer;
438 PP.getSpelling(Tok, TokPtr);
439 FirstChar = TokPtr[0];
440 } else {
441 FirstChar = PP.getSpelling(Tok)[0];
442 }
443
444 switch (PrevKind) {
445 default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
446 case tok::identifier: // id+id or id+number or id+L"foo".
Chris Lattner3b494152007-10-09 18:03:42 +0000447 if (Tok.is(tok::numeric_constant) || Tok.getIdentifierInfo() ||
448 Tok.is(tok::wide_string_literal) /* ||
449 Tok.is(tok::wide_char_literal)*/)
Chris Lattner4b009652007-07-25 00:24:17 +0000450 return true;
Chris Lattner400f0242008-01-15 05:14:19 +0000451
452 // If this isn't identifier + string, we're done.
453 if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
Chris Lattner4b009652007-07-25 00:24:17 +0000454 return false;
455
456 // FIXME: need a wide_char_constant!
Chris Lattner400f0242008-01-15 05:14:19 +0000457
458 // If the string was a wide string L"foo" or wide char L'f', it would concat
459 // with the previous identifier into fooL"bar". Avoid this.
460 if (StartsWithL(Tok, PP))
461 return true;
462
Chris Lattnerafa40122008-01-15 05:22:14 +0000463 // Otherwise, this is a narrow character or string. If the *identifier* is
464 // a literal 'L', avoid pasting L "foo" -> L"foo".
465 return IsIdentifierL(PrevTok, PP);
Chris Lattner4b009652007-07-25 00:24:17 +0000466 case tok::numeric_constant:
Chris Lattner3b494152007-10-09 18:03:42 +0000467 return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
Chris Lattner4b009652007-07-25 00:24:17 +0000468 FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
469 case tok::period: // ..., .*, .1234
Chris Lattner6a316812009-01-11 19:48:19 +0000470 return FirstChar == '.' || isdigit(FirstChar) ||
471 (FirstChar == '*' && PP.getLangOptions().CPlusPlus);
Chris Lattner4b009652007-07-25 00:24:17 +0000472 case tok::amp: // &&
473 return FirstChar == '&';
474 case tok::plus: // ++
475 return FirstChar == '+';
476 case tok::minus: // --, ->, ->*
477 return FirstChar == '-' || FirstChar == '>';
478 case tok::slash: //, /*, //
479 return FirstChar == '*' || FirstChar == '/';
480 case tok::less: // <<, <<=, <:, <%
481 return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
482 case tok::greater: // >>, >>=
483 return FirstChar == '>';
484 case tok::pipe: // ||
485 return FirstChar == '|';
486 case tok::percent: // %>, %:
Chris Lattner6a316812009-01-11 19:48:19 +0000487 return (FirstChar == '>' || FirstChar == ':') &&
488 PP.getLangOptions().Digraphs;
Chris Lattner4b009652007-07-25 00:24:17 +0000489 case tok::colon: // ::, :>
Chris Lattner6a316812009-01-11 19:48:19 +0000490 return (FirstChar == ':' && PP.getLangOptions().CPlusPlus) ||
491 (FirstChar == '>' && PP.getLangOptions().Digraphs);
Chris Lattner4b009652007-07-25 00:24:17 +0000492 case tok::hash: // ##, #@, %:%:
493 return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
494 case tok::arrow: // ->*
495 return FirstChar == '*';
496 }
497}
498
499/// DoPrintPreprocessedInput - This implements -E mode.
500///
Chris Lattner6619f662008-04-08 04:16:20 +0000501void clang::DoPrintPreprocessedInput(Preprocessor &PP,
502 const std::string &OutFile) {
Chris Lattner4b009652007-07-25 00:24:17 +0000503 // Inform the preprocessor whether we want it to retain comments or not, due
504 // to -C or -CC.
505 PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput);
Chris Lattner4b009652007-07-25 00:24:17 +0000506 InitAvoidConcatTokenInfo();
Chris Lattner21494222008-08-17 03:12:02 +0000507
508
509 // Open the output buffer.
Chris Lattner1be96902008-08-17 03:54:39 +0000510 std::string Err;
Daniel Dunbar8fc9ba62008-11-13 05:09:21 +0000511 llvm::raw_fd_ostream OS(OutFile.empty() ? "-" : OutFile.c_str(), false, Err);
Chris Lattner1be96902008-08-17 03:54:39 +0000512 if (!Err.empty()) {
513 fprintf(stderr, "%s\n", Err.c_str());
514 exit(1);
Chris Lattner21494222008-08-17 03:12:02 +0000515 }
Chris Lattner21494222008-08-17 03:12:02 +0000516
Chris Lattner1be96902008-08-17 03:54:39 +0000517 OS.SetBufferSize(64*1024);
518
Chris Lattner4b009652007-07-25 00:24:17 +0000519
520 Token Tok, PrevTok;
521 char Buffer[256];
Chris Lattner21494222008-08-17 03:12:02 +0000522 PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(PP, OS);
Chris Lattner4b009652007-07-25 00:24:17 +0000523 PP.setPPCallbacks(Callbacks);
524
525 PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks));
526 PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",Callbacks));
527
528 // After we have configured the preprocessor, enter the main file.
529
530 // Start parsing the specified input file.
Ted Kremenek17861c52007-12-19 22:51:13 +0000531 PP.EnterMainSourceFile();
Chris Lattner3eddc862007-10-10 20:45:16 +0000532
533 // Consume all of the tokens that come from the predefines buffer. Those
534 // should not be emitted into the output and are guaranteed to be at the
535 // start.
536 const SourceManager &SourceMgr = PP.getSourceManager();
537 do PP.Lex(Tok);
Chris Lattner890c5932007-10-10 23:31:03 +0000538 while (Tok.isNot(tok::eof) && Tok.getLocation().isFileID() &&
Chris Lattner3eddc862007-10-10 20:45:16 +0000539 !strcmp(SourceMgr.getSourceName(Tok.getLocation()), "<predefines>"));
540
541 while (1) {
Chris Lattner4b009652007-07-25 00:24:17 +0000542
543 // If this token is at the start of a line, emit newlines if needed.
Chris Lattner6c451292007-12-09 21:11:08 +0000544 if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
545 // done.
Chris Lattner4b009652007-07-25 00:24:17 +0000546 } else if (Tok.hasLeadingSpace() ||
547 // If we haven't emitted a token on this line yet, PrevTok isn't
548 // useful to look at and no concatenation could happen anyway.
549 (Callbacks->hasEmittedTokensOnThisLine() &&
550 // Don't print "-" next to "-", it would form "--".
551 Callbacks->AvoidConcat(PrevTok, Tok))) {
Chris Lattner21494222008-08-17 03:12:02 +0000552 OS << ' ';
Chris Lattner4b009652007-07-25 00:24:17 +0000553 }
554
555 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
556 const char *Str = II->getName();
557 unsigned Len = Tok.needsCleaning() ? strlen(Str) : Tok.getLength();
Chris Lattner21494222008-08-17 03:12:02 +0000558 OS.write(Str, Len);
Chris Lattner4b009652007-07-25 00:24:17 +0000559 } else if (Tok.getLength() < 256) {
560 const char *TokPtr = Buffer;
561 unsigned Len = PP.getSpelling(Tok, TokPtr);
Chris Lattner21494222008-08-17 03:12:02 +0000562 OS.write(TokPtr, Len);
Chris Lattner4b009652007-07-25 00:24:17 +0000563 } else {
564 std::string S = PP.getSpelling(Tok);
Chris Lattner21494222008-08-17 03:12:02 +0000565 OS.write(&S[0], S.size());
Chris Lattner4b009652007-07-25 00:24:17 +0000566 }
567 Callbacks->SetEmittedTokensOnThisLine();
Chris Lattner3eddc862007-10-10 20:45:16 +0000568
569 if (Tok.is(tok::eof)) break;
570
571 PrevTok = Tok;
572 PP.Lex(Tok);
573 }
Chris Lattner21494222008-08-17 03:12:02 +0000574 OS << '\n';
Chris Lattner4b009652007-07-25 00:24:17 +0000575
Chris Lattnercb7f1802008-08-17 07:07:01 +0000576 // Flush the ostream.
577 OS.flush();
Chris Lattner21494222008-08-17 03:12:02 +0000578
579 // If an error occurred, remove the output file.
580 if (PP.getDiagnostics().hasErrorOccurred() && !OutFile.empty())
581 llvm::sys::Path(OutFile).eraseFromDisk();
Chris Lattner4b009652007-07-25 00:24:17 +0000582}
583