blob: 5c4a140fa9ae8fe7e4140970d752f4259de55b4f [file] [log] [blame]
//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This code simply runs the preprocessor on the input file and prints out the
// result.  This is the traditional behavior of the -E option.
//
//===----------------------------------------------------------------------===//
14
15#include "clang.h"
16#include "clang/Lex/PPCallbacks.h"
17#include "clang/Lex/Preprocessor.h"
18#include "clang/Lex/Pragma.h"
19#include "clang/Basic/SourceManager.h"
Chris Lattner5db17c92008-04-08 04:16:20 +000020#include "clang/Basic/Diagnostic.h"
Chris Lattnerd8e30832007-07-24 06:57:14 +000021#include "llvm/ADT/SmallString.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000022#include "llvm/ADT/StringExtras.h"
Chris Lattner5db17c92008-04-08 04:16:20 +000023#include "llvm/System/Path.h"
24#include "llvm/Support/CommandLine.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000025#include "llvm/Config/config.h"
Chris Lattnerdceb6a72008-08-17 01:47:12 +000026#include "llvm/Support/raw_ostream.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000027#include <cstdio>
28using namespace clang;
29
//===----------------------------------------------------------------------===//
// Preprocessed token printer
//===----------------------------------------------------------------------===//
33
34static llvm::cl::opt<bool>
35DisableLineMarkers("P", llvm::cl::desc("Disable linemarker output in -E mode"));
36static llvm::cl::opt<bool>
37EnableCommentOutput("C", llvm::cl::desc("Enable comment output in -E mode"));
38static llvm::cl::opt<bool>
39EnableMacroCommentOutput("CC",
40 llvm::cl::desc("Enable comment output in -E mode, "
41 "even from macro expansions"));
42
43namespace {
44class PrintPPOutputPPCallbacks : public PPCallbacks {
45 Preprocessor &PP;
Chris Lattnere96de3e2008-08-17 03:12:02 +000046public:
47 llvm::raw_ostream &OS;
48private:
Reid Spencer5f016e22007-07-11 17:01:13 +000049 unsigned CurLine;
Reid Spencer5f016e22007-07-11 17:01:13 +000050 bool EmittedTokensOnThisLine;
Chris Lattner9d728512008-10-27 01:19:25 +000051 SrcMgr::CharacteristicKind FileType;
Chris Lattnerd8e30832007-07-24 06:57:14 +000052 llvm::SmallString<512> CurFilename;
Daniel Dunbar737bdb42008-09-05 03:22:57 +000053 bool Initialized;
Reid Spencer5f016e22007-07-11 17:01:13 +000054public:
Chris Lattnere96de3e2008-08-17 03:12:02 +000055 PrintPPOutputPPCallbacks(Preprocessor &pp, llvm::raw_ostream &os)
56 : PP(pp), OS(os) {
Reid Spencer5f016e22007-07-11 17:01:13 +000057 CurLine = 0;
Chris Lattnerd8e30832007-07-24 06:57:14 +000058 CurFilename += "<uninit>";
Reid Spencer5f016e22007-07-11 17:01:13 +000059 EmittedTokensOnThisLine = false;
Chris Lattner0b9e7362008-09-26 21:18:42 +000060 FileType = SrcMgr::C_User;
Daniel Dunbar737bdb42008-09-05 03:22:57 +000061 Initialized = false;
Reid Spencer5f016e22007-07-11 17:01:13 +000062 }
63
64 void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
Chris Lattnerf0f2b292007-07-23 06:09:34 +000065 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
Reid Spencer5f016e22007-07-11 17:01:13 +000066
67 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
Chris Lattner9d728512008-10-27 01:19:25 +000068 SrcMgr::CharacteristicKind FileType);
Reid Spencer5f016e22007-07-11 17:01:13 +000069 virtual void Ident(SourceLocation Loc, const std::string &str);
Chris Lattnerc7d945d2009-01-16 19:25:54 +000070 virtual void PragmaComment(SourceLocation Loc, const IdentifierInfo *Kind,
71 const std::string &Str);
72
Reid Spencer5f016e22007-07-11 17:01:13 +000073
Chris Lattner5f180322007-12-09 21:11:08 +000074 bool HandleFirstTokOnLine(Token &Tok);
75 bool MoveToLine(SourceLocation Loc);
Chris Lattnerd2177732007-07-20 16:59:19 +000076 bool AvoidConcat(const Token &PrevTok, const Token &Tok);
Daniel Dunbar737bdb42008-09-05 03:22:57 +000077 void WriteLineInfo(unsigned LineNo, const char *Extra=0, unsigned ExtraLen=0);
Reid Spencer5f016e22007-07-11 17:01:13 +000078};
Chris Lattner5db17c92008-04-08 04:16:20 +000079} // end anonymous namespace
Reid Spencer5f016e22007-07-11 17:01:13 +000080
Daniel Dunbar737bdb42008-09-05 03:22:57 +000081void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
82 const char *Extra,
83 unsigned ExtraLen) {
84 if (EmittedTokensOnThisLine) {
85 OS << '\n';
86 EmittedTokensOnThisLine = false;
87 }
88
89 OS << '#' << ' ' << LineNo << ' ' << '"';
90 OS.write(&CurFilename[0], CurFilename.size());
91 OS << '"';
92
93 if (ExtraLen)
94 OS.write(Extra, ExtraLen);
95
Chris Lattner0b9e7362008-09-26 21:18:42 +000096 if (FileType == SrcMgr::C_System)
Daniel Dunbar737bdb42008-09-05 03:22:57 +000097 OS.write(" 3", 2);
Chris Lattner0b9e7362008-09-26 21:18:42 +000098 else if (FileType == SrcMgr::C_ExternCSystem)
Daniel Dunbar737bdb42008-09-05 03:22:57 +000099 OS.write(" 3 4", 4);
100 OS << '\n';
101}
102
Reid Spencer5f016e22007-07-11 17:01:13 +0000103/// MoveToLine - Move the output to the source line specified by the location
104/// object. We can do this by emitting some number of \n's, or be emitting a
Chris Lattner5f180322007-12-09 21:11:08 +0000105/// #line directive. This returns false if already at the specified line, true
106/// if some newlines were emitted.
107bool PrintPPOutputPPCallbacks::MoveToLine(SourceLocation Loc) {
Chris Lattnerf7cf85b2009-01-16 07:36:28 +0000108 unsigned LineNo = PP.getSourceManager().getInstantiationLineNumber(Loc);
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000109
Reid Spencer5f016e22007-07-11 17:01:13 +0000110 if (DisableLineMarkers) {
Chris Lattner5f180322007-12-09 21:11:08 +0000111 if (LineNo == CurLine) return false;
112
113 CurLine = LineNo;
114
115 if (!EmittedTokensOnThisLine)
116 return true;
117
Chris Lattnere96de3e2008-08-17 03:12:02 +0000118 OS << '\n';
Chris Lattner5f180322007-12-09 21:11:08 +0000119 EmittedTokensOnThisLine = false;
120 return true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000121 }
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000122
Reid Spencer5f016e22007-07-11 17:01:13 +0000123 // If this line is "close enough" to the original line, just print newlines,
124 // otherwise print a #line directive.
Daniel Dunbarfd966842008-09-26 01:13:35 +0000125 if (LineNo-CurLine <= 8) {
Chris Lattner822f9402007-07-23 05:14:05 +0000126 if (LineNo-CurLine == 1)
Chris Lattnere96de3e2008-08-17 03:12:02 +0000127 OS << '\n';
Chris Lattner5f180322007-12-09 21:11:08 +0000128 else if (LineNo == CurLine)
Chris Lattnerf7cf85b2009-01-16 07:36:28 +0000129 return false; // Spelling line moved, but instantiation line didn't.
Chris Lattner822f9402007-07-23 05:14:05 +0000130 else {
131 const char *NewLines = "\n\n\n\n\n\n\n\n";
Chris Lattnere96de3e2008-08-17 03:12:02 +0000132 OS.write(NewLines, LineNo-CurLine);
Chris Lattner822f9402007-07-23 05:14:05 +0000133 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000134 } else {
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000135 WriteLineInfo(LineNo, 0, 0);
Reid Spencer5f016e22007-07-11 17:01:13 +0000136 }
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000137
138 CurLine = LineNo;
Chris Lattner5f180322007-12-09 21:11:08 +0000139 return true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000140}
141
142
143/// FileChanged - Whenever the preprocessor enters or exits a #include file
144/// it invokes this handler. Update our conception of the current source
145/// position.
146void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
147 FileChangeReason Reason,
Chris Lattner9d728512008-10-27 01:19:25 +0000148 SrcMgr::CharacteristicKind NewFileType) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000149 // Unless we are exiting a #include, make sure to skip ahead to the line the
150 // #include directive was at.
151 SourceManager &SourceMgr = PP.getSourceManager();
152 if (Reason == PPCallbacks::EnterFile) {
Chris Lattnerb9c3f962009-01-27 07:57:44 +0000153 MoveToLine(SourceMgr.getPresumedLoc(Loc).getIncludeLoc());
Reid Spencer5f016e22007-07-11 17:01:13 +0000154 } else if (Reason == PPCallbacks::SystemHeaderPragma) {
155 MoveToLine(Loc);
156
157 // TODO GCC emits the # directive for this directive on the line AFTER the
158 // directive and emits a bunch of spaces that aren't needed. Emulate this
159 // strange behavior.
160 }
161
Chris Lattnerf7cf85b2009-01-16 07:36:28 +0000162 Loc = SourceMgr.getInstantiationLoc(Loc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000163 CurLine = SourceMgr.getLineNumber(Loc);
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000164
Chris Lattner5f180322007-12-09 21:11:08 +0000165 if (DisableLineMarkers) return;
166
Chris Lattnerd8e30832007-07-24 06:57:14 +0000167 CurFilename.clear();
Chris Lattnerb9c3f962009-01-27 07:57:44 +0000168 CurFilename += SourceMgr.getPresumedLoc(Loc).getFilename();
Chris Lattnerd8e30832007-07-24 06:57:14 +0000169 Lexer::Stringify(CurFilename);
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000170 FileType = NewFileType;
171
172 if (!Initialized) {
173 WriteLineInfo(CurLine);
174 Initialized = true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000175 }
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000176
Reid Spencer5f016e22007-07-11 17:01:13 +0000177 switch (Reason) {
178 case PPCallbacks::EnterFile:
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000179 WriteLineInfo(CurLine, " 1", 2);
Reid Spencer5f016e22007-07-11 17:01:13 +0000180 break;
181 case PPCallbacks::ExitFile:
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000182 WriteLineInfo(CurLine, " 2", 2);
Reid Spencer5f016e22007-07-11 17:01:13 +0000183 break;
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000184 case PPCallbacks::SystemHeaderPragma:
185 case PPCallbacks::RenameFile:
186 WriteLineInfo(CurLine);
187 break;
Reid Spencer5f016e22007-07-11 17:01:13 +0000188 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000189}
190
Chris Lattnerc7d945d2009-01-16 19:25:54 +0000191/// Ident - Handle #ident directives when read by the preprocessor.
Reid Spencer5f016e22007-07-11 17:01:13 +0000192///
193void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
194 MoveToLine(Loc);
195
Chris Lattnere96de3e2008-08-17 03:12:02 +0000196 OS.write("#ident ", strlen("#ident "));
197 OS.write(&S[0], S.size());
Reid Spencer5f016e22007-07-11 17:01:13 +0000198 EmittedTokensOnThisLine = true;
199}
200
Chris Lattnerc7d945d2009-01-16 19:25:54 +0000201void PrintPPOutputPPCallbacks::PragmaComment(SourceLocation Loc,
202 const IdentifierInfo *Kind,
203 const std::string &Str) {
204 MoveToLine(Loc);
205 OS << "#pragma comment(" << Kind->getName();
206
207 if (!Str.empty()) {
208 OS << ", \"";
209
210 for (unsigned i = 0, e = Str.size(); i != e; ++i) {
211 unsigned char Char = Str[i];
Chris Lattner52a3e9e2009-01-16 22:13:37 +0000212 if (isprint(Char) && Char != '\\' && Char != '"')
Chris Lattnerc7d945d2009-01-16 19:25:54 +0000213 OS << (char)Char;
214 else // Output anything hard as an octal escape.
215 OS << '\\'
216 << (char)('0'+ ((Char >> 6) & 7))
217 << (char)('0'+ ((Char >> 3) & 7))
218 << (char)('0'+ ((Char >> 0) & 7));
219 }
220 OS << '"';
221 }
222
223 OS << ')';
224 EmittedTokensOnThisLine = true;
225}
226
227
Reid Spencer5f016e22007-07-11 17:01:13 +0000228/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
Chris Lattner5f180322007-12-09 21:11:08 +0000229/// is called for the first token on each new line. If this really is the start
230/// of a new logical line, handle it and return true, otherwise return false.
231/// This may not be the start of a logical line because the "start of line"
Chris Lattnerf7cf85b2009-01-16 07:36:28 +0000232/// marker is set for spelling lines, not instantiation ones.
Chris Lattner5f180322007-12-09 21:11:08 +0000233bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000234 // Figure out what line we went to and insert the appropriate number of
235 // newline characters.
Chris Lattner5f180322007-12-09 21:11:08 +0000236 if (!MoveToLine(Tok.getLocation()))
237 return false;
Reid Spencer5f016e22007-07-11 17:01:13 +0000238
239 // Print out space characters so that the first token on a line is
240 // indented for easy reading.
Chris Lattner9dc1f532007-07-20 16:37:10 +0000241 const SourceManager &SourceMgr = PP.getSourceManager();
Chris Lattnerf7cf85b2009-01-16 07:36:28 +0000242 unsigned ColNo = SourceMgr.getInstantiationColumnNumber(Tok.getLocation());
Reid Spencer5f016e22007-07-11 17:01:13 +0000243
244 // This hack prevents stuff like:
245 // #define HASH #
246 // HASH define foo bar
247 // From having the # character end up at column 1, which makes it so it
248 // is not handled as a #define next time through the preprocessor if in
249 // -fpreprocessed mode.
Chris Lattner057aaf62007-10-09 18:03:42 +0000250 if (ColNo <= 1 && Tok.is(tok::hash))
Chris Lattnere96de3e2008-08-17 03:12:02 +0000251 OS << ' ';
Reid Spencer5f016e22007-07-11 17:01:13 +0000252
253 // Otherwise, indent the appropriate number of spaces.
254 for (; ColNo > 1; --ColNo)
Chris Lattnere96de3e2008-08-17 03:12:02 +0000255 OS << ' ';
Chris Lattner5f180322007-12-09 21:11:08 +0000256
257 return true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000258}
259
260namespace {
261struct UnknownPragmaHandler : public PragmaHandler {
262 const char *Prefix;
263 PrintPPOutputPPCallbacks *Callbacks;
264
265 UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks)
266 : PragmaHandler(0), Prefix(prefix), Callbacks(callbacks) {}
Chris Lattnerd2177732007-07-20 16:59:19 +0000267 virtual void HandlePragma(Preprocessor &PP, Token &PragmaTok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000268 // Figure out what line we went to and insert the appropriate number of
269 // newline characters.
270 Callbacks->MoveToLine(PragmaTok.getLocation());
Chris Lattnere96de3e2008-08-17 03:12:02 +0000271 Callbacks->OS.write(Prefix, strlen(Prefix));
Reid Spencer5f016e22007-07-11 17:01:13 +0000272
273 // Read and print all of the pragma tokens.
Chris Lattner057aaf62007-10-09 18:03:42 +0000274 while (PragmaTok.isNot(tok::eom)) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000275 if (PragmaTok.hasLeadingSpace())
Chris Lattnere96de3e2008-08-17 03:12:02 +0000276 Callbacks->OS << ' ';
Reid Spencer5f016e22007-07-11 17:01:13 +0000277 std::string TokSpell = PP.getSpelling(PragmaTok);
Chris Lattnere96de3e2008-08-17 03:12:02 +0000278 Callbacks->OS.write(&TokSpell[0], TokSpell.size());
Reid Spencer5f016e22007-07-11 17:01:13 +0000279 PP.LexUnexpandedToken(PragmaTok);
280 }
Chris Lattnere96de3e2008-08-17 03:12:02 +0000281 Callbacks->OS << '\n';
Reid Spencer5f016e22007-07-11 17:01:13 +0000282 }
283};
284} // end anonymous namespace
285
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000286
/// AvoidConcatInfo - Bit flags stored per token kind in the TokenInfo table
/// and consulted by AvoidConcat.
enum AvoidConcatInfo {
  /// By default, a token never needs to avoid concatenation.  Most tokens
  /// (e.g. ',', ')', etc) don't cause a problem when concatenated.
  aci_never_avoid_concat = 0,

  /// aci_custom_firstchar - AvoidConcat contains custom code to handle this
  /// token's requirements, and it needs to know the first character of the
  /// following token.
  aci_custom_firstchar = 1 << 0,

  /// aci_custom - AvoidConcat contains custom code to handle this token's
  /// requirements, but it doesn't need to know the first character of the
  /// following token.
  aci_custom = 1 << 1,

  /// aci_avoid_equal - Many tokens cannot be safely followed by an '='
  /// character.  For example, "<<" turns into "<<=" when followed by an =.
  aci_avoid_equal = 1 << 2
};
306
307/// This array contains information for each token on what action to take when
308/// avoiding concatenation of tokens in the AvoidConcat method.
309static char TokenInfo[tok::NUM_TOKENS];
310
311/// InitAvoidConcatTokenInfo - Tokens that must avoid concatenation should be
312/// marked by this function.
313static void InitAvoidConcatTokenInfo() {
314 // These tokens have custom code in AvoidConcat.
315 TokenInfo[tok::identifier ] |= aci_custom;
316 TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
317 TokenInfo[tok::period ] |= aci_custom_firstchar;
318 TokenInfo[tok::amp ] |= aci_custom_firstchar;
319 TokenInfo[tok::plus ] |= aci_custom_firstchar;
320 TokenInfo[tok::minus ] |= aci_custom_firstchar;
321 TokenInfo[tok::slash ] |= aci_custom_firstchar;
322 TokenInfo[tok::less ] |= aci_custom_firstchar;
323 TokenInfo[tok::greater ] |= aci_custom_firstchar;
324 TokenInfo[tok::pipe ] |= aci_custom_firstchar;
325 TokenInfo[tok::percent ] |= aci_custom_firstchar;
326 TokenInfo[tok::colon ] |= aci_custom_firstchar;
327 TokenInfo[tok::hash ] |= aci_custom_firstchar;
328 TokenInfo[tok::arrow ] |= aci_custom_firstchar;
329
330 // These tokens change behavior if followed by an '='.
331 TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
332 TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
333 TokenInfo[tok::minus ] |= aci_avoid_equal; // -=
334 TokenInfo[tok::slash ] |= aci_avoid_equal; // /=
335 TokenInfo[tok::less ] |= aci_avoid_equal; // <=
336 TokenInfo[tok::greater ] |= aci_avoid_equal; // >=
337 TokenInfo[tok::pipe ] |= aci_avoid_equal; // |=
338 TokenInfo[tok::percent ] |= aci_avoid_equal; // %=
339 TokenInfo[tok::star ] |= aci_avoid_equal; // *=
340 TokenInfo[tok::exclaim ] |= aci_avoid_equal; // !=
341 TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<=
342 TokenInfo[tok::greaterequal] |= aci_avoid_equal; // >>=
343 TokenInfo[tok::caret ] |= aci_avoid_equal; // ^=
344 TokenInfo[tok::equal ] |= aci_avoid_equal; // ==
345}
346
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000347/// StartsWithL - Return true if the spelling of this token starts with 'L'.
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000348static bool StartsWithL(const Token &Tok, Preprocessor &PP) {
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000349 if (!Tok.needsCleaning()) {
350 SourceManager &SrcMgr = PP.getSourceManager();
Chris Lattnerdf7c17a2009-01-16 07:00:02 +0000351 return *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()))
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000352 == 'L';
353 }
354
355 if (Tok.getLength() < 256) {
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000356 char Buffer[256];
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000357 const char *TokPtr = Buffer;
358 PP.getSpelling(Tok, TokPtr);
359 return TokPtr[0] == 'L';
360 }
361
362 return PP.getSpelling(Tok)[0] == 'L';
363}
364
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000365/// IsIdentifierL - Return true if the spelling of this token is literally 'L'.
366static bool IsIdentifierL(const Token &Tok, Preprocessor &PP) {
367 if (!Tok.needsCleaning()) {
368 if (Tok.getLength() != 1)
369 return false;
370 SourceManager &SrcMgr = PP.getSourceManager();
Chris Lattnerdf7c17a2009-01-16 07:00:02 +0000371 return *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()))
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000372 == 'L';
373 }
374
375 if (Tok.getLength() < 256) {
376 char Buffer[256];
377 const char *TokPtr = Buffer;
378 if (PP.getSpelling(Tok, TokPtr) != 1)
379 return false;
380 return TokPtr[0] == 'L';
381 }
382
383 return PP.getSpelling(Tok) == "L";
384}
385
386
Reid Spencer5f016e22007-07-11 17:01:13 +0000387/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
388/// the two individual tokens to be lexed as a single token, return true (which
389/// causes a space to be printed between them). This allows the output of -E
390/// mode to be lexed to the same token stream as lexing the input directly
391/// would.
392///
393/// This code must conservatively return true if it doesn't want to be 100%
394/// accurate. This will cause the output to include extra space characters, but
395/// the resulting output won't have incorrect concatenations going on. Examples
396/// include "..", which we print with a space between, because we don't want to
397/// track enough to tell "x.." from "...".
Chris Lattnerd2177732007-07-20 16:59:19 +0000398bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
399 const Token &Tok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000400 char Buffer[256];
401
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000402 tok::TokenKind PrevKind = PrevTok.getKind();
403 if (PrevTok.getIdentifierInfo()) // Language keyword or named operator.
404 PrevKind = tok::identifier;
405
406 // Look up information on when we should avoid concatenation with prevtok.
407 unsigned ConcatInfo = TokenInfo[PrevKind];
408
409 // If prevtok never causes a problem for anything after it, return quickly.
410 if (ConcatInfo == 0) return false;
Reid Spencer5f016e22007-07-11 17:01:13 +0000411
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000412 if (ConcatInfo & aci_avoid_equal) {
413 // If the next token is '=' or '==', avoid concatenation.
Chris Lattner057aaf62007-10-09 18:03:42 +0000414 if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000415 return true;
Chris Lattnerb638a302007-07-23 23:21:34 +0000416 ConcatInfo &= ~aci_avoid_equal;
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000417 }
418
419 if (ConcatInfo == 0) return false;
420
421
422
Reid Spencer5f016e22007-07-11 17:01:13 +0000423 // Basic algorithm: we look at the first character of the second token, and
424 // determine whether it, if appended to the first token, would form (or would
425 // contribute) to a larger token if concatenated.
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000426 char FirstChar = 0;
427 if (ConcatInfo & aci_custom) {
428 // If the token does not need to know the first character, don't get it.
429 } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000430 // Avoid spelling identifiers, the most common form of token.
431 FirstChar = II->getName()[0];
Chris Lattnerb19f5e82007-07-23 05:18:42 +0000432 } else if (!Tok.needsCleaning()) {
Chris Lattner33116d62009-01-26 19:33:54 +0000433 if (Tok.isLiteral() && Tok.getLiteralData()) {
434 FirstChar = *Tok.getLiteralData();
435 } else {
436 SourceManager &SrcMgr = PP.getSourceManager();
437 FirstChar =
438 *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()));
439 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000440 } else if (Tok.getLength() < 256) {
441 const char *TokPtr = Buffer;
442 PP.getSpelling(Tok, TokPtr);
443 FirstChar = TokPtr[0];
444 } else {
445 FirstChar = PP.getSpelling(Tok)[0];
446 }
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000447
Reid Spencer5f016e22007-07-11 17:01:13 +0000448 switch (PrevKind) {
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000449 default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
Reid Spencer5f016e22007-07-11 17:01:13 +0000450 case tok::identifier: // id+id or id+number or id+L"foo".
Chris Lattner057aaf62007-10-09 18:03:42 +0000451 if (Tok.is(tok::numeric_constant) || Tok.getIdentifierInfo() ||
452 Tok.is(tok::wide_string_literal) /* ||
453 Tok.is(tok::wide_char_literal)*/)
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000454 return true;
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000455
456 // If this isn't identifier + string, we're done.
457 if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000458 return false;
459
460 // FIXME: need a wide_char_constant!
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000461
462 // If the string was a wide string L"foo" or wide char L'f', it would concat
463 // with the previous identifier into fooL"bar". Avoid this.
464 if (StartsWithL(Tok, PP))
465 return true;
466
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000467 // Otherwise, this is a narrow character or string. If the *identifier* is
468 // a literal 'L', avoid pasting L "foo" -> L"foo".
469 return IsIdentifierL(PrevTok, PP);
Reid Spencer5f016e22007-07-11 17:01:13 +0000470 case tok::numeric_constant:
Chris Lattner057aaf62007-10-09 18:03:42 +0000471 return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
Reid Spencer5f016e22007-07-11 17:01:13 +0000472 FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
473 case tok::period: // ..., .*, .1234
Chris Lattnerd7a7c002009-01-11 19:48:19 +0000474 return FirstChar == '.' || isdigit(FirstChar) ||
475 (FirstChar == '*' && PP.getLangOptions().CPlusPlus);
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000476 case tok::amp: // &&
477 return FirstChar == '&';
478 case tok::plus: // ++
479 return FirstChar == '+';
480 case tok::minus: // --, ->, ->*
481 return FirstChar == '-' || FirstChar == '>';
482 case tok::slash: //, /*, //
483 return FirstChar == '*' || FirstChar == '/';
484 case tok::less: // <<, <<=, <:, <%
485 return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
486 case tok::greater: // >>, >>=
487 return FirstChar == '>';
488 case tok::pipe: // ||
489 return FirstChar == '|';
490 case tok::percent: // %>, %:
Chris Lattnerd7a7c002009-01-11 19:48:19 +0000491 return (FirstChar == '>' || FirstChar == ':') &&
492 PP.getLangOptions().Digraphs;
Reid Spencer5f016e22007-07-11 17:01:13 +0000493 case tok::colon: // ::, :>
Chris Lattnerd7a7c002009-01-11 19:48:19 +0000494 return (FirstChar == ':' && PP.getLangOptions().CPlusPlus) ||
495 (FirstChar == '>' && PP.getLangOptions().Digraphs);
Reid Spencer5f016e22007-07-11 17:01:13 +0000496 case tok::hash: // ##, #@, %:%:
497 return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
498 case tok::arrow: // ->*
499 return FirstChar == '*';
Reid Spencer5f016e22007-07-11 17:01:13 +0000500 }
501}
502
503/// DoPrintPreprocessedInput - This implements -E mode.
504///
Chris Lattner5db17c92008-04-08 04:16:20 +0000505void clang::DoPrintPreprocessedInput(Preprocessor &PP,
506 const std::string &OutFile) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000507 // Inform the preprocessor whether we want it to retain comments or not, due
508 // to -C or -CC.
509 PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput);
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000510 InitAvoidConcatTokenInfo();
Chris Lattnere96de3e2008-08-17 03:12:02 +0000511
512
513 // Open the output buffer.
Chris Lattner202e9ac2008-08-17 03:54:39 +0000514 std::string Err;
Daniel Dunbar26fb2722008-11-13 05:09:21 +0000515 llvm::raw_fd_ostream OS(OutFile.empty() ? "-" : OutFile.c_str(), false, Err);
Chris Lattner202e9ac2008-08-17 03:54:39 +0000516 if (!Err.empty()) {
517 fprintf(stderr, "%s\n", Err.c_str());
518 exit(1);
Chris Lattnere96de3e2008-08-17 03:12:02 +0000519 }
Chris Lattnere96de3e2008-08-17 03:12:02 +0000520
Chris Lattner202e9ac2008-08-17 03:54:39 +0000521 OS.SetBufferSize(64*1024);
522
Reid Spencer5f016e22007-07-11 17:01:13 +0000523
Chris Lattnerd2177732007-07-20 16:59:19 +0000524 Token Tok, PrevTok;
Reid Spencer5f016e22007-07-11 17:01:13 +0000525 char Buffer[256];
Chris Lattnere96de3e2008-08-17 03:12:02 +0000526 PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(PP, OS);
Reid Spencer5f016e22007-07-11 17:01:13 +0000527 PP.setPPCallbacks(Callbacks);
528
529 PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks));
530 PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",Callbacks));
531
532 // After we have configured the preprocessor, enter the main file.
533
534 // Start parsing the specified input file.
Ted Kremenek95041a22007-12-19 22:51:13 +0000535 PP.EnterMainSourceFile();
Chris Lattner6f688e12007-10-10 20:45:16 +0000536
537 // Consume all of the tokens that come from the predefines buffer. Those
538 // should not be emitted into the output and are guaranteed to be at the
539 // start.
540 const SourceManager &SourceMgr = PP.getSourceManager();
541 do PP.Lex(Tok);
Chris Lattnera1a51782007-10-10 23:31:03 +0000542 while (Tok.isNot(tok::eof) && Tok.getLocation().isFileID() &&
Chris Lattnerb9c3f962009-01-27 07:57:44 +0000543 !strcmp(SourceMgr.getPresumedLoc(Tok.getLocation()).getFilename(),
544 "<predefines>"));
Chris Lattner6f688e12007-10-10 20:45:16 +0000545
546 while (1) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000547
548 // If this token is at the start of a line, emit newlines if needed.
Chris Lattner5f180322007-12-09 21:11:08 +0000549 if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
550 // done.
Reid Spencer5f016e22007-07-11 17:01:13 +0000551 } else if (Tok.hasLeadingSpace() ||
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000552 // If we haven't emitted a token on this line yet, PrevTok isn't
553 // useful to look at and no concatenation could happen anyway.
Chris Lattnerb638a302007-07-23 23:21:34 +0000554 (Callbacks->hasEmittedTokensOnThisLine() &&
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000555 // Don't print "-" next to "-", it would form "--".
556 Callbacks->AvoidConcat(PrevTok, Tok))) {
Chris Lattnere96de3e2008-08-17 03:12:02 +0000557 OS << ' ';
Reid Spencer5f016e22007-07-11 17:01:13 +0000558 }
559
Chris Lattner2933f412007-07-23 06:14:36 +0000560 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
Chris Lattner33116d62009-01-26 19:33:54 +0000561 OS.write(II->getName(), II->getLength());
562 } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
563 Tok.getLiteralData()) {
564 OS.write(Tok.getLiteralData(), Tok.getLength());
Chris Lattner2933f412007-07-23 06:14:36 +0000565 } else if (Tok.getLength() < 256) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000566 const char *TokPtr = Buffer;
567 unsigned Len = PP.getSpelling(Tok, TokPtr);
Chris Lattnere96de3e2008-08-17 03:12:02 +0000568 OS.write(TokPtr, Len);
Reid Spencer5f016e22007-07-11 17:01:13 +0000569 } else {
570 std::string S = PP.getSpelling(Tok);
Chris Lattnere96de3e2008-08-17 03:12:02 +0000571 OS.write(&S[0], S.size());
Reid Spencer5f016e22007-07-11 17:01:13 +0000572 }
573 Callbacks->SetEmittedTokensOnThisLine();
Chris Lattner6f688e12007-10-10 20:45:16 +0000574
575 if (Tok.is(tok::eof)) break;
576
577 PrevTok = Tok;
578 PP.Lex(Tok);
579 }
Chris Lattnere96de3e2008-08-17 03:12:02 +0000580 OS << '\n';
Reid Spencer5f016e22007-07-11 17:01:13 +0000581
Chris Lattner76b3a722008-08-17 07:07:01 +0000582 // Flush the ostream.
583 OS.flush();
Chris Lattnere96de3e2008-08-17 03:12:02 +0000584
585 // If an error occurred, remove the output file.
586 if (PP.getDiagnostics().hasErrorOccurred() && !OutFile.empty())
587 llvm::sys::Path(OutFile).eraseFromDisk();
Reid Spencer5f016e22007-07-11 17:01:13 +0000588}
589