blob: 8bd4e96ae3bafc8f211b2c6a9c8731e54ce42703 [file] [log] [blame]
Reid Spencer5f016e22007-07-11 17:01:13 +00001//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2//
3// The LLVM Compiler Infrastructure
4//
Chris Lattner0bc735f2007-12-29 19:59:25 +00005// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
Reid Spencer5f016e22007-07-11 17:01:13 +00007//
8//===----------------------------------------------------------------------===//
9//
10// This code simply runs the preprocessor on the input file and prints out the
11// result. This is the traditional behavior of the -E option.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang.h"
Chris Lattnerf73903a2009-02-06 06:45:26 +000016#include "clang/Lex/MacroInfo.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000017#include "clang/Lex/PPCallbacks.h"
18#include "clang/Lex/Preprocessor.h"
19#include "clang/Lex/Pragma.h"
20#include "clang/Basic/SourceManager.h"
Chris Lattner5db17c92008-04-08 04:16:20 +000021#include "clang/Basic/Diagnostic.h"
Chris Lattnerd8e30832007-07-24 06:57:14 +000022#include "llvm/ADT/SmallString.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000023#include "llvm/ADT/StringExtras.h"
Chris Lattner5db17c92008-04-08 04:16:20 +000024#include "llvm/System/Path.h"
25#include "llvm/Support/CommandLine.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000026#include "llvm/Config/config.h"
Chris Lattnerdceb6a72008-08-17 01:47:12 +000027#include "llvm/Support/raw_ostream.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000028#include <cstdio>
29using namespace clang;
30
Reid Spencer5f016e22007-07-11 17:01:13 +000031//===----------------------------------------------------------------------===//
32// Preprocessed token printer
33//===----------------------------------------------------------------------===//
34
35static llvm::cl::opt<bool>
36DisableLineMarkers("P", llvm::cl::desc("Disable linemarker output in -E mode"));
37static llvm::cl::opt<bool>
38EnableCommentOutput("C", llvm::cl::desc("Enable comment output in -E mode"));
39static llvm::cl::opt<bool>
40EnableMacroCommentOutput("CC",
41 llvm::cl::desc("Enable comment output in -E mode, "
42 "even from macro expansions"));
Chris Lattnerf73903a2009-02-06 06:45:26 +000043static llvm::cl::opt<bool>
44DumpMacros("dM", llvm::cl::desc("Print macro definitions in -E mode instead of"
45 " normal output"));
46
Reid Spencer5f016e22007-07-11 17:01:13 +000047
48namespace {
49class PrintPPOutputPPCallbacks : public PPCallbacks {
50 Preprocessor &PP;
Chris Lattnere96de3e2008-08-17 03:12:02 +000051public:
52 llvm::raw_ostream &OS;
53private:
Reid Spencer5f016e22007-07-11 17:01:13 +000054 unsigned CurLine;
Reid Spencer5f016e22007-07-11 17:01:13 +000055 bool EmittedTokensOnThisLine;
Chris Lattner9d728512008-10-27 01:19:25 +000056 SrcMgr::CharacteristicKind FileType;
Chris Lattnerd8e30832007-07-24 06:57:14 +000057 llvm::SmallString<512> CurFilename;
Daniel Dunbar737bdb42008-09-05 03:22:57 +000058 bool Initialized;
Reid Spencer5f016e22007-07-11 17:01:13 +000059public:
Chris Lattnere96de3e2008-08-17 03:12:02 +000060 PrintPPOutputPPCallbacks(Preprocessor &pp, llvm::raw_ostream &os)
61 : PP(pp), OS(os) {
Reid Spencer5f016e22007-07-11 17:01:13 +000062 CurLine = 0;
Chris Lattnerd8e30832007-07-24 06:57:14 +000063 CurFilename += "<uninit>";
Reid Spencer5f016e22007-07-11 17:01:13 +000064 EmittedTokensOnThisLine = false;
Chris Lattner0b9e7362008-09-26 21:18:42 +000065 FileType = SrcMgr::C_User;
Daniel Dunbar737bdb42008-09-05 03:22:57 +000066 Initialized = false;
Reid Spencer5f016e22007-07-11 17:01:13 +000067 }
68
69 void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
Chris Lattnerf0f2b292007-07-23 06:09:34 +000070 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
Reid Spencer5f016e22007-07-11 17:01:13 +000071
72 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
Chris Lattner9d728512008-10-27 01:19:25 +000073 SrcMgr::CharacteristicKind FileType);
Reid Spencer5f016e22007-07-11 17:01:13 +000074 virtual void Ident(SourceLocation Loc, const std::string &str);
Chris Lattnerc7d945d2009-01-16 19:25:54 +000075 virtual void PragmaComment(SourceLocation Loc, const IdentifierInfo *Kind,
76 const std::string &Str);
77
Reid Spencer5f016e22007-07-11 17:01:13 +000078
Chris Lattner5f180322007-12-09 21:11:08 +000079 bool HandleFirstTokOnLine(Token &Tok);
80 bool MoveToLine(SourceLocation Loc);
Chris Lattnerd2177732007-07-20 16:59:19 +000081 bool AvoidConcat(const Token &PrevTok, const Token &Tok);
Daniel Dunbar737bdb42008-09-05 03:22:57 +000082 void WriteLineInfo(unsigned LineNo, const char *Extra=0, unsigned ExtraLen=0);
Reid Spencer5f016e22007-07-11 17:01:13 +000083};
Chris Lattner5db17c92008-04-08 04:16:20 +000084} // end anonymous namespace
Reid Spencer5f016e22007-07-11 17:01:13 +000085
Daniel Dunbar737bdb42008-09-05 03:22:57 +000086void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
87 const char *Extra,
88 unsigned ExtraLen) {
89 if (EmittedTokensOnThisLine) {
90 OS << '\n';
91 EmittedTokensOnThisLine = false;
92 }
93
94 OS << '#' << ' ' << LineNo << ' ' << '"';
95 OS.write(&CurFilename[0], CurFilename.size());
96 OS << '"';
97
98 if (ExtraLen)
99 OS.write(Extra, ExtraLen);
100
Chris Lattner0b9e7362008-09-26 21:18:42 +0000101 if (FileType == SrcMgr::C_System)
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000102 OS.write(" 3", 2);
Chris Lattner0b9e7362008-09-26 21:18:42 +0000103 else if (FileType == SrcMgr::C_ExternCSystem)
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000104 OS.write(" 3 4", 4);
105 OS << '\n';
106}
107
Reid Spencer5f016e22007-07-11 17:01:13 +0000108/// MoveToLine - Move the output to the source line specified by the location
109/// object. We can do this by emitting some number of \n's, or be emitting a
Chris Lattner5f180322007-12-09 21:11:08 +0000110/// #line directive. This returns false if already at the specified line, true
111/// if some newlines were emitted.
112bool PrintPPOutputPPCallbacks::MoveToLine(SourceLocation Loc) {
Chris Lattnerf7cf85b2009-01-16 07:36:28 +0000113 unsigned LineNo = PP.getSourceManager().getInstantiationLineNumber(Loc);
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000114
Reid Spencer5f016e22007-07-11 17:01:13 +0000115 if (DisableLineMarkers) {
Chris Lattner5f180322007-12-09 21:11:08 +0000116 if (LineNo == CurLine) return false;
117
118 CurLine = LineNo;
119
120 if (!EmittedTokensOnThisLine)
121 return true;
122
Chris Lattnere96de3e2008-08-17 03:12:02 +0000123 OS << '\n';
Chris Lattner5f180322007-12-09 21:11:08 +0000124 EmittedTokensOnThisLine = false;
125 return true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000126 }
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000127
Reid Spencer5f016e22007-07-11 17:01:13 +0000128 // If this line is "close enough" to the original line, just print newlines,
129 // otherwise print a #line directive.
Daniel Dunbarfd966842008-09-26 01:13:35 +0000130 if (LineNo-CurLine <= 8) {
Chris Lattner822f9402007-07-23 05:14:05 +0000131 if (LineNo-CurLine == 1)
Chris Lattnere96de3e2008-08-17 03:12:02 +0000132 OS << '\n';
Chris Lattner5f180322007-12-09 21:11:08 +0000133 else if (LineNo == CurLine)
Chris Lattnerf7cf85b2009-01-16 07:36:28 +0000134 return false; // Spelling line moved, but instantiation line didn't.
Chris Lattner822f9402007-07-23 05:14:05 +0000135 else {
136 const char *NewLines = "\n\n\n\n\n\n\n\n";
Chris Lattnere96de3e2008-08-17 03:12:02 +0000137 OS.write(NewLines, LineNo-CurLine);
Chris Lattner822f9402007-07-23 05:14:05 +0000138 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000139 } else {
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000140 WriteLineInfo(LineNo, 0, 0);
Reid Spencer5f016e22007-07-11 17:01:13 +0000141 }
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000142
143 CurLine = LineNo;
Chris Lattner5f180322007-12-09 21:11:08 +0000144 return true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000145}
146
147
148/// FileChanged - Whenever the preprocessor enters or exits a #include file
149/// it invokes this handler. Update our conception of the current source
150/// position.
151void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
152 FileChangeReason Reason,
Chris Lattner9d728512008-10-27 01:19:25 +0000153 SrcMgr::CharacteristicKind NewFileType) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000154 // Unless we are exiting a #include, make sure to skip ahead to the line the
155 // #include directive was at.
156 SourceManager &SourceMgr = PP.getSourceManager();
157 if (Reason == PPCallbacks::EnterFile) {
Chris Lattner71d8bfb2009-01-30 18:44:17 +0000158 SourceLocation IncludeLoc = SourceMgr.getPresumedLoc(Loc).getIncludeLoc();
159 if (IncludeLoc.isValid())
160 MoveToLine(IncludeLoc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000161 } else if (Reason == PPCallbacks::SystemHeaderPragma) {
162 MoveToLine(Loc);
163
164 // TODO GCC emits the # directive for this directive on the line AFTER the
165 // directive and emits a bunch of spaces that aren't needed. Emulate this
166 // strange behavior.
167 }
168
Chris Lattnerf7cf85b2009-01-16 07:36:28 +0000169 Loc = SourceMgr.getInstantiationLoc(Loc);
Chris Lattner30fc9332009-02-04 01:06:56 +0000170 CurLine = SourceMgr.getInstantiationLineNumber(Loc);
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000171
Chris Lattner5f180322007-12-09 21:11:08 +0000172 if (DisableLineMarkers) return;
173
Chris Lattnerd8e30832007-07-24 06:57:14 +0000174 CurFilename.clear();
Chris Lattnerb9c3f962009-01-27 07:57:44 +0000175 CurFilename += SourceMgr.getPresumedLoc(Loc).getFilename();
Chris Lattnerd8e30832007-07-24 06:57:14 +0000176 Lexer::Stringify(CurFilename);
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000177 FileType = NewFileType;
178
179 if (!Initialized) {
180 WriteLineInfo(CurLine);
181 Initialized = true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000182 }
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000183
Reid Spencer5f016e22007-07-11 17:01:13 +0000184 switch (Reason) {
185 case PPCallbacks::EnterFile:
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000186 WriteLineInfo(CurLine, " 1", 2);
Reid Spencer5f016e22007-07-11 17:01:13 +0000187 break;
188 case PPCallbacks::ExitFile:
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000189 WriteLineInfo(CurLine, " 2", 2);
Reid Spencer5f016e22007-07-11 17:01:13 +0000190 break;
Daniel Dunbar737bdb42008-09-05 03:22:57 +0000191 case PPCallbacks::SystemHeaderPragma:
192 case PPCallbacks::RenameFile:
193 WriteLineInfo(CurLine);
194 break;
Reid Spencer5f016e22007-07-11 17:01:13 +0000195 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000196}
197
Chris Lattnerc7d945d2009-01-16 19:25:54 +0000198/// Ident - Handle #ident directives when read by the preprocessor.
Reid Spencer5f016e22007-07-11 17:01:13 +0000199///
200void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
201 MoveToLine(Loc);
202
Chris Lattnere96de3e2008-08-17 03:12:02 +0000203 OS.write("#ident ", strlen("#ident "));
204 OS.write(&S[0], S.size());
Reid Spencer5f016e22007-07-11 17:01:13 +0000205 EmittedTokensOnThisLine = true;
206}
207
Chris Lattnerc7d945d2009-01-16 19:25:54 +0000208void PrintPPOutputPPCallbacks::PragmaComment(SourceLocation Loc,
209 const IdentifierInfo *Kind,
210 const std::string &Str) {
211 MoveToLine(Loc);
212 OS << "#pragma comment(" << Kind->getName();
213
214 if (!Str.empty()) {
215 OS << ", \"";
216
217 for (unsigned i = 0, e = Str.size(); i != e; ++i) {
218 unsigned char Char = Str[i];
Chris Lattner52a3e9e2009-01-16 22:13:37 +0000219 if (isprint(Char) && Char != '\\' && Char != '"')
Chris Lattnerc7d945d2009-01-16 19:25:54 +0000220 OS << (char)Char;
221 else // Output anything hard as an octal escape.
222 OS << '\\'
223 << (char)('0'+ ((Char >> 6) & 7))
224 << (char)('0'+ ((Char >> 3) & 7))
225 << (char)('0'+ ((Char >> 0) & 7));
226 }
227 OS << '"';
228 }
229
230 OS << ')';
231 EmittedTokensOnThisLine = true;
232}
233
234
Reid Spencer5f016e22007-07-11 17:01:13 +0000235/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
Chris Lattner5f180322007-12-09 21:11:08 +0000236/// is called for the first token on each new line. If this really is the start
237/// of a new logical line, handle it and return true, otherwise return false.
238/// This may not be the start of a logical line because the "start of line"
Chris Lattnerf7cf85b2009-01-16 07:36:28 +0000239/// marker is set for spelling lines, not instantiation ones.
Chris Lattner5f180322007-12-09 21:11:08 +0000240bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000241 // Figure out what line we went to and insert the appropriate number of
242 // newline characters.
Chris Lattner5f180322007-12-09 21:11:08 +0000243 if (!MoveToLine(Tok.getLocation()))
244 return false;
Reid Spencer5f016e22007-07-11 17:01:13 +0000245
246 // Print out space characters so that the first token on a line is
247 // indented for easy reading.
Chris Lattner9dc1f532007-07-20 16:37:10 +0000248 const SourceManager &SourceMgr = PP.getSourceManager();
Chris Lattnerf7cf85b2009-01-16 07:36:28 +0000249 unsigned ColNo = SourceMgr.getInstantiationColumnNumber(Tok.getLocation());
Reid Spencer5f016e22007-07-11 17:01:13 +0000250
251 // This hack prevents stuff like:
252 // #define HASH #
253 // HASH define foo bar
254 // From having the # character end up at column 1, which makes it so it
255 // is not handled as a #define next time through the preprocessor if in
256 // -fpreprocessed mode.
Chris Lattner057aaf62007-10-09 18:03:42 +0000257 if (ColNo <= 1 && Tok.is(tok::hash))
Chris Lattnere96de3e2008-08-17 03:12:02 +0000258 OS << ' ';
Reid Spencer5f016e22007-07-11 17:01:13 +0000259
260 // Otherwise, indent the appropriate number of spaces.
261 for (; ColNo > 1; --ColNo)
Chris Lattnere96de3e2008-08-17 03:12:02 +0000262 OS << ' ';
Chris Lattner5f180322007-12-09 21:11:08 +0000263
264 return true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000265}
266
267namespace {
268struct UnknownPragmaHandler : public PragmaHandler {
269 const char *Prefix;
270 PrintPPOutputPPCallbacks *Callbacks;
271
272 UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks)
273 : PragmaHandler(0), Prefix(prefix), Callbacks(callbacks) {}
Chris Lattnerd2177732007-07-20 16:59:19 +0000274 virtual void HandlePragma(Preprocessor &PP, Token &PragmaTok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000275 // Figure out what line we went to and insert the appropriate number of
276 // newline characters.
277 Callbacks->MoveToLine(PragmaTok.getLocation());
Chris Lattnere96de3e2008-08-17 03:12:02 +0000278 Callbacks->OS.write(Prefix, strlen(Prefix));
Reid Spencer5f016e22007-07-11 17:01:13 +0000279
280 // Read and print all of the pragma tokens.
Chris Lattner057aaf62007-10-09 18:03:42 +0000281 while (PragmaTok.isNot(tok::eom)) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000282 if (PragmaTok.hasLeadingSpace())
Chris Lattnere96de3e2008-08-17 03:12:02 +0000283 Callbacks->OS << ' ';
Reid Spencer5f016e22007-07-11 17:01:13 +0000284 std::string TokSpell = PP.getSpelling(PragmaTok);
Chris Lattnere96de3e2008-08-17 03:12:02 +0000285 Callbacks->OS.write(&TokSpell[0], TokSpell.size());
Reid Spencer5f016e22007-07-11 17:01:13 +0000286 PP.LexUnexpandedToken(PragmaTok);
287 }
Chris Lattnere96de3e2008-08-17 03:12:02 +0000288 Callbacks->OS << '\n';
Reid Spencer5f016e22007-07-11 17:01:13 +0000289 }
290};
291} // end anonymous namespace
292
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000293
/// AvoidConcatInfo - Bit flags stored per token kind that tell AvoidConcat
/// how the *previous* token has to be treated.
enum AvoidConcatInfo {
  /// Default: the token never needs a separating space after it.  Most tokens
  /// (e.g. ',', ')', etc) are harmless when something is printed right after.
  aci_never_avoid_concat = 0,

  /// AvoidConcat has token-specific code for this kind, and that code needs
  /// the first character of the following token.
  aci_custom_firstchar = 1 << 0,

  /// AvoidConcat has token-specific code for this kind, but that code does not
  /// need the first character of the following token.
  aci_custom = 1 << 1,

  /// The token cannot safely be followed by an '=' character.  For example,
  /// "<<" turns into "<<=" when followed by an =.
  aci_avoid_equal = 1 << 2
};
313
314/// This array contains information for each token on what action to take when
315/// avoiding concatenation of tokens in the AvoidConcat method.
316static char TokenInfo[tok::NUM_TOKENS];
317
318/// InitAvoidConcatTokenInfo - Tokens that must avoid concatenation should be
319/// marked by this function.
320static void InitAvoidConcatTokenInfo() {
321 // These tokens have custom code in AvoidConcat.
322 TokenInfo[tok::identifier ] |= aci_custom;
323 TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
324 TokenInfo[tok::period ] |= aci_custom_firstchar;
325 TokenInfo[tok::amp ] |= aci_custom_firstchar;
326 TokenInfo[tok::plus ] |= aci_custom_firstchar;
327 TokenInfo[tok::minus ] |= aci_custom_firstchar;
328 TokenInfo[tok::slash ] |= aci_custom_firstchar;
329 TokenInfo[tok::less ] |= aci_custom_firstchar;
330 TokenInfo[tok::greater ] |= aci_custom_firstchar;
331 TokenInfo[tok::pipe ] |= aci_custom_firstchar;
332 TokenInfo[tok::percent ] |= aci_custom_firstchar;
333 TokenInfo[tok::colon ] |= aci_custom_firstchar;
334 TokenInfo[tok::hash ] |= aci_custom_firstchar;
335 TokenInfo[tok::arrow ] |= aci_custom_firstchar;
336
337 // These tokens change behavior if followed by an '='.
338 TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
339 TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
340 TokenInfo[tok::minus ] |= aci_avoid_equal; // -=
341 TokenInfo[tok::slash ] |= aci_avoid_equal; // /=
342 TokenInfo[tok::less ] |= aci_avoid_equal; // <=
343 TokenInfo[tok::greater ] |= aci_avoid_equal; // >=
344 TokenInfo[tok::pipe ] |= aci_avoid_equal; // |=
345 TokenInfo[tok::percent ] |= aci_avoid_equal; // %=
346 TokenInfo[tok::star ] |= aci_avoid_equal; // *=
347 TokenInfo[tok::exclaim ] |= aci_avoid_equal; // !=
348 TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<=
349 TokenInfo[tok::greaterequal] |= aci_avoid_equal; // >>=
350 TokenInfo[tok::caret ] |= aci_avoid_equal; // ^=
351 TokenInfo[tok::equal ] |= aci_avoid_equal; // ==
352}
353
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000354/// StartsWithL - Return true if the spelling of this token starts with 'L'.
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000355static bool StartsWithL(const Token &Tok, Preprocessor &PP) {
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000356 if (!Tok.needsCleaning()) {
357 SourceManager &SrcMgr = PP.getSourceManager();
Chris Lattnerdf7c17a2009-01-16 07:00:02 +0000358 return *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()))
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000359 == 'L';
360 }
361
362 if (Tok.getLength() < 256) {
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000363 char Buffer[256];
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000364 const char *TokPtr = Buffer;
365 PP.getSpelling(Tok, TokPtr);
366 return TokPtr[0] == 'L';
367 }
368
369 return PP.getSpelling(Tok)[0] == 'L';
370}
371
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000372/// IsIdentifierL - Return true if the spelling of this token is literally 'L'.
373static bool IsIdentifierL(const Token &Tok, Preprocessor &PP) {
374 if (!Tok.needsCleaning()) {
375 if (Tok.getLength() != 1)
376 return false;
377 SourceManager &SrcMgr = PP.getSourceManager();
Chris Lattnerdf7c17a2009-01-16 07:00:02 +0000378 return *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()))
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000379 == 'L';
380 }
381
382 if (Tok.getLength() < 256) {
383 char Buffer[256];
384 const char *TokPtr = Buffer;
385 if (PP.getSpelling(Tok, TokPtr) != 1)
386 return false;
387 return TokPtr[0] == 'L';
388 }
389
390 return PP.getSpelling(Tok) == "L";
391}
392
393
Reid Spencer5f016e22007-07-11 17:01:13 +0000394/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
395/// the two individual tokens to be lexed as a single token, return true (which
396/// causes a space to be printed between them). This allows the output of -E
397/// mode to be lexed to the same token stream as lexing the input directly
398/// would.
399///
400/// This code must conservatively return true if it doesn't want to be 100%
401/// accurate. This will cause the output to include extra space characters, but
402/// the resulting output won't have incorrect concatenations going on. Examples
403/// include "..", which we print with a space between, because we don't want to
404/// track enough to tell "x.." from "...".
Chris Lattnerd2177732007-07-20 16:59:19 +0000405bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
406 const Token &Tok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000407 char Buffer[256];
408
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000409 tok::TokenKind PrevKind = PrevTok.getKind();
410 if (PrevTok.getIdentifierInfo()) // Language keyword or named operator.
411 PrevKind = tok::identifier;
412
413 // Look up information on when we should avoid concatenation with prevtok.
414 unsigned ConcatInfo = TokenInfo[PrevKind];
415
416 // If prevtok never causes a problem for anything after it, return quickly.
417 if (ConcatInfo == 0) return false;
Reid Spencer5f016e22007-07-11 17:01:13 +0000418
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000419 if (ConcatInfo & aci_avoid_equal) {
420 // If the next token is '=' or '==', avoid concatenation.
Chris Lattner057aaf62007-10-09 18:03:42 +0000421 if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000422 return true;
Chris Lattnerb638a302007-07-23 23:21:34 +0000423 ConcatInfo &= ~aci_avoid_equal;
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000424 }
425
426 if (ConcatInfo == 0) return false;
427
428
429
Reid Spencer5f016e22007-07-11 17:01:13 +0000430 // Basic algorithm: we look at the first character of the second token, and
431 // determine whether it, if appended to the first token, would form (or would
432 // contribute) to a larger token if concatenated.
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000433 char FirstChar = 0;
434 if (ConcatInfo & aci_custom) {
435 // If the token does not need to know the first character, don't get it.
436 } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000437 // Avoid spelling identifiers, the most common form of token.
438 FirstChar = II->getName()[0];
Chris Lattnerb19f5e82007-07-23 05:18:42 +0000439 } else if (!Tok.needsCleaning()) {
Chris Lattner33116d62009-01-26 19:33:54 +0000440 if (Tok.isLiteral() && Tok.getLiteralData()) {
441 FirstChar = *Tok.getLiteralData();
442 } else {
443 SourceManager &SrcMgr = PP.getSourceManager();
444 FirstChar =
445 *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()));
446 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000447 } else if (Tok.getLength() < 256) {
448 const char *TokPtr = Buffer;
449 PP.getSpelling(Tok, TokPtr);
450 FirstChar = TokPtr[0];
451 } else {
452 FirstChar = PP.getSpelling(Tok)[0];
453 }
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000454
Reid Spencer5f016e22007-07-11 17:01:13 +0000455 switch (PrevKind) {
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000456 default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
Reid Spencer5f016e22007-07-11 17:01:13 +0000457 case tok::identifier: // id+id or id+number or id+L"foo".
Chris Lattner057aaf62007-10-09 18:03:42 +0000458 if (Tok.is(tok::numeric_constant) || Tok.getIdentifierInfo() ||
459 Tok.is(tok::wide_string_literal) /* ||
460 Tok.is(tok::wide_char_literal)*/)
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000461 return true;
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000462
463 // If this isn't identifier + string, we're done.
464 if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000465 return false;
466
467 // FIXME: need a wide_char_constant!
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000468
469 // If the string was a wide string L"foo" or wide char L'f', it would concat
470 // with the previous identifier into fooL"bar". Avoid this.
471 if (StartsWithL(Tok, PP))
472 return true;
473
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000474 // Otherwise, this is a narrow character or string. If the *identifier* is
475 // a literal 'L', avoid pasting L "foo" -> L"foo".
476 return IsIdentifierL(PrevTok, PP);
Reid Spencer5f016e22007-07-11 17:01:13 +0000477 case tok::numeric_constant:
Chris Lattner057aaf62007-10-09 18:03:42 +0000478 return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
Reid Spencer5f016e22007-07-11 17:01:13 +0000479 FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
480 case tok::period: // ..., .*, .1234
Chris Lattnerd7a7c002009-01-11 19:48:19 +0000481 return FirstChar == '.' || isdigit(FirstChar) ||
482 (FirstChar == '*' && PP.getLangOptions().CPlusPlus);
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000483 case tok::amp: // &&
484 return FirstChar == '&';
485 case tok::plus: // ++
486 return FirstChar == '+';
487 case tok::minus: // --, ->, ->*
488 return FirstChar == '-' || FirstChar == '>';
489 case tok::slash: //, /*, //
490 return FirstChar == '*' || FirstChar == '/';
491 case tok::less: // <<, <<=, <:, <%
492 return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
493 case tok::greater: // >>, >>=
494 return FirstChar == '>';
495 case tok::pipe: // ||
496 return FirstChar == '|';
497 case tok::percent: // %>, %:
Chris Lattnerd7a7c002009-01-11 19:48:19 +0000498 return (FirstChar == '>' || FirstChar == ':') &&
499 PP.getLangOptions().Digraphs;
Reid Spencer5f016e22007-07-11 17:01:13 +0000500 case tok::colon: // ::, :>
Chris Lattnerd7a7c002009-01-11 19:48:19 +0000501 return (FirstChar == ':' && PP.getLangOptions().CPlusPlus) ||
502 (FirstChar == '>' && PP.getLangOptions().Digraphs);
Reid Spencer5f016e22007-07-11 17:01:13 +0000503 case tok::hash: // ##, #@, %:%:
504 return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
505 case tok::arrow: // ->*
506 return FirstChar == '*';
Reid Spencer5f016e22007-07-11 17:01:13 +0000507 }
508}
509
Chris Lattner59076ab2009-02-06 05:56:11 +0000510static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
511 PrintPPOutputPPCallbacks *Callbacks,
512 llvm::raw_ostream &OS) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000513 char Buffer[256];
Chris Lattner59076ab2009-02-06 05:56:11 +0000514 Token PrevTok;
Chris Lattner6f688e12007-10-10 20:45:16 +0000515 while (1) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000516
517 // If this token is at the start of a line, emit newlines if needed.
Chris Lattner5f180322007-12-09 21:11:08 +0000518 if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
519 // done.
Reid Spencer5f016e22007-07-11 17:01:13 +0000520 } else if (Tok.hasLeadingSpace() ||
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000521 // If we haven't emitted a token on this line yet, PrevTok isn't
522 // useful to look at and no concatenation could happen anyway.
Chris Lattnerb638a302007-07-23 23:21:34 +0000523 (Callbacks->hasEmittedTokensOnThisLine() &&
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000524 // Don't print "-" next to "-", it would form "--".
525 Callbacks->AvoidConcat(PrevTok, Tok))) {
Chris Lattnere96de3e2008-08-17 03:12:02 +0000526 OS << ' ';
Reid Spencer5f016e22007-07-11 17:01:13 +0000527 }
528
Chris Lattner2933f412007-07-23 06:14:36 +0000529 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
Chris Lattner33116d62009-01-26 19:33:54 +0000530 OS.write(II->getName(), II->getLength());
531 } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
532 Tok.getLiteralData()) {
533 OS.write(Tok.getLiteralData(), Tok.getLength());
Chris Lattner2933f412007-07-23 06:14:36 +0000534 } else if (Tok.getLength() < 256) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000535 const char *TokPtr = Buffer;
536 unsigned Len = PP.getSpelling(Tok, TokPtr);
Chris Lattnere96de3e2008-08-17 03:12:02 +0000537 OS.write(TokPtr, Len);
Reid Spencer5f016e22007-07-11 17:01:13 +0000538 } else {
539 std::string S = PP.getSpelling(Tok);
Chris Lattnere96de3e2008-08-17 03:12:02 +0000540 OS.write(&S[0], S.size());
Reid Spencer5f016e22007-07-11 17:01:13 +0000541 }
542 Callbacks->SetEmittedTokensOnThisLine();
Chris Lattner6f688e12007-10-10 20:45:16 +0000543
544 if (Tok.is(tok::eof)) break;
Chris Lattner59076ab2009-02-06 05:56:11 +0000545
Chris Lattner6f688e12007-10-10 20:45:16 +0000546 PrevTok = Tok;
547 PP.Lex(Tok);
548 }
Chris Lattner59076ab2009-02-06 05:56:11 +0000549}
550
Chris Lattnerf73903a2009-02-06 06:45:26 +0000551/// PrintMacroDefinition - Print a macro definition in a form that will be
552/// properly accepted back as a definition.
553static void PrintMacroDefinition(IdentifierInfo &II, const MacroInfo &MI,
554 Preprocessor &PP, llvm::raw_ostream &OS) {
555 // Ignore computed macros like __LINE__ and friends.
556 if (MI.isBuiltinMacro()) return;
557 OS << "#define " << II.getName();
558
559 if (MI.isFunctionLike()) {
560 OS << '(';
561 if (MI.arg_empty())
562 ;
563 else if (MI.getNumArgs() == 1)
564 OS << (*MI.arg_begin())->getName();
565 else {
566 MacroInfo::arg_iterator AI = MI.arg_begin(), E = MI.arg_end();
567 OS << (*AI++)->getName();
568 while (AI != E)
569 OS << ',' << (*AI++)->getName();
570 }
571
572 if (MI.isVariadic()) {
573 if (!MI.arg_empty())
574 OS << ',';
575 OS << "...";
576 }
577 OS << ')';
578 }
579
580 // GCC always emits a space, even if the macro body is empty. However, do not
581 // want to emit two spaces if the first token has a leading space.
582 if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
583 OS << ' ';
584
585 for (MacroInfo::tokens_iterator I = MI.tokens_begin(), E = MI.tokens_end();
586 I != E; ++I) {
587 if (I->hasLeadingSpace())
588 OS << ' ';
589 OS << PP.getSpelling(*I);
590 }
591 OS << "\n";
592}
593
Chris Lattner59076ab2009-02-06 05:56:11 +0000594
595/// DoPrintPreprocessedInput - This implements -E mode.
596///
597void clang::DoPrintPreprocessedInput(Preprocessor &PP,
598 const std::string &OutFile) {
599 // Inform the preprocessor whether we want it to retain comments or not, due
600 // to -C or -CC.
601 PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput);
602 InitAvoidConcatTokenInfo();
603
604
605 // Open the output buffer.
606 std::string Err;
607 llvm::raw_fd_ostream OS(OutFile.empty() ? "-" : OutFile.c_str(), false, Err);
608 if (!Err.empty()) {
609 fprintf(stderr, "%s\n", Err.c_str());
610 exit(1);
611 }
612
613 OS.SetBufferSize(64*1024);
614
Chris Lattnerf73903a2009-02-06 06:45:26 +0000615 if (DumpMacros) {
616 // -dM mode just scans and ignores all tokens in the files, then dumps out
617 // the macro table at the end.
618 PP.EnterMainSourceFile();
619
620 Token Tok;
621 do PP.Lex(Tok);
622 while (Tok.isNot(tok::eof));
623
624 for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
625 I != E; ++I)
626 PrintMacroDefinition(*I->first, *I->second, PP, OS);
627
628 } else {
629 PrintPPOutputPPCallbacks *Callbacks;
630 Callbacks = new PrintPPOutputPPCallbacks(PP, OS);
631 PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks));
632 PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",
633 Callbacks));
Chris Lattner59076ab2009-02-06 05:56:11 +0000634
Chris Lattnerf73903a2009-02-06 06:45:26 +0000635 PP.setPPCallbacks(Callbacks);
Chris Lattner59076ab2009-02-06 05:56:11 +0000636
Chris Lattnerf73903a2009-02-06 06:45:26 +0000637 // After we have configured the preprocessor, enter the main file.
638 PP.EnterMainSourceFile();
Chris Lattner59076ab2009-02-06 05:56:11 +0000639
Chris Lattnerf73903a2009-02-06 06:45:26 +0000640 // Consume all of the tokens that come from the predefines buffer. Those
641 // should not be emitted into the output and are guaranteed to be at the
642 // start.
643 const SourceManager &SourceMgr = PP.getSourceManager();
644 Token Tok;
645 do PP.Lex(Tok);
646 while (Tok.isNot(tok::eof) && Tok.getLocation().isFileID() &&
647 !strcmp(SourceMgr.getPresumedLoc(Tok.getLocation()).getFilename(),
648 "<predefines>"));
Chris Lattner59076ab2009-02-06 05:56:11 +0000649
Chris Lattnerf73903a2009-02-06 06:45:26 +0000650 // Read all the preprocessed tokens, printing them out to the stream.
651 PrintPreprocessedTokens(PP, Tok, Callbacks, OS);
652 OS << '\n';
653 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000654
Chris Lattner76b3a722008-08-17 07:07:01 +0000655 // Flush the ostream.
656 OS.flush();
Chris Lattnere96de3e2008-08-17 03:12:02 +0000657
658 // If an error occurred, remove the output file.
659 if (PP.getDiagnostics().hasErrorOccurred() && !OutFile.empty())
660 llvm::sys::Path(OutFile).eraseFromDisk();
Reid Spencer5f016e22007-07-11 17:01:13 +0000661}
662