//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This code simply runs the preprocessor on the input file and prints out the
// result.  This is the traditional behavior of the -E option.
//
//===----------------------------------------------------------------------===//
14
15#include "clang.h"
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +000016#include "clang/Lex/PPCallbacks.h"
Chris Lattner09e3cdf2006-07-04 19:04:05 +000017#include "clang/Lex/Preprocessor.h"
18#include "clang/Lex/Pragma.h"
19#include "clang/Basic/SourceManager.h"
Chris Lattner21632652008-04-08 04:16:20 +000020#include "clang/Basic/Diagnostic.h"
Chris Lattner4c4a2452007-07-24 06:57:14 +000021#include "llvm/ADT/SmallString.h"
Chris Lattnerf46be6c2006-07-04 22:19:33 +000022#include "llvm/ADT/StringExtras.h"
Chris Lattner21632652008-04-08 04:16:20 +000023#include "llvm/System/Path.h"
24#include "llvm/Support/CommandLine.h"
Chris Lattnerf46be6c2006-07-04 22:19:33 +000025#include "llvm/Config/config.h"
Chris Lattnerb5a92f82008-08-17 01:47:12 +000026#include "llvm/Support/raw_ostream.h"
Chris Lattnerdeb37012006-07-04 19:24:06 +000027#include <cstdio>
Chris Lattner09e3cdf2006-07-04 19:04:05 +000028using namespace clang;
29
Chris Lattner21632652008-04-08 04:16:20 +000030static std::string OutputFilename;
Chris Lattnerb5a92f82008-08-17 01:47:12 +000031static llvm::raw_ostream *OutStream;
Chris Lattnerf46be6c2006-07-04 22:19:33 +000032
33/// InitOutputBuffer - Initialize our output buffer.
34///
Chris Lattner02bf4442008-01-27 23:55:11 +000035static void InitOutputBuffer(const std::string& Output) {
Chris Lattnerb5a92f82008-08-17 01:47:12 +000036 if (!Output.size() || Output == "-") {
37 OutputFilename = "<stdout>";
38 OutStream = new llvm::raw_stdout_ostream();
39 } else {
Chris Lattner21632652008-04-08 04:16:20 +000040 OutputFilename = Output;
Chris Lattnerb5a92f82008-08-17 01:47:12 +000041 std::string Err;
42 OutStream = new llvm::raw_fd_ostream(Output.c_str(), Err);
Chris Lattner47fb9ef2008-04-11 06:14:11 +000043
Chris Lattnerb5a92f82008-08-17 01:47:12 +000044 if (!Err.empty()) {
45 delete OutStream;
46 fprintf(stderr, "%s\n", Err.c_str());
Chris Lattner47fb9ef2008-04-11 06:14:11 +000047 exit(1);
48 }
Chris Lattner21632652008-04-08 04:16:20 +000049 }
Chris Lattnerb5a92f82008-08-17 01:47:12 +000050 OutStream->SetBufferSize(64*1024);
Chris Lattnerf46be6c2006-07-04 22:19:33 +000051}
52
Chris Lattnerf46be6c2006-07-04 22:19:33 +000053/// CleanupOutputBuffer - Finish up output.
54///
Chris Lattner21632652008-04-08 04:16:20 +000055static void CleanupOutputBuffer(bool ErrorOccurred) {
Chris Lattnerb5a92f82008-08-17 01:47:12 +000056 delete OutStream;
Chris Lattner21632652008-04-08 04:16:20 +000057
58 // If an error occurred, remove the output file.
59 if (ErrorOccurred && !OutputFilename.empty())
60 llvm::sys::Path(OutputFilename).eraseFromDisk();
Chris Lattnerf46be6c2006-07-04 22:19:33 +000061}
62
Chris Lattnerb5a92f82008-08-17 01:47:12 +000063static inline void OutputChar(char c) {
64 *OutStream << c;
Chris Lattnerf46be6c2006-07-04 22:19:33 +000065}
66
Chris Lattnerb5a92f82008-08-17 01:47:12 +000067static inline void OutputString(const char *Ptr, unsigned Size) {
68 OutStream->write(Ptr, Size);
Chris Lattnerf46be6c2006-07-04 22:19:33 +000069}
70
71
72//===----------------------------------------------------------------------===//
73// Preprocessed token printer
74//===----------------------------------------------------------------------===//
75
Chris Lattner23b7eb62007-06-15 23:05:46 +000076static llvm::cl::opt<bool>
77DisableLineMarkers("P", llvm::cl::desc("Disable linemarker output in -E mode"));
78static llvm::cl::opt<bool>
79EnableCommentOutput("C", llvm::cl::desc("Enable comment output in -E mode"));
80static llvm::cl::opt<bool>
81EnableMacroCommentOutput("CC",
82 llvm::cl::desc("Enable comment output in -E mode, "
Chris Lattner457fc152006-07-29 06:30:25 +000083 "even from macro expansions"));
Chris Lattner09e3cdf2006-07-04 19:04:05 +000084
Chris Lattner87f267e2006-11-21 05:02:33 +000085namespace {
86class PrintPPOutputPPCallbacks : public PPCallbacks {
87 Preprocessor &PP;
88 unsigned CurLine;
Chris Lattner87f267e2006-11-21 05:02:33 +000089 bool EmittedTokensOnThisLine;
90 DirectoryLookup::DirType FileType;
Chris Lattner4c4a2452007-07-24 06:57:14 +000091 llvm::SmallString<512> CurFilename;
Chris Lattner87f267e2006-11-21 05:02:33 +000092public:
93 PrintPPOutputPPCallbacks(Preprocessor &pp) : PP(pp) {
94 CurLine = 0;
Chris Lattner4c4a2452007-07-24 06:57:14 +000095 CurFilename += "<uninit>";
Chris Lattner87f267e2006-11-21 05:02:33 +000096 EmittedTokensOnThisLine = false;
97 FileType = DirectoryLookup::NormalHeaderDir;
98 }
99
100 void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
Chris Lattner4418ce12007-07-23 06:09:34 +0000101 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
Chris Lattner87f267e2006-11-21 05:02:33 +0000102
103 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
104 DirectoryLookup::DirType FileType);
105 virtual void Ident(SourceLocation Loc, const std::string &str);
106
107
Chris Lattner3ed83c12007-12-09 21:11:08 +0000108 bool HandleFirstTokOnLine(Token &Tok);
109 bool MoveToLine(SourceLocation Loc);
Chris Lattner146762e2007-07-20 16:59:19 +0000110 bool AvoidConcat(const Token &PrevTok, const Token &Tok);
Chris Lattner87f267e2006-11-21 05:02:33 +0000111};
Chris Lattner21632652008-04-08 04:16:20 +0000112} // end anonymous namespace
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000113
/// UToStr - Format N as a decimal, null-terminated string, written backwards
/// into the buffer that ends at EndPtr (one past its last byte).  Returns a
/// pointer to the first digit.  The caller must provide enough room before
/// EndPtr for all digits plus the terminator.
static char *UToStr(unsigned N, char *EndPtr) {
  char *Cursor = EndPtr;

  // The terminator occupies the last byte of the buffer.
  *--Cursor = '\0';

  // Emit digits from least to most significant.  Running the body at least
  // once makes zero come out as "0".
  do {
    *--Cursor = '0' + char(N % 10);
    N /= 10;
  } while (N != 0);

  return Cursor;
}
127
128
Chris Lattner728b4dc2006-07-04 21:28:37 +0000129/// MoveToLine - Move the output to the source line specified by the location
130/// object. We can do this by emitting some number of \n's, or be emitting a
Chris Lattner3ed83c12007-12-09 21:11:08 +0000131/// #line directive. This returns false if already at the specified line, true
132/// if some newlines were emitted.
133bool PrintPPOutputPPCallbacks::MoveToLine(SourceLocation Loc) {
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000134 if (DisableLineMarkers) {
Chris Lattner3ed83c12007-12-09 21:11:08 +0000135 unsigned LineNo = PP.getSourceManager().getLogicalLineNumber(Loc);
136 if (LineNo == CurLine) return false;
137
138 CurLine = LineNo;
139
140 if (!EmittedTokensOnThisLine)
141 return true;
142
143 OutputChar('\n');
144 EmittedTokensOnThisLine = false;
145 return true;
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000146 }
Chris Lattner87f267e2006-11-21 05:02:33 +0000147
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000148 unsigned LineNo = PP.getSourceManager().getLogicalLineNumber(Loc);
Chris Lattner3338ba82006-07-04 21:19:39 +0000149
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000150 // If this line is "close enough" to the original line, just print newlines,
151 // otherwise print a #line directive.
Chris Lattner87f267e2006-11-21 05:02:33 +0000152 if (LineNo-CurLine < 8) {
Chris Lattner5f075822007-07-23 05:14:05 +0000153 if (LineNo-CurLine == 1)
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000154 OutputChar('\n');
Chris Lattner3ed83c12007-12-09 21:11:08 +0000155 else if (LineNo == CurLine)
156 return false; // Phys line moved, but logical line didn't.
Chris Lattner5f075822007-07-23 05:14:05 +0000157 else {
158 const char *NewLines = "\n\n\n\n\n\n\n\n";
159 OutputString(NewLines, LineNo-CurLine);
Chris Lattner5f075822007-07-23 05:14:05 +0000160 }
Chris Lattner84f45072007-12-09 20:45:43 +0000161 CurLine = LineNo;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000162 } else {
Chris Lattner87f267e2006-11-21 05:02:33 +0000163 if (EmittedTokensOnThisLine) {
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000164 OutputChar('\n');
Chris Lattner87f267e2006-11-21 05:02:33 +0000165 EmittedTokensOnThisLine = false;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000166 }
167
Chris Lattner87f267e2006-11-21 05:02:33 +0000168 CurLine = LineNo;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000169
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000170 OutputChar('#');
171 OutputChar(' ');
Chris Lattner5cdfebb2007-07-23 06:31:11 +0000172 char NumberBuffer[20];
173 const char *NumStr = UToStr(LineNo, NumberBuffer+20);
174 OutputString(NumStr, (NumberBuffer+20)-NumStr-1);
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000175 OutputChar(' ');
Chris Lattner9b796242007-07-22 06:38:50 +0000176 OutputChar('"');
Chris Lattner87f267e2006-11-21 05:02:33 +0000177 OutputString(&CurFilename[0], CurFilename.size());
Chris Lattner9b796242007-07-22 06:38:50 +0000178 OutputChar('"');
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000179
Chris Lattner87f267e2006-11-21 05:02:33 +0000180 if (FileType == DirectoryLookup::SystemHeaderDir)
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000181 OutputString(" 3", 2);
Chris Lattner87f267e2006-11-21 05:02:33 +0000182 else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000183 OutputString(" 3 4", 4);
184 OutputChar('\n');
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000185 }
Chris Lattner3ed83c12007-12-09 21:11:08 +0000186 return true;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000187}
188
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000189
190/// FileChanged - Whenever the preprocessor enters or exits a #include file
191/// it invokes this handler. Update our conception of the current source
192/// position.
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000193void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
194 FileChangeReason Reason,
195 DirectoryLookup::DirType FileType) {
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000196 // Unless we are exiting a #include, make sure to skip ahead to the line the
197 // #include directive was at.
Chris Lattner87f267e2006-11-21 05:02:33 +0000198 SourceManager &SourceMgr = PP.getSourceManager();
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000199 if (Reason == PPCallbacks::EnterFile) {
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000200 MoveToLine(SourceMgr.getIncludeLoc(Loc));
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000201 } else if (Reason == PPCallbacks::SystemHeaderPragma) {
Chris Lattner3338ba82006-07-04 21:19:39 +0000202 MoveToLine(Loc);
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000203
204 // TODO GCC emits the # directive for this directive on the line AFTER the
205 // directive and emits a bunch of spaces that aren't needed. Emulate this
206 // strange behavior.
207 }
208
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000209 Loc = SourceMgr.getLogicalLoc(Loc);
Chris Lattner87f267e2006-11-21 05:02:33 +0000210 CurLine = SourceMgr.getLineNumber(Loc);
Chris Lattner3ed83c12007-12-09 21:11:08 +0000211
212 if (DisableLineMarkers) return;
213
Chris Lattner4c4a2452007-07-24 06:57:14 +0000214 CurFilename.clear();
215 CurFilename += SourceMgr.getSourceName(Loc);
216 Lexer::Stringify(CurFilename);
Chris Lattner87f267e2006-11-21 05:02:33 +0000217 FileType = FileType;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000218
Chris Lattner87f267e2006-11-21 05:02:33 +0000219 if (EmittedTokensOnThisLine) {
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000220 OutputChar('\n');
Chris Lattner87f267e2006-11-21 05:02:33 +0000221 EmittedTokensOnThisLine = false;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000222 }
223
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000224 OutputChar('#');
225 OutputChar(' ');
Chris Lattner1af0e012007-07-24 06:59:01 +0000226
227 char NumberBuffer[20];
228 const char *NumStr = UToStr(CurLine, NumberBuffer+20);
229 OutputString(NumStr, (NumberBuffer+20)-NumStr-1);
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000230 OutputChar(' ');
Chris Lattner9b796242007-07-22 06:38:50 +0000231 OutputChar('"');
Chris Lattner87f267e2006-11-21 05:02:33 +0000232 OutputString(&CurFilename[0], CurFilename.size());
Chris Lattner9b796242007-07-22 06:38:50 +0000233 OutputChar('"');
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000234
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000235 switch (Reason) {
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000236 case PPCallbacks::EnterFile:
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000237 OutputString(" 1", 2);
Chris Lattner3338ba82006-07-04 21:19:39 +0000238 break;
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000239 case PPCallbacks::ExitFile:
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000240 OutputString(" 2", 2);
Chris Lattner3338ba82006-07-04 21:19:39 +0000241 break;
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000242 case PPCallbacks::SystemHeaderPragma: break;
243 case PPCallbacks::RenameFile: break;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000244 }
245
246 if (FileType == DirectoryLookup::SystemHeaderDir)
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000247 OutputString(" 3", 2);
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000248 else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000249 OutputString(" 3 4", 4);
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000250
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000251 OutputChar('\n');
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000252}
253
Chris Lattner728b4dc2006-07-04 21:28:37 +0000254/// HandleIdent - Handle #ident directives when read by the preprocessor.
255///
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000256void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
Chris Lattner3338ba82006-07-04 21:19:39 +0000257 MoveToLine(Loc);
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000258
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000259 OutputString("#ident ", strlen("#ident "));
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000260 OutputString(&S[0], S.size());
Chris Lattner87f267e2006-11-21 05:02:33 +0000261 EmittedTokensOnThisLine = true;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000262}
263
264/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
Chris Lattner3ed83c12007-12-09 21:11:08 +0000265/// is called for the first token on each new line. If this really is the start
266/// of a new logical line, handle it and return true, otherwise return false.
267/// This may not be the start of a logical line because the "start of line"
268/// marker is set for physical lines, not logical ones.
269bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000270 // Figure out what line we went to and insert the appropriate number of
271 // newline characters.
Chris Lattner3ed83c12007-12-09 21:11:08 +0000272 if (!MoveToLine(Tok.getLocation()))
273 return false;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000274
275 // Print out space characters so that the first token on a line is
276 // indented for easy reading.
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000277 const SourceManager &SourceMgr = PP.getSourceManager();
278 unsigned ColNo = SourceMgr.getLogicalColumnNumber(Tok.getLocation());
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000279
280 // This hack prevents stuff like:
281 // #define HASH #
282 // HASH define foo bar
283 // From having the # character end up at column 1, which makes it so it
284 // is not handled as a #define next time through the preprocessor if in
285 // -fpreprocessed mode.
Chris Lattner3c69f122007-10-09 18:03:42 +0000286 if (ColNo <= 1 && Tok.is(tok::hash))
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000287 OutputChar(' ');
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000288
289 // Otherwise, indent the appropriate number of spaces.
290 for (; ColNo > 1; --ColNo)
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000291 OutputChar(' ');
Chris Lattner3ed83c12007-12-09 21:11:08 +0000292
293 return true;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000294}
295
Chris Lattner5de858c2006-07-04 19:04:44 +0000296namespace {
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000297struct UnknownPragmaHandler : public PragmaHandler {
298 const char *Prefix;
Chris Lattner87f267e2006-11-21 05:02:33 +0000299 PrintPPOutputPPCallbacks *Callbacks;
300
301 UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks)
302 : PragmaHandler(0), Prefix(prefix), Callbacks(callbacks) {}
Chris Lattner146762e2007-07-20 16:59:19 +0000303 virtual void HandlePragma(Preprocessor &PP, Token &PragmaTok) {
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000304 // Figure out what line we went to and insert the appropriate number of
305 // newline characters.
Chris Lattner87f267e2006-11-21 05:02:33 +0000306 Callbacks->MoveToLine(PragmaTok.getLocation());
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000307 OutputString(Prefix, strlen(Prefix));
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000308
309 // Read and print all of the pragma tokens.
Chris Lattner3c69f122007-10-09 18:03:42 +0000310 while (PragmaTok.isNot(tok::eom)) {
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000311 if (PragmaTok.hasLeadingSpace())
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000312 OutputChar(' ');
313 std::string TokSpell = PP.getSpelling(PragmaTok);
314 OutputString(&TokSpell[0], TokSpell.size());
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000315 PP.LexUnexpandedToken(PragmaTok);
316 }
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000317 OutputChar('\n');
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000318 }
319};
Chris Lattner5de858c2006-07-04 19:04:44 +0000320} // end anonymous namespace
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000321
Chris Lattner4418ce12007-07-23 06:09:34 +0000322
/// AvoidConcatInfo - Bit flags stored per token kind that describe what
/// AvoidConcat must check when that token is the first of two adjacent tokens.
enum AvoidConcatInfo {
  /// No flags: the token never needs to avoid concatenation.  Most tokens
  /// (e.g. ',', ')', etc) don't cause a problem when concatenated.
  aci_never_avoid_concat = 0,

  /// AvoidConcat has custom code for this token, and that code needs to know
  /// the first character of the following token.
  aci_custom_firstchar = 1 << 0,

  /// AvoidConcat has custom code for this token, but that code does not need
  /// the first character of the following token.
  aci_custom = 1 << 1,

  /// The token cannot be safely followed by an '=' character.  For example,
  /// "<<" turns into "<<=" when followed by an =.
  aci_avoid_equal = 1 << 2
};
342
343/// This array contains information for each token on what action to take when
344/// avoiding concatenation of tokens in the AvoidConcat method.
345static char TokenInfo[tok::NUM_TOKENS];
346
347/// InitAvoidConcatTokenInfo - Tokens that must avoid concatenation should be
348/// marked by this function.
349static void InitAvoidConcatTokenInfo() {
350 // These tokens have custom code in AvoidConcat.
351 TokenInfo[tok::identifier ] |= aci_custom;
352 TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
353 TokenInfo[tok::period ] |= aci_custom_firstchar;
354 TokenInfo[tok::amp ] |= aci_custom_firstchar;
355 TokenInfo[tok::plus ] |= aci_custom_firstchar;
356 TokenInfo[tok::minus ] |= aci_custom_firstchar;
357 TokenInfo[tok::slash ] |= aci_custom_firstchar;
358 TokenInfo[tok::less ] |= aci_custom_firstchar;
359 TokenInfo[tok::greater ] |= aci_custom_firstchar;
360 TokenInfo[tok::pipe ] |= aci_custom_firstchar;
361 TokenInfo[tok::percent ] |= aci_custom_firstchar;
362 TokenInfo[tok::colon ] |= aci_custom_firstchar;
363 TokenInfo[tok::hash ] |= aci_custom_firstchar;
364 TokenInfo[tok::arrow ] |= aci_custom_firstchar;
365
366 // These tokens change behavior if followed by an '='.
367 TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
368 TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
369 TokenInfo[tok::minus ] |= aci_avoid_equal; // -=
370 TokenInfo[tok::slash ] |= aci_avoid_equal; // /=
371 TokenInfo[tok::less ] |= aci_avoid_equal; // <=
372 TokenInfo[tok::greater ] |= aci_avoid_equal; // >=
373 TokenInfo[tok::pipe ] |= aci_avoid_equal; // |=
374 TokenInfo[tok::percent ] |= aci_avoid_equal; // %=
375 TokenInfo[tok::star ] |= aci_avoid_equal; // *=
376 TokenInfo[tok::exclaim ] |= aci_avoid_equal; // !=
377 TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<=
378 TokenInfo[tok::greaterequal] |= aci_avoid_equal; // >>=
379 TokenInfo[tok::caret ] |= aci_avoid_equal; // ^=
380 TokenInfo[tok::equal ] |= aci_avoid_equal; // ==
381}
382
Chris Lattner15346fa2008-01-15 05:22:14 +0000383/// StartsWithL - Return true if the spelling of this token starts with 'L'.
Chris Lattner877ca772008-01-15 05:14:19 +0000384static bool StartsWithL(const Token &Tok, Preprocessor &PP) {
Chris Lattner877ca772008-01-15 05:14:19 +0000385 if (!Tok.needsCleaning()) {
386 SourceManager &SrcMgr = PP.getSourceManager();
387 return *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()))
388 == 'L';
389 }
390
391 if (Tok.getLength() < 256) {
Chris Lattner15346fa2008-01-15 05:22:14 +0000392 char Buffer[256];
Chris Lattner877ca772008-01-15 05:14:19 +0000393 const char *TokPtr = Buffer;
394 PP.getSpelling(Tok, TokPtr);
395 return TokPtr[0] == 'L';
396 }
397
398 return PP.getSpelling(Tok)[0] == 'L';
399}
400
Chris Lattner15346fa2008-01-15 05:22:14 +0000401/// IsIdentifierL - Return true if the spelling of this token is literally 'L'.
402static bool IsIdentifierL(const Token &Tok, Preprocessor &PP) {
403 if (!Tok.needsCleaning()) {
404 if (Tok.getLength() != 1)
405 return false;
406 SourceManager &SrcMgr = PP.getSourceManager();
407 return *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()))
408 == 'L';
409 }
410
411 if (Tok.getLength() < 256) {
412 char Buffer[256];
413 const char *TokPtr = Buffer;
414 if (PP.getSpelling(Tok, TokPtr) != 1)
415 return false;
416 return TokPtr[0] == 'L';
417 }
418
419 return PP.getSpelling(Tok) == "L";
420}
421
422
Chris Lattner331ad772006-07-28 06:56:01 +0000423/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
424/// the two individual tokens to be lexed as a single token, return true (which
425/// causes a space to be printed between them). This allows the output of -E
426/// mode to be lexed to the same token stream as lexing the input directly
427/// would.
428///
429/// This code must conservatively return true if it doesn't want to be 100%
430/// accurate. This will cause the output to include extra space characters, but
431/// the resulting output won't have incorrect concatenations going on. Examples
432/// include "..", which we print with a space between, because we don't want to
433/// track enough to tell "x.." from "...".
Chris Lattner146762e2007-07-20 16:59:19 +0000434bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
435 const Token &Tok) {
Chris Lattner331ad772006-07-28 06:56:01 +0000436 char Buffer[256];
437
Chris Lattner4418ce12007-07-23 06:09:34 +0000438 tok::TokenKind PrevKind = PrevTok.getKind();
439 if (PrevTok.getIdentifierInfo()) // Language keyword or named operator.
440 PrevKind = tok::identifier;
441
442 // Look up information on when we should avoid concatenation with prevtok.
443 unsigned ConcatInfo = TokenInfo[PrevKind];
444
445 // If prevtok never causes a problem for anything after it, return quickly.
446 if (ConcatInfo == 0) return false;
Chris Lattner331ad772006-07-28 06:56:01 +0000447
Chris Lattner4418ce12007-07-23 06:09:34 +0000448 if (ConcatInfo & aci_avoid_equal) {
449 // If the next token is '=' or '==', avoid concatenation.
Chris Lattner3c69f122007-10-09 18:03:42 +0000450 if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
Chris Lattner4418ce12007-07-23 06:09:34 +0000451 return true;
Chris Lattnerd63c8a52007-07-23 23:21:34 +0000452 ConcatInfo &= ~aci_avoid_equal;
Chris Lattner4418ce12007-07-23 06:09:34 +0000453 }
454
455 if (ConcatInfo == 0) return false;
456
457
458
Chris Lattner331ad772006-07-28 06:56:01 +0000459 // Basic algorithm: we look at the first character of the second token, and
460 // determine whether it, if appended to the first token, would form (or would
461 // contribute) to a larger token if concatenated.
Chris Lattner4418ce12007-07-23 06:09:34 +0000462 char FirstChar = 0;
463 if (ConcatInfo & aci_custom) {
464 // If the token does not need to know the first character, don't get it.
465 } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
Chris Lattner331ad772006-07-28 06:56:01 +0000466 // Avoid spelling identifiers, the most common form of token.
467 FirstChar = II->getName()[0];
Chris Lattnere4c566c2007-07-23 05:18:42 +0000468 } else if (!Tok.needsCleaning()) {
469 SourceManager &SrcMgr = PP.getSourceManager();
470 FirstChar =
471 *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()));
Chris Lattner331ad772006-07-28 06:56:01 +0000472 } else if (Tok.getLength() < 256) {
Chris Lattner9f547a42006-10-18 06:06:41 +0000473 const char *TokPtr = Buffer;
474 PP.getSpelling(Tok, TokPtr);
475 FirstChar = TokPtr[0];
Chris Lattner331ad772006-07-28 06:56:01 +0000476 } else {
477 FirstChar = PP.getSpelling(Tok)[0];
478 }
Chris Lattner4418ce12007-07-23 06:09:34 +0000479
Chris Lattner331ad772006-07-28 06:56:01 +0000480 switch (PrevKind) {
Chris Lattner4418ce12007-07-23 06:09:34 +0000481 default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
Chris Lattner331ad772006-07-28 06:56:01 +0000482 case tok::identifier: // id+id or id+number or id+L"foo".
Chris Lattner3c69f122007-10-09 18:03:42 +0000483 if (Tok.is(tok::numeric_constant) || Tok.getIdentifierInfo() ||
484 Tok.is(tok::wide_string_literal) /* ||
485 Tok.is(tok::wide_char_literal)*/)
Chris Lattner4418ce12007-07-23 06:09:34 +0000486 return true;
Chris Lattner877ca772008-01-15 05:14:19 +0000487
488 // If this isn't identifier + string, we're done.
489 if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
Chris Lattner4418ce12007-07-23 06:09:34 +0000490 return false;
491
492 // FIXME: need a wide_char_constant!
Chris Lattner877ca772008-01-15 05:14:19 +0000493
494 // If the string was a wide string L"foo" or wide char L'f', it would concat
495 // with the previous identifier into fooL"bar". Avoid this.
496 if (StartsWithL(Tok, PP))
497 return true;
498
Chris Lattner15346fa2008-01-15 05:22:14 +0000499 // Otherwise, this is a narrow character or string. If the *identifier* is
500 // a literal 'L', avoid pasting L "foo" -> L"foo".
501 return IsIdentifierL(PrevTok, PP);
Chris Lattner331ad772006-07-28 06:56:01 +0000502 case tok::numeric_constant:
Chris Lattner3c69f122007-10-09 18:03:42 +0000503 return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
Chris Lattner331ad772006-07-28 06:56:01 +0000504 FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
505 case tok::period: // ..., .*, .1234
506 return FirstChar == '.' || FirstChar == '*' || isdigit(FirstChar);
Chris Lattner4418ce12007-07-23 06:09:34 +0000507 case tok::amp: // &&
508 return FirstChar == '&';
509 case tok::plus: // ++
510 return FirstChar == '+';
511 case tok::minus: // --, ->, ->*
512 return FirstChar == '-' || FirstChar == '>';
513 case tok::slash: //, /*, //
514 return FirstChar == '*' || FirstChar == '/';
515 case tok::less: // <<, <<=, <:, <%
516 return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
517 case tok::greater: // >>, >>=
518 return FirstChar == '>';
519 case tok::pipe: // ||
520 return FirstChar == '|';
521 case tok::percent: // %>, %:
522 return FirstChar == '>' || FirstChar == ':';
Chris Lattner331ad772006-07-28 06:56:01 +0000523 case tok::colon: // ::, :>
524 return FirstChar == ':' || FirstChar == '>';
525 case tok::hash: // ##, #@, %:%:
526 return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
Chris Lattner331ad772006-07-28 06:56:01 +0000527 case tok::arrow: // ->*
528 return FirstChar == '*';
Chris Lattner331ad772006-07-28 06:56:01 +0000529 }
530}
531
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000532/// DoPrintPreprocessedInput - This implements -E mode.
Chris Lattner728b4dc2006-07-04 21:28:37 +0000533///
Chris Lattner21632652008-04-08 04:16:20 +0000534void clang::DoPrintPreprocessedInput(Preprocessor &PP,
535 const std::string &OutFile) {
Chris Lattnerb352e3e2006-11-21 06:17:10 +0000536 // Inform the preprocessor whether we want it to retain comments or not, due
537 // to -C or -CC.
538 PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput);
Chris Lattner457fc152006-07-29 06:30:25 +0000539
Chris Lattner02bf4442008-01-27 23:55:11 +0000540 InitOutputBuffer(OutFile);
Chris Lattner4418ce12007-07-23 06:09:34 +0000541 InitAvoidConcatTokenInfo();
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000542
Chris Lattner146762e2007-07-20 16:59:19 +0000543 Token Tok, PrevTok;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000544 char Buffer[256];
Chris Lattner87f267e2006-11-21 05:02:33 +0000545 PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(PP);
546 PP.setPPCallbacks(Callbacks);
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000547
Chris Lattner87f267e2006-11-21 05:02:33 +0000548 PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks));
549 PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",Callbacks));
Chris Lattnercd028fc2006-07-29 06:35:08 +0000550
551 // After we have configured the preprocessor, enter the main file.
552
553 // Start parsing the specified input file.
Ted Kremenek230bd912007-12-19 22:51:13 +0000554 PP.EnterMainSourceFile();
Chris Lattner3ff2e692007-10-10 20:45:16 +0000555
556 // Consume all of the tokens that come from the predefines buffer. Those
557 // should not be emitted into the output and are guaranteed to be at the
558 // start.
559 const SourceManager &SourceMgr = PP.getSourceManager();
560 do PP.Lex(Tok);
Chris Lattner7b4ab652007-10-10 23:31:03 +0000561 while (Tok.isNot(tok::eof) && Tok.getLocation().isFileID() &&
Chris Lattner3ff2e692007-10-10 20:45:16 +0000562 !strcmp(SourceMgr.getSourceName(Tok.getLocation()), "<predefines>"));
563
564 while (1) {
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000565
Chris Lattner67c38482006-07-04 23:24:26 +0000566 // If this token is at the start of a line, emit newlines if needed.
Chris Lattner3ed83c12007-12-09 21:11:08 +0000567 if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
568 // done.
Chris Lattner331ad772006-07-28 06:56:01 +0000569 } else if (Tok.hasLeadingSpace() ||
Chris Lattner4418ce12007-07-23 06:09:34 +0000570 // If we haven't emitted a token on this line yet, PrevTok isn't
571 // useful to look at and no concatenation could happen anyway.
Chris Lattnerd63c8a52007-07-23 23:21:34 +0000572 (Callbacks->hasEmittedTokensOnThisLine() &&
Chris Lattner4418ce12007-07-23 06:09:34 +0000573 // Don't print "-" next to "-", it would form "--".
574 Callbacks->AvoidConcat(PrevTok, Tok))) {
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000575 OutputChar(' ');
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000576 }
577
Chris Lattner0af98232007-07-23 06:14:36 +0000578 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
579 const char *Str = II->getName();
580 unsigned Len = Tok.needsCleaning() ? strlen(Str) : Tok.getLength();
581 OutputString(Str, Len);
582 } else if (Tok.getLength() < 256) {
Chris Lattneref9eae12006-07-04 22:33:12 +0000583 const char *TokPtr = Buffer;
584 unsigned Len = PP.getSpelling(Tok, TokPtr);
585 OutputString(TokPtr, Len);
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000586 } else {
Chris Lattnerdeb37012006-07-04 19:24:06 +0000587 std::string S = PP.getSpelling(Tok);
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000588 OutputString(&S[0], S.size());
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000589 }
Chris Lattner87f267e2006-11-21 05:02:33 +0000590 Callbacks->SetEmittedTokensOnThisLine();
Chris Lattner3ff2e692007-10-10 20:45:16 +0000591
592 if (Tok.is(tok::eof)) break;
593
594 PrevTok = Tok;
595 PP.Lex(Tok);
596 }
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000597 OutputChar('\n');
598
Chris Lattner21632652008-04-08 04:16:20 +0000599 CleanupOutputBuffer(PP.getDiagnostics().hasErrorOccurred());
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000600}
601