blob: f338b0722572910158c84f4bb789bbd92f409da8 [file] [log] [blame]
Chris Lattner09e3cdf2006-07-04 19:04:05 +00001//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2//
3// The LLVM Compiler Infrastructure
4//
Chris Lattner5b12ab82007-12-29 19:59:25 +00005// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
Chris Lattner09e3cdf2006-07-04 19:04:05 +00007//
8//===----------------------------------------------------------------------===//
9//
10// This code simply runs the preprocessor on the input file and prints out the
11// result. This is the traditional behavior of the -E option.
12//
13//===----------------------------------------------------------------------===//
14
#include "clang.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Pragma.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
#include <cerrno>
#include <cstdio>
Chris Lattner09e3cdf2006-07-04 19:04:05 +000025using namespace clang;
26
Chris Lattnerf46be6c2006-07-04 22:19:33 +000027//===----------------------------------------------------------------------===//
28// Simple buffered I/O
29//===----------------------------------------------------------------------===//
30//
31// Empirically, iostream is over 30% slower than stdio for this workload, and
32// stdio itself isn't very well suited. The problem with stdio is use of
33// putchar_unlocked. We have many newline characters that need to be emitted,
34// but stdio needs to do extra checks to handle line buffering mode. These
35// extra checks make putchar_unlocked fall off its inlined code path, hitting
36// slow system code. In practice, using 'write' directly makes 'clang -E -P'
37// about 10% faster than using the stdio path on darwin.
38
Chris Lattner02bf4442008-01-27 23:55:11 +000039#if defined(HAVE_UNISTD_H) && defined(HAVE_FCNTL_H)
Chris Lattnerf46be6c2006-07-04 22:19:33 +000040#include <unistd.h>
Chris Lattner02bf4442008-01-27 23:55:11 +000041#include <fcntl.h>
Chris Lattnerf46be6c2006-07-04 22:19:33 +000042#else
43#define USE_STDIO 1
44#endif
45
#ifdef USE_STDIO
/// OutputFILE - Destination stream used by the stdio configuration.
FILE *OutputFILE;
#else
/// OutputFD - Destination file descriptor used by the write(2) configuration.
static int OutputFD;

/// OutBufStart/OutBufEnd delimit the output buffer; OutBufCur is the position
/// at which the next byte will be stored.
static char *OutBufStart = 0;
static char *OutBufEnd;
static char *OutBufCur;
#endif
Chris Lattnerf46be6c2006-07-04 22:19:33 +000052
53/// InitOutputBuffer - Initialize our output buffer.
54///
Chris Lattner02bf4442008-01-27 23:55:11 +000055static void InitOutputBuffer(const std::string& Output) {
56#ifdef USE_STDIO
57 if (!Output.size() || Output == "-")
58 OutputFILE = stdout;
59 else
60 OutputFILE = fopen(Output.c_str(), "w+");
61
62 assert(OutputFILE && "failed to open output file");
63#else
Chris Lattnerf46be6c2006-07-04 22:19:33 +000064 OutBufStart = new char[64*1024];
65 OutBufEnd = OutBufStart+64*1024;
66 OutBufCur = OutBufStart;
Chris Lattner02bf4442008-01-27 23:55:11 +000067
68 if (!Output.size() || Output == "-")
69 OutputFD = STDOUT_FILENO;
70 else
71 OutputFD = open(Output.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0644);
72
73 assert(OutputFD >= 0 && "failed to open output file");
Chris Lattnerf46be6c2006-07-04 22:19:33 +000074#endif
75}
76
Chris Lattner02bf4442008-01-27 23:55:11 +000077#ifndef USE_STDIO
Chris Lattnerf46be6c2006-07-04 22:19:33 +000078/// FlushBuffer - Write the accumulated bytes to the output stream.
79///
80static void FlushBuffer() {
Chris Lattner02bf4442008-01-27 23:55:11 +000081 write(OutputFD, OutBufStart, OutBufCur-OutBufStart);
Chris Lattnerf46be6c2006-07-04 22:19:33 +000082 OutBufCur = OutBufStart;
Chris Lattnerf46be6c2006-07-04 22:19:33 +000083}
Chris Lattner02bf4442008-01-27 23:55:11 +000084#endif
Chris Lattnerf46be6c2006-07-04 22:19:33 +000085
86/// CleanupOutputBuffer - Finish up output.
87///
88static void CleanupOutputBuffer() {
89#ifndef USE_STDIO
90 FlushBuffer();
91 delete [] OutBufStart;
92#endif
93}
94
95static void OutputChar(char c) {
Chris Lattnercd1e8d22007-09-03 18:24:56 +000096#if defined(_MSC_VER)
Chris Lattner02bf4442008-01-27 23:55:11 +000097 putc(c, OutputFILE);
Chris Lattnercd1e8d22007-09-03 18:24:56 +000098#elif defined(USE_STDIO)
Chris Lattner02bf4442008-01-27 23:55:11 +000099 putc_unlocked(c, OutputFILE);
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000100#else
101 if (OutBufCur >= OutBufEnd)
102 FlushBuffer();
103 *OutBufCur++ = c;
104#endif
105}
106
107static void OutputString(const char *Ptr, unsigned Size) {
108#ifdef USE_STDIO
Chris Lattner02bf4442008-01-27 23:55:11 +0000109 fwrite(Ptr, Size, 1, OutputFILE);
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000110#else
111 if (OutBufCur+Size >= OutBufEnd)
112 FlushBuffer();
Chris Lattner93c4ea72007-07-23 06:23:07 +0000113
114 switch (Size) {
115 default:
116 memcpy(OutBufCur, Ptr, Size);
117 break;
118 case 3:
119 OutBufCur[2] = Ptr[2];
120 case 2:
121 OutBufCur[1] = Ptr[1];
122 case 1:
123 OutBufCur[0] = Ptr[0];
124 case 0:
125 break;
126 }
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000127 OutBufCur += Size;
128#endif
129}
130
131
132//===----------------------------------------------------------------------===//
133// Preprocessed token printer
134//===----------------------------------------------------------------------===//
135
Chris Lattner23b7eb62007-06-15 23:05:46 +0000136static llvm::cl::opt<bool>
137DisableLineMarkers("P", llvm::cl::desc("Disable linemarker output in -E mode"));
138static llvm::cl::opt<bool>
139EnableCommentOutput("C", llvm::cl::desc("Enable comment output in -E mode"));
140static llvm::cl::opt<bool>
141EnableMacroCommentOutput("CC",
142 llvm::cl::desc("Enable comment output in -E mode, "
Chris Lattner457fc152006-07-29 06:30:25 +0000143 "even from macro expansions"));
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000144
Chris Lattner87f267e2006-11-21 05:02:33 +0000145namespace {
146class PrintPPOutputPPCallbacks : public PPCallbacks {
147 Preprocessor &PP;
148 unsigned CurLine;
Chris Lattner87f267e2006-11-21 05:02:33 +0000149 bool EmittedTokensOnThisLine;
150 DirectoryLookup::DirType FileType;
Chris Lattner4c4a2452007-07-24 06:57:14 +0000151 llvm::SmallString<512> CurFilename;
Chris Lattner87f267e2006-11-21 05:02:33 +0000152public:
153 PrintPPOutputPPCallbacks(Preprocessor &pp) : PP(pp) {
154 CurLine = 0;
Chris Lattner4c4a2452007-07-24 06:57:14 +0000155 CurFilename += "<uninit>";
Chris Lattner87f267e2006-11-21 05:02:33 +0000156 EmittedTokensOnThisLine = false;
157 FileType = DirectoryLookup::NormalHeaderDir;
158 }
159
160 void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
Chris Lattner4418ce12007-07-23 06:09:34 +0000161 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
Chris Lattner87f267e2006-11-21 05:02:33 +0000162
163 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
164 DirectoryLookup::DirType FileType);
165 virtual void Ident(SourceLocation Loc, const std::string &str);
166
167
Chris Lattner3ed83c12007-12-09 21:11:08 +0000168 bool HandleFirstTokOnLine(Token &Tok);
169 bool MoveToLine(SourceLocation Loc);
Chris Lattner146762e2007-07-20 16:59:19 +0000170 bool AvoidConcat(const Token &PrevTok, const Token &Tok);
Chris Lattner87f267e2006-11-21 05:02:33 +0000171};
172}
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000173
/// UToStr - Render N as a decimal, null terminated string.  The digits are
/// written backwards from EndPtr, which points one past the end of the
/// caller's buffer; the returned pointer is the first character of the result.
static char *UToStr(unsigned N, char *EndPtr) {
  char *Cursor = EndPtr;

  // The terminator goes in the last byte of the buffer.
  *--Cursor = '\0';

  // Produce the digits least significant first.  Running the body at least
  // once makes zero come out as "0".
  do {
    *--Cursor = '0' + char(N % 10);
    N /= 10;
  } while (N != 0);

  return Cursor;
}
187
188
Chris Lattner728b4dc2006-07-04 21:28:37 +0000189/// MoveToLine - Move the output to the source line specified by the location
190/// object. We can do this by emitting some number of \n's, or be emitting a
Chris Lattner3ed83c12007-12-09 21:11:08 +0000191/// #line directive. This returns false if already at the specified line, true
192/// if some newlines were emitted.
193bool PrintPPOutputPPCallbacks::MoveToLine(SourceLocation Loc) {
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000194 if (DisableLineMarkers) {
Chris Lattner3ed83c12007-12-09 21:11:08 +0000195 unsigned LineNo = PP.getSourceManager().getLogicalLineNumber(Loc);
196 if (LineNo == CurLine) return false;
197
198 CurLine = LineNo;
199
200 if (!EmittedTokensOnThisLine)
201 return true;
202
203 OutputChar('\n');
204 EmittedTokensOnThisLine = false;
205 return true;
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000206 }
Chris Lattner87f267e2006-11-21 05:02:33 +0000207
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000208 unsigned LineNo = PP.getSourceManager().getLogicalLineNumber(Loc);
Chris Lattner3338ba82006-07-04 21:19:39 +0000209
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000210 // If this line is "close enough" to the original line, just print newlines,
211 // otherwise print a #line directive.
Chris Lattner87f267e2006-11-21 05:02:33 +0000212 if (LineNo-CurLine < 8) {
Chris Lattner5f075822007-07-23 05:14:05 +0000213 if (LineNo-CurLine == 1)
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000214 OutputChar('\n');
Chris Lattner3ed83c12007-12-09 21:11:08 +0000215 else if (LineNo == CurLine)
216 return false; // Phys line moved, but logical line didn't.
Chris Lattner5f075822007-07-23 05:14:05 +0000217 else {
218 const char *NewLines = "\n\n\n\n\n\n\n\n";
219 OutputString(NewLines, LineNo-CurLine);
Chris Lattner5f075822007-07-23 05:14:05 +0000220 }
Chris Lattner84f45072007-12-09 20:45:43 +0000221 CurLine = LineNo;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000222 } else {
Chris Lattner87f267e2006-11-21 05:02:33 +0000223 if (EmittedTokensOnThisLine) {
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000224 OutputChar('\n');
Chris Lattner87f267e2006-11-21 05:02:33 +0000225 EmittedTokensOnThisLine = false;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000226 }
227
Chris Lattner87f267e2006-11-21 05:02:33 +0000228 CurLine = LineNo;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000229
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000230 OutputChar('#');
231 OutputChar(' ');
Chris Lattner5cdfebb2007-07-23 06:31:11 +0000232 char NumberBuffer[20];
233 const char *NumStr = UToStr(LineNo, NumberBuffer+20);
234 OutputString(NumStr, (NumberBuffer+20)-NumStr-1);
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000235 OutputChar(' ');
Chris Lattner9b796242007-07-22 06:38:50 +0000236 OutputChar('"');
Chris Lattner87f267e2006-11-21 05:02:33 +0000237 OutputString(&CurFilename[0], CurFilename.size());
Chris Lattner9b796242007-07-22 06:38:50 +0000238 OutputChar('"');
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000239
Chris Lattner87f267e2006-11-21 05:02:33 +0000240 if (FileType == DirectoryLookup::SystemHeaderDir)
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000241 OutputString(" 3", 2);
Chris Lattner87f267e2006-11-21 05:02:33 +0000242 else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000243 OutputString(" 3 4", 4);
244 OutputChar('\n');
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000245 }
Chris Lattner3ed83c12007-12-09 21:11:08 +0000246 return true;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000247}
248
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000249
250/// FileChanged - Whenever the preprocessor enters or exits a #include file
251/// it invokes this handler. Update our conception of the current source
252/// position.
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000253void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
254 FileChangeReason Reason,
255 DirectoryLookup::DirType FileType) {
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000256 // Unless we are exiting a #include, make sure to skip ahead to the line the
257 // #include directive was at.
Chris Lattner87f267e2006-11-21 05:02:33 +0000258 SourceManager &SourceMgr = PP.getSourceManager();
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000259 if (Reason == PPCallbacks::EnterFile) {
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000260 MoveToLine(SourceMgr.getIncludeLoc(Loc));
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000261 } else if (Reason == PPCallbacks::SystemHeaderPragma) {
Chris Lattner3338ba82006-07-04 21:19:39 +0000262 MoveToLine(Loc);
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000263
264 // TODO GCC emits the # directive for this directive on the line AFTER the
265 // directive and emits a bunch of spaces that aren't needed. Emulate this
266 // strange behavior.
267 }
268
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000269 Loc = SourceMgr.getLogicalLoc(Loc);
Chris Lattner87f267e2006-11-21 05:02:33 +0000270 CurLine = SourceMgr.getLineNumber(Loc);
Chris Lattner3ed83c12007-12-09 21:11:08 +0000271
272 if (DisableLineMarkers) return;
273
Chris Lattner4c4a2452007-07-24 06:57:14 +0000274 CurFilename.clear();
275 CurFilename += SourceMgr.getSourceName(Loc);
276 Lexer::Stringify(CurFilename);
Chris Lattner87f267e2006-11-21 05:02:33 +0000277 FileType = FileType;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000278
Chris Lattner87f267e2006-11-21 05:02:33 +0000279 if (EmittedTokensOnThisLine) {
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000280 OutputChar('\n');
Chris Lattner87f267e2006-11-21 05:02:33 +0000281 EmittedTokensOnThisLine = false;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000282 }
283
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000284 OutputChar('#');
285 OutputChar(' ');
Chris Lattner1af0e012007-07-24 06:59:01 +0000286
287 char NumberBuffer[20];
288 const char *NumStr = UToStr(CurLine, NumberBuffer+20);
289 OutputString(NumStr, (NumberBuffer+20)-NumStr-1);
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000290 OutputChar(' ');
Chris Lattner9b796242007-07-22 06:38:50 +0000291 OutputChar('"');
Chris Lattner87f267e2006-11-21 05:02:33 +0000292 OutputString(&CurFilename[0], CurFilename.size());
Chris Lattner9b796242007-07-22 06:38:50 +0000293 OutputChar('"');
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000294
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000295 switch (Reason) {
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000296 case PPCallbacks::EnterFile:
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000297 OutputString(" 1", 2);
Chris Lattner3338ba82006-07-04 21:19:39 +0000298 break;
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000299 case PPCallbacks::ExitFile:
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000300 OutputString(" 2", 2);
Chris Lattner3338ba82006-07-04 21:19:39 +0000301 break;
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000302 case PPCallbacks::SystemHeaderPragma: break;
303 case PPCallbacks::RenameFile: break;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000304 }
305
306 if (FileType == DirectoryLookup::SystemHeaderDir)
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000307 OutputString(" 3", 2);
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000308 else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000309 OutputString(" 3 4", 4);
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000310
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000311 OutputChar('\n');
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000312}
313
Chris Lattner728b4dc2006-07-04 21:28:37 +0000314/// HandleIdent - Handle #ident directives when read by the preprocessor.
315///
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000316void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
Chris Lattner3338ba82006-07-04 21:19:39 +0000317 MoveToLine(Loc);
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000318
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000319 OutputString("#ident ", strlen("#ident "));
Chris Lattnerb8d6d5a2006-11-21 04:09:30 +0000320 OutputString(&S[0], S.size());
Chris Lattner87f267e2006-11-21 05:02:33 +0000321 EmittedTokensOnThisLine = true;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000322}
323
324/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
Chris Lattner3ed83c12007-12-09 21:11:08 +0000325/// is called for the first token on each new line. If this really is the start
326/// of a new logical line, handle it and return true, otherwise return false.
327/// This may not be the start of a logical line because the "start of line"
328/// marker is set for physical lines, not logical ones.
329bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000330 // Figure out what line we went to and insert the appropriate number of
331 // newline characters.
Chris Lattner3ed83c12007-12-09 21:11:08 +0000332 if (!MoveToLine(Tok.getLocation()))
333 return false;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000334
335 // Print out space characters so that the first token on a line is
336 // indented for easy reading.
Chris Lattnerdc5c0552007-07-20 16:37:10 +0000337 const SourceManager &SourceMgr = PP.getSourceManager();
338 unsigned ColNo = SourceMgr.getLogicalColumnNumber(Tok.getLocation());
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000339
340 // This hack prevents stuff like:
341 // #define HASH #
342 // HASH define foo bar
343 // From having the # character end up at column 1, which makes it so it
344 // is not handled as a #define next time through the preprocessor if in
345 // -fpreprocessed mode.
Chris Lattner3c69f122007-10-09 18:03:42 +0000346 if (ColNo <= 1 && Tok.is(tok::hash))
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000347 OutputChar(' ');
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000348
349 // Otherwise, indent the appropriate number of spaces.
350 for (; ColNo > 1; --ColNo)
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000351 OutputChar(' ');
Chris Lattner3ed83c12007-12-09 21:11:08 +0000352
353 return true;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000354}
355
Chris Lattner5de858c2006-07-04 19:04:44 +0000356namespace {
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000357struct UnknownPragmaHandler : public PragmaHandler {
358 const char *Prefix;
Chris Lattner87f267e2006-11-21 05:02:33 +0000359 PrintPPOutputPPCallbacks *Callbacks;
360
361 UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks)
362 : PragmaHandler(0), Prefix(prefix), Callbacks(callbacks) {}
Chris Lattner146762e2007-07-20 16:59:19 +0000363 virtual void HandlePragma(Preprocessor &PP, Token &PragmaTok) {
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000364 // Figure out what line we went to and insert the appropriate number of
365 // newline characters.
Chris Lattner87f267e2006-11-21 05:02:33 +0000366 Callbacks->MoveToLine(PragmaTok.getLocation());
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000367 OutputString(Prefix, strlen(Prefix));
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000368
369 // Read and print all of the pragma tokens.
Chris Lattner3c69f122007-10-09 18:03:42 +0000370 while (PragmaTok.isNot(tok::eom)) {
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000371 if (PragmaTok.hasLeadingSpace())
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000372 OutputChar(' ');
373 std::string TokSpell = PP.getSpelling(PragmaTok);
374 OutputString(&TokSpell[0], TokSpell.size());
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000375 PP.LexUnexpandedToken(PragmaTok);
376 }
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000377 OutputChar('\n');
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000378 }
379};
Chris Lattner5de858c2006-07-04 19:04:44 +0000380} // end anonymous namespace
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000381
Chris Lattner4418ce12007-07-23 06:09:34 +0000382
/// AvoidConcatInfo - Bit flags stored per token kind that tell AvoidConcat
/// what has to be checked when that kind of token is the previous token.
enum AvoidConcatInfo {
  /// No flags: the token never needs to avoid concatenation.  This is the
  /// default, since most tokens (e.g. ',', ')', etc) cause no problem when
  /// something is printed directly after them.
  aci_never_avoid_concat = 0,

  /// AvoidConcat has custom code for this token, and that code needs the
  /// first character of the following token.
  aci_custom_firstchar = 1 << 0,

  /// AvoidConcat has custom code for this token, but that code doesn't need
  /// the first character of the following token.
  aci_custom = 1 << 1,

  /// The token cannot be safely followed by an '=' character.  For example,
  /// "<<" turns into "<<=" when followed by an =.
  aci_avoid_equal = 1 << 2
};
402
403/// This array contains information for each token on what action to take when
404/// avoiding concatenation of tokens in the AvoidConcat method.
405static char TokenInfo[tok::NUM_TOKENS];
406
407/// InitAvoidConcatTokenInfo - Tokens that must avoid concatenation should be
408/// marked by this function.
409static void InitAvoidConcatTokenInfo() {
410 // These tokens have custom code in AvoidConcat.
411 TokenInfo[tok::identifier ] |= aci_custom;
412 TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
413 TokenInfo[tok::period ] |= aci_custom_firstchar;
414 TokenInfo[tok::amp ] |= aci_custom_firstchar;
415 TokenInfo[tok::plus ] |= aci_custom_firstchar;
416 TokenInfo[tok::minus ] |= aci_custom_firstchar;
417 TokenInfo[tok::slash ] |= aci_custom_firstchar;
418 TokenInfo[tok::less ] |= aci_custom_firstchar;
419 TokenInfo[tok::greater ] |= aci_custom_firstchar;
420 TokenInfo[tok::pipe ] |= aci_custom_firstchar;
421 TokenInfo[tok::percent ] |= aci_custom_firstchar;
422 TokenInfo[tok::colon ] |= aci_custom_firstchar;
423 TokenInfo[tok::hash ] |= aci_custom_firstchar;
424 TokenInfo[tok::arrow ] |= aci_custom_firstchar;
425
426 // These tokens change behavior if followed by an '='.
427 TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
428 TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
429 TokenInfo[tok::minus ] |= aci_avoid_equal; // -=
430 TokenInfo[tok::slash ] |= aci_avoid_equal; // /=
431 TokenInfo[tok::less ] |= aci_avoid_equal; // <=
432 TokenInfo[tok::greater ] |= aci_avoid_equal; // >=
433 TokenInfo[tok::pipe ] |= aci_avoid_equal; // |=
434 TokenInfo[tok::percent ] |= aci_avoid_equal; // %=
435 TokenInfo[tok::star ] |= aci_avoid_equal; // *=
436 TokenInfo[tok::exclaim ] |= aci_avoid_equal; // !=
437 TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<=
438 TokenInfo[tok::greaterequal] |= aci_avoid_equal; // >>=
439 TokenInfo[tok::caret ] |= aci_avoid_equal; // ^=
440 TokenInfo[tok::equal ] |= aci_avoid_equal; // ==
441}
442
Chris Lattner15346fa2008-01-15 05:22:14 +0000443/// StartsWithL - Return true if the spelling of this token starts with 'L'.
Chris Lattner877ca772008-01-15 05:14:19 +0000444static bool StartsWithL(const Token &Tok, Preprocessor &PP) {
Chris Lattner877ca772008-01-15 05:14:19 +0000445 if (!Tok.needsCleaning()) {
446 SourceManager &SrcMgr = PP.getSourceManager();
447 return *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()))
448 == 'L';
449 }
450
451 if (Tok.getLength() < 256) {
Chris Lattner15346fa2008-01-15 05:22:14 +0000452 char Buffer[256];
Chris Lattner877ca772008-01-15 05:14:19 +0000453 const char *TokPtr = Buffer;
454 PP.getSpelling(Tok, TokPtr);
455 return TokPtr[0] == 'L';
456 }
457
458 return PP.getSpelling(Tok)[0] == 'L';
459}
460
Chris Lattner15346fa2008-01-15 05:22:14 +0000461/// IsIdentifierL - Return true if the spelling of this token is literally 'L'.
462static bool IsIdentifierL(const Token &Tok, Preprocessor &PP) {
463 if (!Tok.needsCleaning()) {
464 if (Tok.getLength() != 1)
465 return false;
466 SourceManager &SrcMgr = PP.getSourceManager();
467 return *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()))
468 == 'L';
469 }
470
471 if (Tok.getLength() < 256) {
472 char Buffer[256];
473 const char *TokPtr = Buffer;
474 if (PP.getSpelling(Tok, TokPtr) != 1)
475 return false;
476 return TokPtr[0] == 'L';
477 }
478
479 return PP.getSpelling(Tok) == "L";
480}
481
482
Chris Lattner331ad772006-07-28 06:56:01 +0000483/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
484/// the two individual tokens to be lexed as a single token, return true (which
485/// causes a space to be printed between them). This allows the output of -E
486/// mode to be lexed to the same token stream as lexing the input directly
487/// would.
488///
489/// This code must conservatively return true if it doesn't want to be 100%
490/// accurate. This will cause the output to include extra space characters, but
491/// the resulting output won't have incorrect concatenations going on. Examples
492/// include "..", which we print with a space between, because we don't want to
493/// track enough to tell "x.." from "...".
Chris Lattner146762e2007-07-20 16:59:19 +0000494bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
495 const Token &Tok) {
Chris Lattner331ad772006-07-28 06:56:01 +0000496 char Buffer[256];
497
Chris Lattner4418ce12007-07-23 06:09:34 +0000498 tok::TokenKind PrevKind = PrevTok.getKind();
499 if (PrevTok.getIdentifierInfo()) // Language keyword or named operator.
500 PrevKind = tok::identifier;
501
502 // Look up information on when we should avoid concatenation with prevtok.
503 unsigned ConcatInfo = TokenInfo[PrevKind];
504
505 // If prevtok never causes a problem for anything after it, return quickly.
506 if (ConcatInfo == 0) return false;
Chris Lattner331ad772006-07-28 06:56:01 +0000507
Chris Lattner4418ce12007-07-23 06:09:34 +0000508 if (ConcatInfo & aci_avoid_equal) {
509 // If the next token is '=' or '==', avoid concatenation.
Chris Lattner3c69f122007-10-09 18:03:42 +0000510 if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
Chris Lattner4418ce12007-07-23 06:09:34 +0000511 return true;
Chris Lattnerd63c8a52007-07-23 23:21:34 +0000512 ConcatInfo &= ~aci_avoid_equal;
Chris Lattner4418ce12007-07-23 06:09:34 +0000513 }
514
515 if (ConcatInfo == 0) return false;
516
517
518
Chris Lattner331ad772006-07-28 06:56:01 +0000519 // Basic algorithm: we look at the first character of the second token, and
520 // determine whether it, if appended to the first token, would form (or would
521 // contribute) to a larger token if concatenated.
Chris Lattner4418ce12007-07-23 06:09:34 +0000522 char FirstChar = 0;
523 if (ConcatInfo & aci_custom) {
524 // If the token does not need to know the first character, don't get it.
525 } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
Chris Lattner331ad772006-07-28 06:56:01 +0000526 // Avoid spelling identifiers, the most common form of token.
527 FirstChar = II->getName()[0];
Chris Lattnere4c566c2007-07-23 05:18:42 +0000528 } else if (!Tok.needsCleaning()) {
529 SourceManager &SrcMgr = PP.getSourceManager();
530 FirstChar =
531 *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()));
Chris Lattner331ad772006-07-28 06:56:01 +0000532 } else if (Tok.getLength() < 256) {
Chris Lattner9f547a42006-10-18 06:06:41 +0000533 const char *TokPtr = Buffer;
534 PP.getSpelling(Tok, TokPtr);
535 FirstChar = TokPtr[0];
Chris Lattner331ad772006-07-28 06:56:01 +0000536 } else {
537 FirstChar = PP.getSpelling(Tok)[0];
538 }
Chris Lattner4418ce12007-07-23 06:09:34 +0000539
Chris Lattner331ad772006-07-28 06:56:01 +0000540 switch (PrevKind) {
Chris Lattner4418ce12007-07-23 06:09:34 +0000541 default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
Chris Lattner331ad772006-07-28 06:56:01 +0000542 case tok::identifier: // id+id or id+number or id+L"foo".
Chris Lattner3c69f122007-10-09 18:03:42 +0000543 if (Tok.is(tok::numeric_constant) || Tok.getIdentifierInfo() ||
544 Tok.is(tok::wide_string_literal) /* ||
545 Tok.is(tok::wide_char_literal)*/)
Chris Lattner4418ce12007-07-23 06:09:34 +0000546 return true;
Chris Lattner877ca772008-01-15 05:14:19 +0000547
548 // If this isn't identifier + string, we're done.
549 if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
Chris Lattner4418ce12007-07-23 06:09:34 +0000550 return false;
551
552 // FIXME: need a wide_char_constant!
Chris Lattner877ca772008-01-15 05:14:19 +0000553
554 // If the string was a wide string L"foo" or wide char L'f', it would concat
555 // with the previous identifier into fooL"bar". Avoid this.
556 if (StartsWithL(Tok, PP))
557 return true;
558
Chris Lattner15346fa2008-01-15 05:22:14 +0000559 // Otherwise, this is a narrow character or string. If the *identifier* is
560 // a literal 'L', avoid pasting L "foo" -> L"foo".
561 return IsIdentifierL(PrevTok, PP);
Chris Lattner331ad772006-07-28 06:56:01 +0000562 case tok::numeric_constant:
Chris Lattner3c69f122007-10-09 18:03:42 +0000563 return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
Chris Lattner331ad772006-07-28 06:56:01 +0000564 FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
565 case tok::period: // ..., .*, .1234
566 return FirstChar == '.' || FirstChar == '*' || isdigit(FirstChar);
Chris Lattner4418ce12007-07-23 06:09:34 +0000567 case tok::amp: // &&
568 return FirstChar == '&';
569 case tok::plus: // ++
570 return FirstChar == '+';
571 case tok::minus: // --, ->, ->*
572 return FirstChar == '-' || FirstChar == '>';
573 case tok::slash: //, /*, //
574 return FirstChar == '*' || FirstChar == '/';
575 case tok::less: // <<, <<=, <:, <%
576 return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
577 case tok::greater: // >>, >>=
578 return FirstChar == '>';
579 case tok::pipe: // ||
580 return FirstChar == '|';
581 case tok::percent: // %>, %:
582 return FirstChar == '>' || FirstChar == ':';
Chris Lattner331ad772006-07-28 06:56:01 +0000583 case tok::colon: // ::, :>
584 return FirstChar == ':' || FirstChar == '>';
585 case tok::hash: // ##, #@, %:%:
586 return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
Chris Lattner331ad772006-07-28 06:56:01 +0000587 case tok::arrow: // ->*
588 return FirstChar == '*';
Chris Lattner331ad772006-07-28 06:56:01 +0000589 }
590}
591
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000592/// DoPrintPreprocessedInput - This implements -E mode.
Chris Lattner728b4dc2006-07-04 21:28:37 +0000593///
Chris Lattner02bf4442008-01-27 23:55:11 +0000594void clang::DoPrintPreprocessedInput(Preprocessor &PP, const std::string& OutFile) {
Chris Lattnerb352e3e2006-11-21 06:17:10 +0000595 // Inform the preprocessor whether we want it to retain comments or not, due
596 // to -C or -CC.
597 PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput);
Chris Lattner457fc152006-07-29 06:30:25 +0000598
Chris Lattner02bf4442008-01-27 23:55:11 +0000599 InitOutputBuffer(OutFile);
Chris Lattner4418ce12007-07-23 06:09:34 +0000600 InitAvoidConcatTokenInfo();
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000601
Chris Lattner146762e2007-07-20 16:59:19 +0000602 Token Tok, PrevTok;
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000603 char Buffer[256];
Chris Lattner87f267e2006-11-21 05:02:33 +0000604 PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(PP);
605 PP.setPPCallbacks(Callbacks);
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000606
Chris Lattner87f267e2006-11-21 05:02:33 +0000607 PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks));
608 PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",Callbacks));
Chris Lattnercd028fc2006-07-29 06:35:08 +0000609
610 // After we have configured the preprocessor, enter the main file.
611
612 // Start parsing the specified input file.
Ted Kremenek230bd912007-12-19 22:51:13 +0000613 PP.EnterMainSourceFile();
Chris Lattner3ff2e692007-10-10 20:45:16 +0000614
615 // Consume all of the tokens that come from the predefines buffer. Those
616 // should not be emitted into the output and are guaranteed to be at the
617 // start.
618 const SourceManager &SourceMgr = PP.getSourceManager();
619 do PP.Lex(Tok);
Chris Lattner7b4ab652007-10-10 23:31:03 +0000620 while (Tok.isNot(tok::eof) && Tok.getLocation().isFileID() &&
Chris Lattner3ff2e692007-10-10 20:45:16 +0000621 !strcmp(SourceMgr.getSourceName(Tok.getLocation()), "<predefines>"));
622
623 while (1) {
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000624
Chris Lattner67c38482006-07-04 23:24:26 +0000625 // If this token is at the start of a line, emit newlines if needed.
Chris Lattner3ed83c12007-12-09 21:11:08 +0000626 if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
627 // done.
Chris Lattner331ad772006-07-28 06:56:01 +0000628 } else if (Tok.hasLeadingSpace() ||
Chris Lattner4418ce12007-07-23 06:09:34 +0000629 // If we haven't emitted a token on this line yet, PrevTok isn't
630 // useful to look at and no concatenation could happen anyway.
Chris Lattnerd63c8a52007-07-23 23:21:34 +0000631 (Callbacks->hasEmittedTokensOnThisLine() &&
Chris Lattner4418ce12007-07-23 06:09:34 +0000632 // Don't print "-" next to "-", it would form "--".
633 Callbacks->AvoidConcat(PrevTok, Tok))) {
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000634 OutputChar(' ');
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000635 }
636
Chris Lattner0af98232007-07-23 06:14:36 +0000637 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
638 const char *Str = II->getName();
639 unsigned Len = Tok.needsCleaning() ? strlen(Str) : Tok.getLength();
640 OutputString(Str, Len);
641 } else if (Tok.getLength() < 256) {
Chris Lattneref9eae12006-07-04 22:33:12 +0000642 const char *TokPtr = Buffer;
643 unsigned Len = PP.getSpelling(Tok, TokPtr);
644 OutputString(TokPtr, Len);
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000645 } else {
Chris Lattnerdeb37012006-07-04 19:24:06 +0000646 std::string S = PP.getSpelling(Tok);
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000647 OutputString(&S[0], S.size());
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000648 }
Chris Lattner87f267e2006-11-21 05:02:33 +0000649 Callbacks->SetEmittedTokensOnThisLine();
Chris Lattner3ff2e692007-10-10 20:45:16 +0000650
651 if (Tok.is(tok::eof)) break;
652
653 PrevTok = Tok;
654 PP.Lex(Tok);
655 }
Chris Lattnerf46be6c2006-07-04 22:19:33 +0000656 OutputChar('\n');
657
658 CleanupOutputBuffer();
Chris Lattner09e3cdf2006-07-04 19:04:05 +0000659}
660