blob: f338b0722572910158c84f4bb789bbd92f409da8 [file] [log] [blame]
Reid Spencer5f016e22007-07-11 17:01:13 +00001//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2//
3// The LLVM Compiler Infrastructure
4//
Chris Lattner0bc735f2007-12-29 19:59:25 +00005// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
Reid Spencer5f016e22007-07-11 17:01:13 +00007//
8//===----------------------------------------------------------------------===//
9//
10// This code simply runs the preprocessor on the input file and prints out the
11// result. This is the traditional behavior of the -E option.
12//
13//===----------------------------------------------------------------------===//
14
#include "clang.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Pragma.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
#include <cerrno>
#include <cstdio>
25using namespace clang;
26
27//===----------------------------------------------------------------------===//
28// Simple buffered I/O
29//===----------------------------------------------------------------------===//
30//
31// Empirically, iostream is over 30% slower than stdio for this workload, and
32// stdio itself isn't very well suited. The problem with stdio is use of
33// putchar_unlocked. We have many newline characters that need to be emitted,
34// but stdio needs to do extra checks to handle line buffering mode. These
35// extra checks make putchar_unlocked fall off its inlined code path, hitting
36// slow system code. In practice, using 'write' directly makes 'clang -E -P'
37// about 10% faster than using the stdio path on darwin.
38
Chris Lattnere988bc22008-01-27 23:55:11 +000039#if defined(HAVE_UNISTD_H) && defined(HAVE_FCNTL_H)
Reid Spencer5f016e22007-07-11 17:01:13 +000040#include <unistd.h>
Chris Lattnere988bc22008-01-27 23:55:11 +000041#include <fcntl.h>
Reid Spencer5f016e22007-07-11 17:01:13 +000042#else
43#define USE_STDIO 1
44#endif
45
// Output destination state.  With stdio the output goes through a FILE*;
// otherwise it goes to a raw file descriptor via a staging buffer described
// by the three OutBuf pointers (start, one-past-end, and next free byte).
#ifdef USE_STDIO
FILE *OutputFILE;
#else
static int OutputFD;
static char *OutBufStart = 0;
static char *OutBufEnd;
static char *OutBufCur;
#endif
Reid Spencer5f016e22007-07-11 17:01:13 +000052
53/// InitOutputBuffer - Initialize our output buffer.
54///
Chris Lattnere988bc22008-01-27 23:55:11 +000055static void InitOutputBuffer(const std::string& Output) {
56#ifdef USE_STDIO
57 if (!Output.size() || Output == "-")
58 OutputFILE = stdout;
59 else
60 OutputFILE = fopen(Output.c_str(), "w+");
61
62 assert(OutputFILE && "failed to open output file");
63#else
Reid Spencer5f016e22007-07-11 17:01:13 +000064 OutBufStart = new char[64*1024];
65 OutBufEnd = OutBufStart+64*1024;
66 OutBufCur = OutBufStart;
Chris Lattnere988bc22008-01-27 23:55:11 +000067
68 if (!Output.size() || Output == "-")
69 OutputFD = STDOUT_FILENO;
70 else
71 OutputFD = open(Output.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0644);
72
73 assert(OutputFD >= 0 && "failed to open output file");
Reid Spencer5f016e22007-07-11 17:01:13 +000074#endif
75}
76
Chris Lattnere988bc22008-01-27 23:55:11 +000077#ifndef USE_STDIO
Reid Spencer5f016e22007-07-11 17:01:13 +000078/// FlushBuffer - Write the accumulated bytes to the output stream.
79///
80static void FlushBuffer() {
Chris Lattnere988bc22008-01-27 23:55:11 +000081 write(OutputFD, OutBufStart, OutBufCur-OutBufStart);
Reid Spencer5f016e22007-07-11 17:01:13 +000082 OutBufCur = OutBufStart;
Reid Spencer5f016e22007-07-11 17:01:13 +000083}
Chris Lattnere988bc22008-01-27 23:55:11 +000084#endif
Reid Spencer5f016e22007-07-11 17:01:13 +000085
86/// CleanupOutputBuffer - Finish up output.
87///
88static void CleanupOutputBuffer() {
89#ifndef USE_STDIO
90 FlushBuffer();
91 delete [] OutBufStart;
92#endif
93}
94
95static void OutputChar(char c) {
Chris Lattner6a4545e2007-09-03 18:24:56 +000096#if defined(_MSC_VER)
Chris Lattnere988bc22008-01-27 23:55:11 +000097 putc(c, OutputFILE);
Chris Lattner6a4545e2007-09-03 18:24:56 +000098#elif defined(USE_STDIO)
Chris Lattnere988bc22008-01-27 23:55:11 +000099 putc_unlocked(c, OutputFILE);
Reid Spencer5f016e22007-07-11 17:01:13 +0000100#else
101 if (OutBufCur >= OutBufEnd)
102 FlushBuffer();
103 *OutBufCur++ = c;
104#endif
105}
106
107static void OutputString(const char *Ptr, unsigned Size) {
108#ifdef USE_STDIO
Chris Lattnere988bc22008-01-27 23:55:11 +0000109 fwrite(Ptr, Size, 1, OutputFILE);
Reid Spencer5f016e22007-07-11 17:01:13 +0000110#else
111 if (OutBufCur+Size >= OutBufEnd)
112 FlushBuffer();
Chris Lattnere225e372007-07-23 06:23:07 +0000113
114 switch (Size) {
115 default:
116 memcpy(OutBufCur, Ptr, Size);
117 break;
118 case 3:
119 OutBufCur[2] = Ptr[2];
120 case 2:
121 OutBufCur[1] = Ptr[1];
122 case 1:
123 OutBufCur[0] = Ptr[0];
124 case 0:
125 break;
126 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000127 OutBufCur += Size;
128#endif
129}
130
131
132//===----------------------------------------------------------------------===//
133// Preprocessed token printer
134//===----------------------------------------------------------------------===//
135
136static llvm::cl::opt<bool>
137DisableLineMarkers("P", llvm::cl::desc("Disable linemarker output in -E mode"));
138static llvm::cl::opt<bool>
139EnableCommentOutput("C", llvm::cl::desc("Enable comment output in -E mode"));
140static llvm::cl::opt<bool>
141EnableMacroCommentOutput("CC",
142 llvm::cl::desc("Enable comment output in -E mode, "
143 "even from macro expansions"));
144
145namespace {
146class PrintPPOutputPPCallbacks : public PPCallbacks {
147 Preprocessor &PP;
148 unsigned CurLine;
Reid Spencer5f016e22007-07-11 17:01:13 +0000149 bool EmittedTokensOnThisLine;
150 DirectoryLookup::DirType FileType;
Chris Lattnerd8e30832007-07-24 06:57:14 +0000151 llvm::SmallString<512> CurFilename;
Reid Spencer5f016e22007-07-11 17:01:13 +0000152public:
153 PrintPPOutputPPCallbacks(Preprocessor &pp) : PP(pp) {
154 CurLine = 0;
Chris Lattnerd8e30832007-07-24 06:57:14 +0000155 CurFilename += "<uninit>";
Reid Spencer5f016e22007-07-11 17:01:13 +0000156 EmittedTokensOnThisLine = false;
157 FileType = DirectoryLookup::NormalHeaderDir;
158 }
159
160 void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000161 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
Reid Spencer5f016e22007-07-11 17:01:13 +0000162
163 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
164 DirectoryLookup::DirType FileType);
165 virtual void Ident(SourceLocation Loc, const std::string &str);
166
167
Chris Lattner5f180322007-12-09 21:11:08 +0000168 bool HandleFirstTokOnLine(Token &Tok);
169 bool MoveToLine(SourceLocation Loc);
Chris Lattnerd2177732007-07-20 16:59:19 +0000170 bool AvoidConcat(const Token &PrevTok, const Token &Tok);
Reid Spencer5f016e22007-07-11 17:01:13 +0000171};
172}
173
/// UToStr - Format N as a null terminated decimal string, written backwards
/// so that it ends just before EndPtr (the end of the caller's buffer).
/// Returns a pointer to the first digit.
static char *UToStr(unsigned N, char *EndPtr) {
  char *Cursor = EndPtr;

  // The terminator goes in the last byte of the buffer.
  *--Cursor = '\0';

  // Peel off digits from least to most significant.  Running the body at
  // least once makes zero come out as "0".
  do {
    *--Cursor = '0' + char(N % 10);
    N /= 10;
  } while (N != 0);

  return Cursor;
}
187
188
Reid Spencer5f016e22007-07-11 17:01:13 +0000189/// MoveToLine - Move the output to the source line specified by the location
190/// object. We can do this by emitting some number of \n's, or be emitting a
Chris Lattner5f180322007-12-09 21:11:08 +0000191/// #line directive. This returns false if already at the specified line, true
192/// if some newlines were emitted.
193bool PrintPPOutputPPCallbacks::MoveToLine(SourceLocation Loc) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000194 if (DisableLineMarkers) {
Chris Lattner5f180322007-12-09 21:11:08 +0000195 unsigned LineNo = PP.getSourceManager().getLogicalLineNumber(Loc);
196 if (LineNo == CurLine) return false;
197
198 CurLine = LineNo;
199
200 if (!EmittedTokensOnThisLine)
201 return true;
202
203 OutputChar('\n');
204 EmittedTokensOnThisLine = false;
205 return true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000206 }
207
Chris Lattner9dc1f532007-07-20 16:37:10 +0000208 unsigned LineNo = PP.getSourceManager().getLogicalLineNumber(Loc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000209
210 // If this line is "close enough" to the original line, just print newlines,
211 // otherwise print a #line directive.
212 if (LineNo-CurLine < 8) {
Chris Lattner822f9402007-07-23 05:14:05 +0000213 if (LineNo-CurLine == 1)
Reid Spencer5f016e22007-07-11 17:01:13 +0000214 OutputChar('\n');
Chris Lattner5f180322007-12-09 21:11:08 +0000215 else if (LineNo == CurLine)
216 return false; // Phys line moved, but logical line didn't.
Chris Lattner822f9402007-07-23 05:14:05 +0000217 else {
218 const char *NewLines = "\n\n\n\n\n\n\n\n";
219 OutputString(NewLines, LineNo-CurLine);
Chris Lattner822f9402007-07-23 05:14:05 +0000220 }
Chris Lattner5c0887c2007-12-09 20:45:43 +0000221 CurLine = LineNo;
Reid Spencer5f016e22007-07-11 17:01:13 +0000222 } else {
223 if (EmittedTokensOnThisLine) {
224 OutputChar('\n');
225 EmittedTokensOnThisLine = false;
226 }
227
228 CurLine = LineNo;
229
230 OutputChar('#');
231 OutputChar(' ');
Chris Lattnerf0637212007-07-23 06:31:11 +0000232 char NumberBuffer[20];
233 const char *NumStr = UToStr(LineNo, NumberBuffer+20);
234 OutputString(NumStr, (NumberBuffer+20)-NumStr-1);
Reid Spencer5f016e22007-07-11 17:01:13 +0000235 OutputChar(' ');
Chris Lattner0cbc4b52007-07-22 06:38:50 +0000236 OutputChar('"');
Reid Spencer5f016e22007-07-11 17:01:13 +0000237 OutputString(&CurFilename[0], CurFilename.size());
Chris Lattner0cbc4b52007-07-22 06:38:50 +0000238 OutputChar('"');
Reid Spencer5f016e22007-07-11 17:01:13 +0000239
240 if (FileType == DirectoryLookup::SystemHeaderDir)
241 OutputString(" 3", 2);
242 else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
243 OutputString(" 3 4", 4);
244 OutputChar('\n');
245 }
Chris Lattner5f180322007-12-09 21:11:08 +0000246 return true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000247}
248
249
250/// FileChanged - Whenever the preprocessor enters or exits a #include file
251/// it invokes this handler. Update our conception of the current source
252/// position.
253void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
254 FileChangeReason Reason,
255 DirectoryLookup::DirType FileType) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000256 // Unless we are exiting a #include, make sure to skip ahead to the line the
257 // #include directive was at.
258 SourceManager &SourceMgr = PP.getSourceManager();
259 if (Reason == PPCallbacks::EnterFile) {
Chris Lattner9dc1f532007-07-20 16:37:10 +0000260 MoveToLine(SourceMgr.getIncludeLoc(Loc));
Reid Spencer5f016e22007-07-11 17:01:13 +0000261 } else if (Reason == PPCallbacks::SystemHeaderPragma) {
262 MoveToLine(Loc);
263
264 // TODO GCC emits the # directive for this directive on the line AFTER the
265 // directive and emits a bunch of spaces that aren't needed. Emulate this
266 // strange behavior.
267 }
268
Chris Lattner9dc1f532007-07-20 16:37:10 +0000269 Loc = SourceMgr.getLogicalLoc(Loc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000270 CurLine = SourceMgr.getLineNumber(Loc);
Chris Lattner5f180322007-12-09 21:11:08 +0000271
272 if (DisableLineMarkers) return;
273
Chris Lattnerd8e30832007-07-24 06:57:14 +0000274 CurFilename.clear();
275 CurFilename += SourceMgr.getSourceName(Loc);
276 Lexer::Stringify(CurFilename);
Reid Spencer5f016e22007-07-11 17:01:13 +0000277 FileType = FileType;
278
279 if (EmittedTokensOnThisLine) {
280 OutputChar('\n');
281 EmittedTokensOnThisLine = false;
282 }
283
Reid Spencer5f016e22007-07-11 17:01:13 +0000284 OutputChar('#');
285 OutputChar(' ');
Chris Lattner51431962007-07-24 06:59:01 +0000286
287 char NumberBuffer[20];
288 const char *NumStr = UToStr(CurLine, NumberBuffer+20);
289 OutputString(NumStr, (NumberBuffer+20)-NumStr-1);
Reid Spencer5f016e22007-07-11 17:01:13 +0000290 OutputChar(' ');
Chris Lattner0cbc4b52007-07-22 06:38:50 +0000291 OutputChar('"');
Reid Spencer5f016e22007-07-11 17:01:13 +0000292 OutputString(&CurFilename[0], CurFilename.size());
Chris Lattner0cbc4b52007-07-22 06:38:50 +0000293 OutputChar('"');
Reid Spencer5f016e22007-07-11 17:01:13 +0000294
295 switch (Reason) {
296 case PPCallbacks::EnterFile:
297 OutputString(" 1", 2);
298 break;
299 case PPCallbacks::ExitFile:
300 OutputString(" 2", 2);
301 break;
302 case PPCallbacks::SystemHeaderPragma: break;
303 case PPCallbacks::RenameFile: break;
304 }
305
306 if (FileType == DirectoryLookup::SystemHeaderDir)
307 OutputString(" 3", 2);
308 else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
309 OutputString(" 3 4", 4);
310
311 OutputChar('\n');
312}
313
314/// HandleIdent - Handle #ident directives when read by the preprocessor.
315///
316void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
317 MoveToLine(Loc);
318
319 OutputString("#ident ", strlen("#ident "));
320 OutputString(&S[0], S.size());
321 EmittedTokensOnThisLine = true;
322}
323
324/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
Chris Lattner5f180322007-12-09 21:11:08 +0000325/// is called for the first token on each new line. If this really is the start
326/// of a new logical line, handle it and return true, otherwise return false.
327/// This may not be the start of a logical line because the "start of line"
328/// marker is set for physical lines, not logical ones.
329bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000330 // Figure out what line we went to and insert the appropriate number of
331 // newline characters.
Chris Lattner5f180322007-12-09 21:11:08 +0000332 if (!MoveToLine(Tok.getLocation()))
333 return false;
Reid Spencer5f016e22007-07-11 17:01:13 +0000334
335 // Print out space characters so that the first token on a line is
336 // indented for easy reading.
Chris Lattner9dc1f532007-07-20 16:37:10 +0000337 const SourceManager &SourceMgr = PP.getSourceManager();
338 unsigned ColNo = SourceMgr.getLogicalColumnNumber(Tok.getLocation());
Reid Spencer5f016e22007-07-11 17:01:13 +0000339
340 // This hack prevents stuff like:
341 // #define HASH #
342 // HASH define foo bar
343 // From having the # character end up at column 1, which makes it so it
344 // is not handled as a #define next time through the preprocessor if in
345 // -fpreprocessed mode.
Chris Lattner057aaf62007-10-09 18:03:42 +0000346 if (ColNo <= 1 && Tok.is(tok::hash))
Reid Spencer5f016e22007-07-11 17:01:13 +0000347 OutputChar(' ');
348
349 // Otherwise, indent the appropriate number of spaces.
350 for (; ColNo > 1; --ColNo)
351 OutputChar(' ');
Chris Lattner5f180322007-12-09 21:11:08 +0000352
353 return true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000354}
355
356namespace {
357struct UnknownPragmaHandler : public PragmaHandler {
358 const char *Prefix;
359 PrintPPOutputPPCallbacks *Callbacks;
360
361 UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks)
362 : PragmaHandler(0), Prefix(prefix), Callbacks(callbacks) {}
Chris Lattnerd2177732007-07-20 16:59:19 +0000363 virtual void HandlePragma(Preprocessor &PP, Token &PragmaTok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000364 // Figure out what line we went to and insert the appropriate number of
365 // newline characters.
366 Callbacks->MoveToLine(PragmaTok.getLocation());
367 OutputString(Prefix, strlen(Prefix));
368
369 // Read and print all of the pragma tokens.
Chris Lattner057aaf62007-10-09 18:03:42 +0000370 while (PragmaTok.isNot(tok::eom)) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000371 if (PragmaTok.hasLeadingSpace())
372 OutputChar(' ');
373 std::string TokSpell = PP.getSpelling(PragmaTok);
374 OutputString(&TokSpell[0], TokSpell.size());
375 PP.LexUnexpandedToken(PragmaTok);
376 }
377 OutputChar('\n');
378 }
379};
380} // end anonymous namespace
381
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000382
/// AvoidConcatInfo - Per-token flags, stored in TokenInfo, that tell
/// AvoidConcat what to check when something is printed right after the token.
enum AvoidConcatInfo {
  /// No flag set: the token never needs to avoid concatenation.  Most tokens
  /// (e.g. ',', ')', etc) don't cause a problem when concatenated.
  aci_never_avoid_concat = 0,

  /// AvoidConcat has custom code for this token, and that code needs to know
  /// the first character of the following token.
  aci_custom_firstchar = 1 << 0,

  /// AvoidConcat has custom code for this token, but that code does not need
  /// the first character of the following token.
  aci_custom = 1 << 1,

  /// The token cannot be safely followed by an '=' character.  For example,
  /// "<<" turns into "<<=" when followed by an =.
  aci_avoid_equal = 1 << 2
};
402
403/// This array contains information for each token on what action to take when
404/// avoiding concatenation of tokens in the AvoidConcat method.
405static char TokenInfo[tok::NUM_TOKENS];
406
407/// InitAvoidConcatTokenInfo - Tokens that must avoid concatenation should be
408/// marked by this function.
409static void InitAvoidConcatTokenInfo() {
410 // These tokens have custom code in AvoidConcat.
411 TokenInfo[tok::identifier ] |= aci_custom;
412 TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
413 TokenInfo[tok::period ] |= aci_custom_firstchar;
414 TokenInfo[tok::amp ] |= aci_custom_firstchar;
415 TokenInfo[tok::plus ] |= aci_custom_firstchar;
416 TokenInfo[tok::minus ] |= aci_custom_firstchar;
417 TokenInfo[tok::slash ] |= aci_custom_firstchar;
418 TokenInfo[tok::less ] |= aci_custom_firstchar;
419 TokenInfo[tok::greater ] |= aci_custom_firstchar;
420 TokenInfo[tok::pipe ] |= aci_custom_firstchar;
421 TokenInfo[tok::percent ] |= aci_custom_firstchar;
422 TokenInfo[tok::colon ] |= aci_custom_firstchar;
423 TokenInfo[tok::hash ] |= aci_custom_firstchar;
424 TokenInfo[tok::arrow ] |= aci_custom_firstchar;
425
426 // These tokens change behavior if followed by an '='.
427 TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
428 TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
429 TokenInfo[tok::minus ] |= aci_avoid_equal; // -=
430 TokenInfo[tok::slash ] |= aci_avoid_equal; // /=
431 TokenInfo[tok::less ] |= aci_avoid_equal; // <=
432 TokenInfo[tok::greater ] |= aci_avoid_equal; // >=
433 TokenInfo[tok::pipe ] |= aci_avoid_equal; // |=
434 TokenInfo[tok::percent ] |= aci_avoid_equal; // %=
435 TokenInfo[tok::star ] |= aci_avoid_equal; // *=
436 TokenInfo[tok::exclaim ] |= aci_avoid_equal; // !=
437 TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<=
438 TokenInfo[tok::greaterequal] |= aci_avoid_equal; // >>=
439 TokenInfo[tok::caret ] |= aci_avoid_equal; // ^=
440 TokenInfo[tok::equal ] |= aci_avoid_equal; // ==
441}
442
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000443/// StartsWithL - Return true if the spelling of this token starts with 'L'.
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000444static bool StartsWithL(const Token &Tok, Preprocessor &PP) {
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000445 if (!Tok.needsCleaning()) {
446 SourceManager &SrcMgr = PP.getSourceManager();
447 return *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()))
448 == 'L';
449 }
450
451 if (Tok.getLength() < 256) {
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000452 char Buffer[256];
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000453 const char *TokPtr = Buffer;
454 PP.getSpelling(Tok, TokPtr);
455 return TokPtr[0] == 'L';
456 }
457
458 return PP.getSpelling(Tok)[0] == 'L';
459}
460
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000461/// IsIdentifierL - Return true if the spelling of this token is literally 'L'.
462static bool IsIdentifierL(const Token &Tok, Preprocessor &PP) {
463 if (!Tok.needsCleaning()) {
464 if (Tok.getLength() != 1)
465 return false;
466 SourceManager &SrcMgr = PP.getSourceManager();
467 return *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()))
468 == 'L';
469 }
470
471 if (Tok.getLength() < 256) {
472 char Buffer[256];
473 const char *TokPtr = Buffer;
474 if (PP.getSpelling(Tok, TokPtr) != 1)
475 return false;
476 return TokPtr[0] == 'L';
477 }
478
479 return PP.getSpelling(Tok) == "L";
480}
481
482
Reid Spencer5f016e22007-07-11 17:01:13 +0000483/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
484/// the two individual tokens to be lexed as a single token, return true (which
485/// causes a space to be printed between them). This allows the output of -E
486/// mode to be lexed to the same token stream as lexing the input directly
487/// would.
488///
489/// This code must conservatively return true if it doesn't want to be 100%
490/// accurate. This will cause the output to include extra space characters, but
491/// the resulting output won't have incorrect concatenations going on. Examples
492/// include "..", which we print with a space between, because we don't want to
493/// track enough to tell "x.." from "...".
Chris Lattnerd2177732007-07-20 16:59:19 +0000494bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
495 const Token &Tok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000496 char Buffer[256];
497
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000498 tok::TokenKind PrevKind = PrevTok.getKind();
499 if (PrevTok.getIdentifierInfo()) // Language keyword or named operator.
500 PrevKind = tok::identifier;
501
502 // Look up information on when we should avoid concatenation with prevtok.
503 unsigned ConcatInfo = TokenInfo[PrevKind];
504
505 // If prevtok never causes a problem for anything after it, return quickly.
506 if (ConcatInfo == 0) return false;
Reid Spencer5f016e22007-07-11 17:01:13 +0000507
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000508 if (ConcatInfo & aci_avoid_equal) {
509 // If the next token is '=' or '==', avoid concatenation.
Chris Lattner057aaf62007-10-09 18:03:42 +0000510 if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000511 return true;
Chris Lattnerb638a302007-07-23 23:21:34 +0000512 ConcatInfo &= ~aci_avoid_equal;
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000513 }
514
515 if (ConcatInfo == 0) return false;
516
517
518
Reid Spencer5f016e22007-07-11 17:01:13 +0000519 // Basic algorithm: we look at the first character of the second token, and
520 // determine whether it, if appended to the first token, would form (or would
521 // contribute) to a larger token if concatenated.
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000522 char FirstChar = 0;
523 if (ConcatInfo & aci_custom) {
524 // If the token does not need to know the first character, don't get it.
525 } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000526 // Avoid spelling identifiers, the most common form of token.
527 FirstChar = II->getName()[0];
Chris Lattnerb19f5e82007-07-23 05:18:42 +0000528 } else if (!Tok.needsCleaning()) {
529 SourceManager &SrcMgr = PP.getSourceManager();
530 FirstChar =
531 *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()));
Reid Spencer5f016e22007-07-11 17:01:13 +0000532 } else if (Tok.getLength() < 256) {
533 const char *TokPtr = Buffer;
534 PP.getSpelling(Tok, TokPtr);
535 FirstChar = TokPtr[0];
536 } else {
537 FirstChar = PP.getSpelling(Tok)[0];
538 }
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000539
Reid Spencer5f016e22007-07-11 17:01:13 +0000540 switch (PrevKind) {
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000541 default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
Reid Spencer5f016e22007-07-11 17:01:13 +0000542 case tok::identifier: // id+id or id+number or id+L"foo".
Chris Lattner057aaf62007-10-09 18:03:42 +0000543 if (Tok.is(tok::numeric_constant) || Tok.getIdentifierInfo() ||
544 Tok.is(tok::wide_string_literal) /* ||
545 Tok.is(tok::wide_char_literal)*/)
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000546 return true;
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000547
548 // If this isn't identifier + string, we're done.
549 if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000550 return false;
551
552 // FIXME: need a wide_char_constant!
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000553
554 // If the string was a wide string L"foo" or wide char L'f', it would concat
555 // with the previous identifier into fooL"bar". Avoid this.
556 if (StartsWithL(Tok, PP))
557 return true;
558
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000559 // Otherwise, this is a narrow character or string. If the *identifier* is
560 // a literal 'L', avoid pasting L "foo" -> L"foo".
561 return IsIdentifierL(PrevTok, PP);
Reid Spencer5f016e22007-07-11 17:01:13 +0000562 case tok::numeric_constant:
Chris Lattner057aaf62007-10-09 18:03:42 +0000563 return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
Reid Spencer5f016e22007-07-11 17:01:13 +0000564 FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
565 case tok::period: // ..., .*, .1234
566 return FirstChar == '.' || FirstChar == '*' || isdigit(FirstChar);
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000567 case tok::amp: // &&
568 return FirstChar == '&';
569 case tok::plus: // ++
570 return FirstChar == '+';
571 case tok::minus: // --, ->, ->*
572 return FirstChar == '-' || FirstChar == '>';
573 case tok::slash: //, /*, //
574 return FirstChar == '*' || FirstChar == '/';
575 case tok::less: // <<, <<=, <:, <%
576 return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
577 case tok::greater: // >>, >>=
578 return FirstChar == '>';
579 case tok::pipe: // ||
580 return FirstChar == '|';
581 case tok::percent: // %>, %:
582 return FirstChar == '>' || FirstChar == ':';
Reid Spencer5f016e22007-07-11 17:01:13 +0000583 case tok::colon: // ::, :>
584 return FirstChar == ':' || FirstChar == '>';
585 case tok::hash: // ##, #@, %:%:
586 return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
587 case tok::arrow: // ->*
588 return FirstChar == '*';
Reid Spencer5f016e22007-07-11 17:01:13 +0000589 }
590}
591
592/// DoPrintPreprocessedInput - This implements -E mode.
593///
Chris Lattnere988bc22008-01-27 23:55:11 +0000594void clang::DoPrintPreprocessedInput(Preprocessor &PP, const std::string& OutFile) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000595 // Inform the preprocessor whether we want it to retain comments or not, due
596 // to -C or -CC.
597 PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput);
598
Chris Lattnere988bc22008-01-27 23:55:11 +0000599 InitOutputBuffer(OutFile);
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000600 InitAvoidConcatTokenInfo();
Reid Spencer5f016e22007-07-11 17:01:13 +0000601
Chris Lattnerd2177732007-07-20 16:59:19 +0000602 Token Tok, PrevTok;
Reid Spencer5f016e22007-07-11 17:01:13 +0000603 char Buffer[256];
604 PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(PP);
605 PP.setPPCallbacks(Callbacks);
606
607 PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks));
608 PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",Callbacks));
609
610 // After we have configured the preprocessor, enter the main file.
611
612 // Start parsing the specified input file.
Ted Kremenek95041a22007-12-19 22:51:13 +0000613 PP.EnterMainSourceFile();
Chris Lattner6f688e12007-10-10 20:45:16 +0000614
615 // Consume all of the tokens that come from the predefines buffer. Those
616 // should not be emitted into the output and are guaranteed to be at the
617 // start.
618 const SourceManager &SourceMgr = PP.getSourceManager();
619 do PP.Lex(Tok);
Chris Lattnera1a51782007-10-10 23:31:03 +0000620 while (Tok.isNot(tok::eof) && Tok.getLocation().isFileID() &&
Chris Lattner6f688e12007-10-10 20:45:16 +0000621 !strcmp(SourceMgr.getSourceName(Tok.getLocation()), "<predefines>"));
622
623 while (1) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000624
625 // If this token is at the start of a line, emit newlines if needed.
Chris Lattner5f180322007-12-09 21:11:08 +0000626 if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
627 // done.
Reid Spencer5f016e22007-07-11 17:01:13 +0000628 } else if (Tok.hasLeadingSpace() ||
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000629 // If we haven't emitted a token on this line yet, PrevTok isn't
630 // useful to look at and no concatenation could happen anyway.
Chris Lattnerb638a302007-07-23 23:21:34 +0000631 (Callbacks->hasEmittedTokensOnThisLine() &&
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000632 // Don't print "-" next to "-", it would form "--".
633 Callbacks->AvoidConcat(PrevTok, Tok))) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000634 OutputChar(' ');
635 }
636
Chris Lattner2933f412007-07-23 06:14:36 +0000637 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
638 const char *Str = II->getName();
639 unsigned Len = Tok.needsCleaning() ? strlen(Str) : Tok.getLength();
640 OutputString(Str, Len);
641 } else if (Tok.getLength() < 256) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000642 const char *TokPtr = Buffer;
643 unsigned Len = PP.getSpelling(Tok, TokPtr);
644 OutputString(TokPtr, Len);
645 } else {
646 std::string S = PP.getSpelling(Tok);
647 OutputString(&S[0], S.size());
648 }
649 Callbacks->SetEmittedTokensOnThisLine();
Chris Lattner6f688e12007-10-10 20:45:16 +0000650
651 if (Tok.is(tok::eof)) break;
652
653 PrevTok = Tok;
654 PP.Lex(Tok);
655 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000656 OutputChar('\n');
657
658 CleanupOutputBuffer();
659}
660