blob: bdf6d04bc1ee819e89de28b5dd7df8e69e82433b [file] [log] [blame]
//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This code simply runs the preprocessor on the input file and prints out the
// result.  This is the traditional behavior of the -E option.
//
//===----------------------------------------------------------------------===//
14
15#include "clang.h"
16#include "clang/Lex/PPCallbacks.h"
17#include "clang/Lex/Preprocessor.h"
18#include "clang/Lex/Pragma.h"
19#include "clang/Basic/SourceManager.h"
20#include "llvm/Support/CommandLine.h"
Chris Lattnerd8e30832007-07-24 06:57:14 +000021#include "llvm/ADT/SmallString.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000022#include "llvm/ADT/StringExtras.h"
23#include "llvm/Config/config.h"
24#include <cstdio>
25using namespace clang;
26
//===----------------------------------------------------------------------===//
// Simple buffered I/O
//===----------------------------------------------------------------------===//
//
// Empirically, iostream is over 30% slower than stdio for this workload, and
// stdio itself isn't very well suited.  The problem with stdio is use of
// putchar_unlocked.  We have many newline characters that need to be emitted,
// but stdio needs to do extra checks to handle line buffering mode.  These
// extra checks make putchar_unlocked fall off its inlined code path, hitting
// slow system code.  In practice, using 'write' directly makes 'clang -E -P'
// about 10% faster than using the stdio path on darwin.
38
39#ifdef HAVE_UNISTD_H
40#include <unistd.h>
41#else
42#define USE_STDIO 1
43#endif
44
// State of the hand-rolled output buffer: the start of the allocation, the
// one-past-the-end pointer of the allocation, and the position at which the
// next byte will be stored.
static char *OutBufStart = 0;
static char *OutBufEnd;
static char *OutBufCur;
46
47/// InitOutputBuffer - Initialize our output buffer.
48///
49static void InitOutputBuffer() {
50#ifndef USE_STDIO
51 OutBufStart = new char[64*1024];
52 OutBufEnd = OutBufStart+64*1024;
53 OutBufCur = OutBufStart;
54#endif
55}
56
57/// FlushBuffer - Write the accumulated bytes to the output stream.
58///
59static void FlushBuffer() {
60#ifndef USE_STDIO
61 write(STDOUT_FILENO, OutBufStart, OutBufCur-OutBufStart);
62 OutBufCur = OutBufStart;
63#endif
64}
65
66/// CleanupOutputBuffer - Finish up output.
67///
68static void CleanupOutputBuffer() {
69#ifndef USE_STDIO
70 FlushBuffer();
71 delete [] OutBufStart;
72#endif
73}
74
75static void OutputChar(char c) {
Chris Lattner6a4545e2007-09-03 18:24:56 +000076#if defined(_MSC_VER)
77 putchar(c);
78#elif defined(USE_STDIO)
Reid Spencer5f016e22007-07-11 17:01:13 +000079 putchar_unlocked(c);
80#else
81 if (OutBufCur >= OutBufEnd)
82 FlushBuffer();
83 *OutBufCur++ = c;
84#endif
85}
86
87static void OutputString(const char *Ptr, unsigned Size) {
88#ifdef USE_STDIO
89 fwrite(Ptr, Size, 1, stdout);
90#else
91 if (OutBufCur+Size >= OutBufEnd)
92 FlushBuffer();
Chris Lattnere225e372007-07-23 06:23:07 +000093
94 switch (Size) {
95 default:
96 memcpy(OutBufCur, Ptr, Size);
97 break;
98 case 3:
99 OutBufCur[2] = Ptr[2];
100 case 2:
101 OutBufCur[1] = Ptr[1];
102 case 1:
103 OutBufCur[0] = Ptr[0];
104 case 0:
105 break;
106 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000107 OutBufCur += Size;
108#endif
109}
110
111
//===----------------------------------------------------------------------===//
// Preprocessed token printer
//===----------------------------------------------------------------------===//
115
116static llvm::cl::opt<bool>
117DisableLineMarkers("P", llvm::cl::desc("Disable linemarker output in -E mode"));
118static llvm::cl::opt<bool>
119EnableCommentOutput("C", llvm::cl::desc("Enable comment output in -E mode"));
120static llvm::cl::opt<bool>
121EnableMacroCommentOutput("CC",
122 llvm::cl::desc("Enable comment output in -E mode, "
123 "even from macro expansions"));
124
125namespace {
126class PrintPPOutputPPCallbacks : public PPCallbacks {
127 Preprocessor &PP;
128 unsigned CurLine;
Reid Spencer5f016e22007-07-11 17:01:13 +0000129 bool EmittedTokensOnThisLine;
130 DirectoryLookup::DirType FileType;
Chris Lattnerd8e30832007-07-24 06:57:14 +0000131 llvm::SmallString<512> CurFilename;
Reid Spencer5f016e22007-07-11 17:01:13 +0000132public:
133 PrintPPOutputPPCallbacks(Preprocessor &pp) : PP(pp) {
134 CurLine = 0;
Chris Lattnerd8e30832007-07-24 06:57:14 +0000135 CurFilename += "<uninit>";
Reid Spencer5f016e22007-07-11 17:01:13 +0000136 EmittedTokensOnThisLine = false;
137 FileType = DirectoryLookup::NormalHeaderDir;
138 }
139
140 void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000141 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
Reid Spencer5f016e22007-07-11 17:01:13 +0000142
143 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
144 DirectoryLookup::DirType FileType);
145 virtual void Ident(SourceLocation Loc, const std::string &str);
146
147
Chris Lattner5f180322007-12-09 21:11:08 +0000148 bool HandleFirstTokOnLine(Token &Tok);
149 bool MoveToLine(SourceLocation Loc);
Chris Lattnerd2177732007-07-20 16:59:19 +0000150 bool AvoidConcat(const Token &PrevTok, const Token &Tok);
Reid Spencer5f016e22007-07-11 17:01:13 +0000151};
152}
153
/// UToStr - Render N in decimal at the tail of a caller-provided buffer.
/// EndPtr points one past the last byte of that buffer; the digits are written
/// backwards from there, preceded (in memory order: followed) by a terminating
/// nul.  The return value points at the first digit.
static char *UToStr(unsigned N, char *EndPtr) {
  char *Cursor = EndPtr;

  // The terminator occupies the last byte of the buffer.
  *--Cursor = '\0';

  // Peel off digits from least to most significant.  Using do/while makes
  // N == 0 produce a single '0' without a special case.
  do {
    *--Cursor = char('0' + N % 10);
    N /= 10;
  } while (N != 0);

  return Cursor;
}
167
168
Reid Spencer5f016e22007-07-11 17:01:13 +0000169/// MoveToLine - Move the output to the source line specified by the location
170/// object. We can do this by emitting some number of \n's, or be emitting a
Chris Lattner5f180322007-12-09 21:11:08 +0000171/// #line directive. This returns false if already at the specified line, true
172/// if some newlines were emitted.
173bool PrintPPOutputPPCallbacks::MoveToLine(SourceLocation Loc) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000174 if (DisableLineMarkers) {
Chris Lattner5f180322007-12-09 21:11:08 +0000175 unsigned LineNo = PP.getSourceManager().getLogicalLineNumber(Loc);
176 if (LineNo == CurLine) return false;
177
178 CurLine = LineNo;
179
180 if (!EmittedTokensOnThisLine)
181 return true;
182
183 OutputChar('\n');
184 EmittedTokensOnThisLine = false;
185 return true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000186 }
187
Chris Lattner9dc1f532007-07-20 16:37:10 +0000188 unsigned LineNo = PP.getSourceManager().getLogicalLineNumber(Loc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000189
190 // If this line is "close enough" to the original line, just print newlines,
191 // otherwise print a #line directive.
192 if (LineNo-CurLine < 8) {
Chris Lattner822f9402007-07-23 05:14:05 +0000193 if (LineNo-CurLine == 1)
Reid Spencer5f016e22007-07-11 17:01:13 +0000194 OutputChar('\n');
Chris Lattner5f180322007-12-09 21:11:08 +0000195 else if (LineNo == CurLine)
196 return false; // Phys line moved, but logical line didn't.
Chris Lattner822f9402007-07-23 05:14:05 +0000197 else {
198 const char *NewLines = "\n\n\n\n\n\n\n\n";
199 OutputString(NewLines, LineNo-CurLine);
Chris Lattner822f9402007-07-23 05:14:05 +0000200 }
Chris Lattner5c0887c2007-12-09 20:45:43 +0000201 CurLine = LineNo;
Reid Spencer5f016e22007-07-11 17:01:13 +0000202 } else {
203 if (EmittedTokensOnThisLine) {
204 OutputChar('\n');
205 EmittedTokensOnThisLine = false;
206 }
207
208 CurLine = LineNo;
209
210 OutputChar('#');
211 OutputChar(' ');
Chris Lattnerf0637212007-07-23 06:31:11 +0000212 char NumberBuffer[20];
213 const char *NumStr = UToStr(LineNo, NumberBuffer+20);
214 OutputString(NumStr, (NumberBuffer+20)-NumStr-1);
Reid Spencer5f016e22007-07-11 17:01:13 +0000215 OutputChar(' ');
Chris Lattner0cbc4b52007-07-22 06:38:50 +0000216 OutputChar('"');
Reid Spencer5f016e22007-07-11 17:01:13 +0000217 OutputString(&CurFilename[0], CurFilename.size());
Chris Lattner0cbc4b52007-07-22 06:38:50 +0000218 OutputChar('"');
Reid Spencer5f016e22007-07-11 17:01:13 +0000219
220 if (FileType == DirectoryLookup::SystemHeaderDir)
221 OutputString(" 3", 2);
222 else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
223 OutputString(" 3 4", 4);
224 OutputChar('\n');
225 }
Chris Lattner5f180322007-12-09 21:11:08 +0000226 return true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000227}
228
229
230/// FileChanged - Whenever the preprocessor enters or exits a #include file
231/// it invokes this handler. Update our conception of the current source
232/// position.
233void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
234 FileChangeReason Reason,
235 DirectoryLookup::DirType FileType) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000236 // Unless we are exiting a #include, make sure to skip ahead to the line the
237 // #include directive was at.
238 SourceManager &SourceMgr = PP.getSourceManager();
239 if (Reason == PPCallbacks::EnterFile) {
Chris Lattner9dc1f532007-07-20 16:37:10 +0000240 MoveToLine(SourceMgr.getIncludeLoc(Loc));
Reid Spencer5f016e22007-07-11 17:01:13 +0000241 } else if (Reason == PPCallbacks::SystemHeaderPragma) {
242 MoveToLine(Loc);
243
244 // TODO GCC emits the # directive for this directive on the line AFTER the
245 // directive and emits a bunch of spaces that aren't needed. Emulate this
246 // strange behavior.
247 }
248
Chris Lattner9dc1f532007-07-20 16:37:10 +0000249 Loc = SourceMgr.getLogicalLoc(Loc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000250 CurLine = SourceMgr.getLineNumber(Loc);
Chris Lattner5f180322007-12-09 21:11:08 +0000251
252 if (DisableLineMarkers) return;
253
Chris Lattnerd8e30832007-07-24 06:57:14 +0000254 CurFilename.clear();
255 CurFilename += SourceMgr.getSourceName(Loc);
256 Lexer::Stringify(CurFilename);
Reid Spencer5f016e22007-07-11 17:01:13 +0000257 FileType = FileType;
258
259 if (EmittedTokensOnThisLine) {
260 OutputChar('\n');
261 EmittedTokensOnThisLine = false;
262 }
263
Reid Spencer5f016e22007-07-11 17:01:13 +0000264 OutputChar('#');
265 OutputChar(' ');
Chris Lattner51431962007-07-24 06:59:01 +0000266
267 char NumberBuffer[20];
268 const char *NumStr = UToStr(CurLine, NumberBuffer+20);
269 OutputString(NumStr, (NumberBuffer+20)-NumStr-1);
Reid Spencer5f016e22007-07-11 17:01:13 +0000270 OutputChar(' ');
Chris Lattner0cbc4b52007-07-22 06:38:50 +0000271 OutputChar('"');
Reid Spencer5f016e22007-07-11 17:01:13 +0000272 OutputString(&CurFilename[0], CurFilename.size());
Chris Lattner0cbc4b52007-07-22 06:38:50 +0000273 OutputChar('"');
Reid Spencer5f016e22007-07-11 17:01:13 +0000274
275 switch (Reason) {
276 case PPCallbacks::EnterFile:
277 OutputString(" 1", 2);
278 break;
279 case PPCallbacks::ExitFile:
280 OutputString(" 2", 2);
281 break;
282 case PPCallbacks::SystemHeaderPragma: break;
283 case PPCallbacks::RenameFile: break;
284 }
285
286 if (FileType == DirectoryLookup::SystemHeaderDir)
287 OutputString(" 3", 2);
288 else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
289 OutputString(" 3 4", 4);
290
291 OutputChar('\n');
292}
293
294/// HandleIdent - Handle #ident directives when read by the preprocessor.
295///
296void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
297 MoveToLine(Loc);
298
299 OutputString("#ident ", strlen("#ident "));
300 OutputString(&S[0], S.size());
301 EmittedTokensOnThisLine = true;
302}
303
304/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
Chris Lattner5f180322007-12-09 21:11:08 +0000305/// is called for the first token on each new line. If this really is the start
306/// of a new logical line, handle it and return true, otherwise return false.
307/// This may not be the start of a logical line because the "start of line"
308/// marker is set for physical lines, not logical ones.
309bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000310 // Figure out what line we went to and insert the appropriate number of
311 // newline characters.
Chris Lattner5f180322007-12-09 21:11:08 +0000312 if (!MoveToLine(Tok.getLocation()))
313 return false;
Reid Spencer5f016e22007-07-11 17:01:13 +0000314
315 // Print out space characters so that the first token on a line is
316 // indented for easy reading.
Chris Lattner9dc1f532007-07-20 16:37:10 +0000317 const SourceManager &SourceMgr = PP.getSourceManager();
318 unsigned ColNo = SourceMgr.getLogicalColumnNumber(Tok.getLocation());
Reid Spencer5f016e22007-07-11 17:01:13 +0000319
320 // This hack prevents stuff like:
321 // #define HASH #
322 // HASH define foo bar
323 // From having the # character end up at column 1, which makes it so it
324 // is not handled as a #define next time through the preprocessor if in
325 // -fpreprocessed mode.
Chris Lattner057aaf62007-10-09 18:03:42 +0000326 if (ColNo <= 1 && Tok.is(tok::hash))
Reid Spencer5f016e22007-07-11 17:01:13 +0000327 OutputChar(' ');
328
329 // Otherwise, indent the appropriate number of spaces.
330 for (; ColNo > 1; --ColNo)
331 OutputChar(' ');
Chris Lattner5f180322007-12-09 21:11:08 +0000332
333 return true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000334}
335
336namespace {
337struct UnknownPragmaHandler : public PragmaHandler {
338 const char *Prefix;
339 PrintPPOutputPPCallbacks *Callbacks;
340
341 UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks)
342 : PragmaHandler(0), Prefix(prefix), Callbacks(callbacks) {}
Chris Lattnerd2177732007-07-20 16:59:19 +0000343 virtual void HandlePragma(Preprocessor &PP, Token &PragmaTok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000344 // Figure out what line we went to and insert the appropriate number of
345 // newline characters.
346 Callbacks->MoveToLine(PragmaTok.getLocation());
347 OutputString(Prefix, strlen(Prefix));
348
349 // Read and print all of the pragma tokens.
Chris Lattner057aaf62007-10-09 18:03:42 +0000350 while (PragmaTok.isNot(tok::eom)) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000351 if (PragmaTok.hasLeadingSpace())
352 OutputChar(' ');
353 std::string TokSpell = PP.getSpelling(PragmaTok);
354 OutputString(&TokSpell[0], TokSpell.size());
355 PP.LexUnexpandedToken(PragmaTok);
356 }
357 OutputChar('\n');
358 }
359};
360} // end anonymous namespace
361
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000362
/// AvoidConcatInfo - Bit flags stored per token kind that describe what
/// AvoidConcat has to check when that token is the first of a pair.
enum AvoidConcatInfo {
  /// By default, a token never needs to avoid concatenation.  Most tokens
  /// (e.g. ',', ')', etc) don't cause a problem when concatenated.
  aci_never_avoid_concat = 0,

  /// aci_custom_firstchar - AvoidConcat has custom code for this token, and
  /// that code needs to know the first character of the following token.
  aci_custom_firstchar = 1 << 0,

  /// aci_custom - AvoidConcat has custom code for this token, but that code
  /// doesn't need to know the first character of the following token.
  aci_custom = 1 << 1,

  /// aci_avoid_equal - Many tokens cannot be safely followed by an '='
  /// character.  For example, "<<" turns into "<<=" when followed by an =.
  aci_avoid_equal = 1 << 2
};
382
383/// This array contains information for each token on what action to take when
384/// avoiding concatenation of tokens in the AvoidConcat method.
385static char TokenInfo[tok::NUM_TOKENS];
386
387/// InitAvoidConcatTokenInfo - Tokens that must avoid concatenation should be
388/// marked by this function.
389static void InitAvoidConcatTokenInfo() {
390 // These tokens have custom code in AvoidConcat.
391 TokenInfo[tok::identifier ] |= aci_custom;
392 TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
393 TokenInfo[tok::period ] |= aci_custom_firstchar;
394 TokenInfo[tok::amp ] |= aci_custom_firstchar;
395 TokenInfo[tok::plus ] |= aci_custom_firstchar;
396 TokenInfo[tok::minus ] |= aci_custom_firstchar;
397 TokenInfo[tok::slash ] |= aci_custom_firstchar;
398 TokenInfo[tok::less ] |= aci_custom_firstchar;
399 TokenInfo[tok::greater ] |= aci_custom_firstchar;
400 TokenInfo[tok::pipe ] |= aci_custom_firstchar;
401 TokenInfo[tok::percent ] |= aci_custom_firstchar;
402 TokenInfo[tok::colon ] |= aci_custom_firstchar;
403 TokenInfo[tok::hash ] |= aci_custom_firstchar;
404 TokenInfo[tok::arrow ] |= aci_custom_firstchar;
405
406 // These tokens change behavior if followed by an '='.
407 TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
408 TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
409 TokenInfo[tok::minus ] |= aci_avoid_equal; // -=
410 TokenInfo[tok::slash ] |= aci_avoid_equal; // /=
411 TokenInfo[tok::less ] |= aci_avoid_equal; // <=
412 TokenInfo[tok::greater ] |= aci_avoid_equal; // >=
413 TokenInfo[tok::pipe ] |= aci_avoid_equal; // |=
414 TokenInfo[tok::percent ] |= aci_avoid_equal; // %=
415 TokenInfo[tok::star ] |= aci_avoid_equal; // *=
416 TokenInfo[tok::exclaim ] |= aci_avoid_equal; // !=
417 TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<=
418 TokenInfo[tok::greaterequal] |= aci_avoid_equal; // >>=
419 TokenInfo[tok::caret ] |= aci_avoid_equal; // ^=
420 TokenInfo[tok::equal ] |= aci_avoid_equal; // ==
421}
422
Reid Spencer5f016e22007-07-11 17:01:13 +0000423/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
424/// the two individual tokens to be lexed as a single token, return true (which
425/// causes a space to be printed between them). This allows the output of -E
426/// mode to be lexed to the same token stream as lexing the input directly
427/// would.
428///
429/// This code must conservatively return true if it doesn't want to be 100%
430/// accurate. This will cause the output to include extra space characters, but
431/// the resulting output won't have incorrect concatenations going on. Examples
432/// include "..", which we print with a space between, because we don't want to
433/// track enough to tell "x.." from "...".
Chris Lattnerd2177732007-07-20 16:59:19 +0000434bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
435 const Token &Tok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000436 char Buffer[256];
437
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000438 tok::TokenKind PrevKind = PrevTok.getKind();
439 if (PrevTok.getIdentifierInfo()) // Language keyword or named operator.
440 PrevKind = tok::identifier;
441
442 // Look up information on when we should avoid concatenation with prevtok.
443 unsigned ConcatInfo = TokenInfo[PrevKind];
444
445 // If prevtok never causes a problem for anything after it, return quickly.
446 if (ConcatInfo == 0) return false;
Reid Spencer5f016e22007-07-11 17:01:13 +0000447
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000448 if (ConcatInfo & aci_avoid_equal) {
449 // If the next token is '=' or '==', avoid concatenation.
Chris Lattner057aaf62007-10-09 18:03:42 +0000450 if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000451 return true;
Chris Lattnerb638a302007-07-23 23:21:34 +0000452 ConcatInfo &= ~aci_avoid_equal;
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000453 }
454
455 if (ConcatInfo == 0) return false;
456
457
458
Reid Spencer5f016e22007-07-11 17:01:13 +0000459 // Basic algorithm: we look at the first character of the second token, and
460 // determine whether it, if appended to the first token, would form (or would
461 // contribute) to a larger token if concatenated.
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000462 char FirstChar = 0;
463 if (ConcatInfo & aci_custom) {
464 // If the token does not need to know the first character, don't get it.
465 } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000466 // Avoid spelling identifiers, the most common form of token.
467 FirstChar = II->getName()[0];
Chris Lattnerb19f5e82007-07-23 05:18:42 +0000468 } else if (!Tok.needsCleaning()) {
469 SourceManager &SrcMgr = PP.getSourceManager();
470 FirstChar =
471 *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()));
Reid Spencer5f016e22007-07-11 17:01:13 +0000472 } else if (Tok.getLength() < 256) {
473 const char *TokPtr = Buffer;
474 PP.getSpelling(Tok, TokPtr);
475 FirstChar = TokPtr[0];
476 } else {
477 FirstChar = PP.getSpelling(Tok)[0];
478 }
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000479
Reid Spencer5f016e22007-07-11 17:01:13 +0000480 switch (PrevKind) {
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000481 default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
Reid Spencer5f016e22007-07-11 17:01:13 +0000482 case tok::identifier: // id+id or id+number or id+L"foo".
Chris Lattner057aaf62007-10-09 18:03:42 +0000483 if (Tok.is(tok::numeric_constant) || Tok.getIdentifierInfo() ||
484 Tok.is(tok::wide_string_literal) /* ||
485 Tok.is(tok::wide_char_literal)*/)
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000486 return true;
Chris Lattner057aaf62007-10-09 18:03:42 +0000487 if (Tok.isNot(tok::char_constant))
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000488 return false;
489
490 // FIXME: need a wide_char_constant!
491 if (!Tok.needsCleaning()) {
492 SourceManager &SrcMgr = PP.getSourceManager();
493 return *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()))
494 == 'L';
495 } else if (Tok.getLength() < 256) {
496 const char *TokPtr = Buffer;
497 PP.getSpelling(Tok, TokPtr);
498 return TokPtr[0] == 'L';
499 } else {
500 return PP.getSpelling(Tok)[0] == 'L';
501 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000502 case tok::numeric_constant:
Chris Lattner057aaf62007-10-09 18:03:42 +0000503 return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
Reid Spencer5f016e22007-07-11 17:01:13 +0000504 FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
505 case tok::period: // ..., .*, .1234
506 return FirstChar == '.' || FirstChar == '*' || isdigit(FirstChar);
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000507 case tok::amp: // &&
508 return FirstChar == '&';
509 case tok::plus: // ++
510 return FirstChar == '+';
511 case tok::minus: // --, ->, ->*
512 return FirstChar == '-' || FirstChar == '>';
513 case tok::slash: //, /*, //
514 return FirstChar == '*' || FirstChar == '/';
515 case tok::less: // <<, <<=, <:, <%
516 return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
517 case tok::greater: // >>, >>=
518 return FirstChar == '>';
519 case tok::pipe: // ||
520 return FirstChar == '|';
521 case tok::percent: // %>, %:
522 return FirstChar == '>' || FirstChar == ':';
Reid Spencer5f016e22007-07-11 17:01:13 +0000523 case tok::colon: // ::, :>
524 return FirstChar == ':' || FirstChar == '>';
525 case tok::hash: // ##, #@, %:%:
526 return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
527 case tok::arrow: // ->*
528 return FirstChar == '*';
Reid Spencer5f016e22007-07-11 17:01:13 +0000529 }
530}
531
532/// DoPrintPreprocessedInput - This implements -E mode.
533///
534void clang::DoPrintPreprocessedInput(unsigned MainFileID, Preprocessor &PP,
535 const LangOptions &Options) {
536 // Inform the preprocessor whether we want it to retain comments or not, due
537 // to -C or -CC.
538 PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput);
539
540 InitOutputBuffer();
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000541 InitAvoidConcatTokenInfo();
Reid Spencer5f016e22007-07-11 17:01:13 +0000542
Chris Lattnerd2177732007-07-20 16:59:19 +0000543 Token Tok, PrevTok;
Reid Spencer5f016e22007-07-11 17:01:13 +0000544 char Buffer[256];
545 PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(PP);
546 PP.setPPCallbacks(Callbacks);
547
548 PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks));
549 PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",Callbacks));
550
551 // After we have configured the preprocessor, enter the main file.
552
553 // Start parsing the specified input file.
Chris Lattner53b0dab2007-10-09 22:10:18 +0000554 PP.EnterMainSourceFile(MainFileID);
Chris Lattner6f688e12007-10-10 20:45:16 +0000555
556 // Consume all of the tokens that come from the predefines buffer. Those
557 // should not be emitted into the output and are guaranteed to be at the
558 // start.
559 const SourceManager &SourceMgr = PP.getSourceManager();
560 do PP.Lex(Tok);
Chris Lattnera1a51782007-10-10 23:31:03 +0000561 while (Tok.isNot(tok::eof) && Tok.getLocation().isFileID() &&
Chris Lattner6f688e12007-10-10 20:45:16 +0000562 !strcmp(SourceMgr.getSourceName(Tok.getLocation()), "<predefines>"));
563
564 while (1) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000565
566 // If this token is at the start of a line, emit newlines if needed.
Chris Lattner5f180322007-12-09 21:11:08 +0000567 if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
568 // done.
Reid Spencer5f016e22007-07-11 17:01:13 +0000569 } else if (Tok.hasLeadingSpace() ||
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000570 // If we haven't emitted a token on this line yet, PrevTok isn't
571 // useful to look at and no concatenation could happen anyway.
Chris Lattnerb638a302007-07-23 23:21:34 +0000572 (Callbacks->hasEmittedTokensOnThisLine() &&
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000573 // Don't print "-" next to "-", it would form "--".
574 Callbacks->AvoidConcat(PrevTok, Tok))) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000575 OutputChar(' ');
576 }
577
Chris Lattner2933f412007-07-23 06:14:36 +0000578 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
579 const char *Str = II->getName();
580 unsigned Len = Tok.needsCleaning() ? strlen(Str) : Tok.getLength();
581 OutputString(Str, Len);
582 } else if (Tok.getLength() < 256) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000583 const char *TokPtr = Buffer;
584 unsigned Len = PP.getSpelling(Tok, TokPtr);
585 OutputString(TokPtr, Len);
586 } else {
587 std::string S = PP.getSpelling(Tok);
588 OutputString(&S[0], S.size());
589 }
590 Callbacks->SetEmittedTokensOnThisLine();
Chris Lattner6f688e12007-10-10 20:45:16 +0000591
592 if (Tok.is(tok::eof)) break;
593
594 PrevTok = Tok;
595 PP.Lex(Tok);
596 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000597 OutputChar('\n');
598
599 CleanupOutputBuffer();
600}
601