blob: da0023760d1390aecf32ab09caa1167ebdf89534 [file] [log] [blame]
Reid Spencer5f016e22007-07-11 17:01:13 +00001//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This code simply runs the preprocessor on the input file and prints out the
11// result. This is the traditional behavior of the -E option.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang.h"
16#include "clang/Lex/PPCallbacks.h"
17#include "clang/Lex/Preprocessor.h"
18#include "clang/Lex/Pragma.h"
19#include "clang/Basic/SourceManager.h"
20#include "llvm/Support/CommandLine.h"
Chris Lattnerd8e30832007-07-24 06:57:14 +000021#include "llvm/ADT/SmallString.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000022#include "llvm/ADT/StringExtras.h"
23#include "llvm/Config/config.h"
24#include <cstdio>
25using namespace clang;
26
27//===----------------------------------------------------------------------===//
28// Simple buffered I/O
29//===----------------------------------------------------------------------===//
30//
31// Empirically, iostream is over 30% slower than stdio for this workload, and
32// stdio itself isn't very well suited. The problem with stdio is use of
33// putchar_unlocked. We have many newline characters that need to be emitted,
34// but stdio needs to do extra checks to handle line buffering mode. These
35// extra checks make putchar_unlocked fall off its inlined code path, hitting
36// slow system code. In practice, using 'write' directly makes 'clang -E -P'
37// about 10% faster than using the stdio path on darwin.
38
39#ifdef HAVE_UNISTD_H
40#include <unistd.h>
41#else
42#define USE_STDIO 1
43#endif
44
/// Bounds of, and write cursor into, the private output buffer used when
/// output goes through write().  All three are null until InitOutputBuffer
/// has run.
static char *OutBufStart = 0;
static char *OutBufEnd = 0;
static char *OutBufCur = 0;
46
47/// InitOutputBuffer - Initialize our output buffer.
48///
49static void InitOutputBuffer() {
50#ifndef USE_STDIO
51 OutBufStart = new char[64*1024];
52 OutBufEnd = OutBufStart+64*1024;
53 OutBufCur = OutBufStart;
54#endif
55}
56
57/// FlushBuffer - Write the accumulated bytes to the output stream.
58///
59static void FlushBuffer() {
60#ifndef USE_STDIO
61 write(STDOUT_FILENO, OutBufStart, OutBufCur-OutBufStart);
62 OutBufCur = OutBufStart;
63#endif
64}
65
66/// CleanupOutputBuffer - Finish up output.
67///
68static void CleanupOutputBuffer() {
69#ifndef USE_STDIO
70 FlushBuffer();
71 delete [] OutBufStart;
72#endif
73}
74
75static void OutputChar(char c) {
Chris Lattner6a4545e2007-09-03 18:24:56 +000076#if defined(_MSC_VER)
77 putchar(c);
78#elif defined(USE_STDIO)
Reid Spencer5f016e22007-07-11 17:01:13 +000079 putchar_unlocked(c);
80#else
81 if (OutBufCur >= OutBufEnd)
82 FlushBuffer();
83 *OutBufCur++ = c;
84#endif
85}
86
87static void OutputString(const char *Ptr, unsigned Size) {
88#ifdef USE_STDIO
89 fwrite(Ptr, Size, 1, stdout);
90#else
91 if (OutBufCur+Size >= OutBufEnd)
92 FlushBuffer();
Chris Lattnere225e372007-07-23 06:23:07 +000093
94 switch (Size) {
95 default:
96 memcpy(OutBufCur, Ptr, Size);
97 break;
98 case 3:
99 OutBufCur[2] = Ptr[2];
100 case 2:
101 OutBufCur[1] = Ptr[1];
102 case 1:
103 OutBufCur[0] = Ptr[0];
104 case 0:
105 break;
106 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000107 OutBufCur += Size;
108#endif
109}
110
111
112//===----------------------------------------------------------------------===//
113// Preprocessed token printer
114//===----------------------------------------------------------------------===//
115
116static llvm::cl::opt<bool>
117DisableLineMarkers("P", llvm::cl::desc("Disable linemarker output in -E mode"));
118static llvm::cl::opt<bool>
119EnableCommentOutput("C", llvm::cl::desc("Enable comment output in -E mode"));
120static llvm::cl::opt<bool>
121EnableMacroCommentOutput("CC",
122 llvm::cl::desc("Enable comment output in -E mode, "
123 "even from macro expansions"));
124
125namespace {
126class PrintPPOutputPPCallbacks : public PPCallbacks {
127 Preprocessor &PP;
128 unsigned CurLine;
Reid Spencer5f016e22007-07-11 17:01:13 +0000129 bool EmittedTokensOnThisLine;
130 DirectoryLookup::DirType FileType;
Chris Lattnerd8e30832007-07-24 06:57:14 +0000131 llvm::SmallString<512> CurFilename;
Reid Spencer5f016e22007-07-11 17:01:13 +0000132public:
133 PrintPPOutputPPCallbacks(Preprocessor &pp) : PP(pp) {
134 CurLine = 0;
Chris Lattnerd8e30832007-07-24 06:57:14 +0000135 CurFilename += "<uninit>";
Reid Spencer5f016e22007-07-11 17:01:13 +0000136 EmittedTokensOnThisLine = false;
137 FileType = DirectoryLookup::NormalHeaderDir;
138 }
139
140 void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000141 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
Reid Spencer5f016e22007-07-11 17:01:13 +0000142
143 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
144 DirectoryLookup::DirType FileType);
145 virtual void Ident(SourceLocation Loc, const std::string &str);
146
147
Chris Lattnerd2177732007-07-20 16:59:19 +0000148 void HandleFirstTokOnLine(Token &Tok);
Reid Spencer5f016e22007-07-11 17:01:13 +0000149 void MoveToLine(SourceLocation Loc);
Chris Lattnerd2177732007-07-20 16:59:19 +0000150 bool AvoidConcat(const Token &PrevTok, const Token &Tok);
Reid Spencer5f016e22007-07-11 17:01:13 +0000151};
152}
153
/// UToStr - Format N as a null terminated decimal string, filling the buffer
/// backwards from EndPtr (which points one past the last usable byte).
/// Returns a pointer to the first digit.
static char *UToStr(unsigned N, char *EndPtr) {
  char *Cursor = EndPtr;

  // The terminator goes in the very last byte of the buffer.
  *--Cursor = '\0';

  // Emit the digits least-significant first.  Using do/while guarantees that
  // zero still produces the single digit "0".
  do {
    *--Cursor = '0' + char(N % 10);
    N /= 10;
  } while (N != 0);

  return Cursor;
}
167
168
Reid Spencer5f016e22007-07-11 17:01:13 +0000169/// MoveToLine - Move the output to the source line specified by the location
170/// object. We can do this by emitting some number of \n's, or be emitting a
171/// #line directive.
172void PrintPPOutputPPCallbacks::MoveToLine(SourceLocation Loc) {
173 if (DisableLineMarkers) {
174 if (EmittedTokensOnThisLine) {
175 OutputChar('\n');
176 EmittedTokensOnThisLine = false;
177 }
178 return;
179 }
180
Chris Lattner9dc1f532007-07-20 16:37:10 +0000181 unsigned LineNo = PP.getSourceManager().getLogicalLineNumber(Loc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000182
183 // If this line is "close enough" to the original line, just print newlines,
184 // otherwise print a #line directive.
185 if (LineNo-CurLine < 8) {
Chris Lattner822f9402007-07-23 05:14:05 +0000186 if (LineNo-CurLine == 1)
Reid Spencer5f016e22007-07-11 17:01:13 +0000187 OutputChar('\n');
Chris Lattner822f9402007-07-23 05:14:05 +0000188 else {
189 const char *NewLines = "\n\n\n\n\n\n\n\n";
190 OutputString(NewLines, LineNo-CurLine);
191 CurLine = LineNo;
192 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000193 } else {
194 if (EmittedTokensOnThisLine) {
195 OutputChar('\n');
196 EmittedTokensOnThisLine = false;
197 }
198
199 CurLine = LineNo;
200
201 OutputChar('#');
202 OutputChar(' ');
Chris Lattnerf0637212007-07-23 06:31:11 +0000203 char NumberBuffer[20];
204 const char *NumStr = UToStr(LineNo, NumberBuffer+20);
205 OutputString(NumStr, (NumberBuffer+20)-NumStr-1);
Reid Spencer5f016e22007-07-11 17:01:13 +0000206 OutputChar(' ');
Chris Lattner0cbc4b52007-07-22 06:38:50 +0000207 OutputChar('"');
Reid Spencer5f016e22007-07-11 17:01:13 +0000208 OutputString(&CurFilename[0], CurFilename.size());
Chris Lattner0cbc4b52007-07-22 06:38:50 +0000209 OutputChar('"');
Reid Spencer5f016e22007-07-11 17:01:13 +0000210
211 if (FileType == DirectoryLookup::SystemHeaderDir)
212 OutputString(" 3", 2);
213 else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
214 OutputString(" 3 4", 4);
215 OutputChar('\n');
216 }
217}
218
219
220/// FileChanged - Whenever the preprocessor enters or exits a #include file
221/// it invokes this handler. Update our conception of the current source
222/// position.
223void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
224 FileChangeReason Reason,
225 DirectoryLookup::DirType FileType) {
226 if (DisableLineMarkers) return;
227
228 // Unless we are exiting a #include, make sure to skip ahead to the line the
229 // #include directive was at.
230 SourceManager &SourceMgr = PP.getSourceManager();
231 if (Reason == PPCallbacks::EnterFile) {
Chris Lattner9dc1f532007-07-20 16:37:10 +0000232 MoveToLine(SourceMgr.getIncludeLoc(Loc));
Reid Spencer5f016e22007-07-11 17:01:13 +0000233 } else if (Reason == PPCallbacks::SystemHeaderPragma) {
234 MoveToLine(Loc);
235
236 // TODO GCC emits the # directive for this directive on the line AFTER the
237 // directive and emits a bunch of spaces that aren't needed. Emulate this
238 // strange behavior.
239 }
240
Chris Lattner9dc1f532007-07-20 16:37:10 +0000241 Loc = SourceMgr.getLogicalLoc(Loc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000242 CurLine = SourceMgr.getLineNumber(Loc);
Chris Lattnerd8e30832007-07-24 06:57:14 +0000243 CurFilename.clear();
244 CurFilename += SourceMgr.getSourceName(Loc);
245 Lexer::Stringify(CurFilename);
Reid Spencer5f016e22007-07-11 17:01:13 +0000246 FileType = FileType;
247
248 if (EmittedTokensOnThisLine) {
249 OutputChar('\n');
250 EmittedTokensOnThisLine = false;
251 }
252
Reid Spencer5f016e22007-07-11 17:01:13 +0000253 OutputChar('#');
254 OutputChar(' ');
Chris Lattner51431962007-07-24 06:59:01 +0000255
256 char NumberBuffer[20];
257 const char *NumStr = UToStr(CurLine, NumberBuffer+20);
258 OutputString(NumStr, (NumberBuffer+20)-NumStr-1);
Reid Spencer5f016e22007-07-11 17:01:13 +0000259 OutputChar(' ');
Chris Lattner0cbc4b52007-07-22 06:38:50 +0000260 OutputChar('"');
Reid Spencer5f016e22007-07-11 17:01:13 +0000261 OutputString(&CurFilename[0], CurFilename.size());
Chris Lattner0cbc4b52007-07-22 06:38:50 +0000262 OutputChar('"');
Reid Spencer5f016e22007-07-11 17:01:13 +0000263
264 switch (Reason) {
265 case PPCallbacks::EnterFile:
266 OutputString(" 1", 2);
267 break;
268 case PPCallbacks::ExitFile:
269 OutputString(" 2", 2);
270 break;
271 case PPCallbacks::SystemHeaderPragma: break;
272 case PPCallbacks::RenameFile: break;
273 }
274
275 if (FileType == DirectoryLookup::SystemHeaderDir)
276 OutputString(" 3", 2);
277 else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
278 OutputString(" 3 4", 4);
279
280 OutputChar('\n');
281}
282
283/// HandleIdent - Handle #ident directives when read by the preprocessor.
284///
285void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
286 MoveToLine(Loc);
287
288 OutputString("#ident ", strlen("#ident "));
289 OutputString(&S[0], S.size());
290 EmittedTokensOnThisLine = true;
291}
292
293/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
294/// is called for the first token on each new line.
Chris Lattnerd2177732007-07-20 16:59:19 +0000295void PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000296 // Figure out what line we went to and insert the appropriate number of
297 // newline characters.
298 MoveToLine(Tok.getLocation());
299
300 // Print out space characters so that the first token on a line is
301 // indented for easy reading.
Chris Lattner9dc1f532007-07-20 16:37:10 +0000302 const SourceManager &SourceMgr = PP.getSourceManager();
303 unsigned ColNo = SourceMgr.getLogicalColumnNumber(Tok.getLocation());
Reid Spencer5f016e22007-07-11 17:01:13 +0000304
305 // This hack prevents stuff like:
306 // #define HASH #
307 // HASH define foo bar
308 // From having the # character end up at column 1, which makes it so it
309 // is not handled as a #define next time through the preprocessor if in
310 // -fpreprocessed mode.
Chris Lattner057aaf62007-10-09 18:03:42 +0000311 if (ColNo <= 1 && Tok.is(tok::hash))
Reid Spencer5f016e22007-07-11 17:01:13 +0000312 OutputChar(' ');
313
314 // Otherwise, indent the appropriate number of spaces.
315 for (; ColNo > 1; --ColNo)
316 OutputChar(' ');
317}
318
319namespace {
320struct UnknownPragmaHandler : public PragmaHandler {
321 const char *Prefix;
322 PrintPPOutputPPCallbacks *Callbacks;
323
324 UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks)
325 : PragmaHandler(0), Prefix(prefix), Callbacks(callbacks) {}
Chris Lattnerd2177732007-07-20 16:59:19 +0000326 virtual void HandlePragma(Preprocessor &PP, Token &PragmaTok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000327 // Figure out what line we went to and insert the appropriate number of
328 // newline characters.
329 Callbacks->MoveToLine(PragmaTok.getLocation());
330 OutputString(Prefix, strlen(Prefix));
331
332 // Read and print all of the pragma tokens.
Chris Lattner057aaf62007-10-09 18:03:42 +0000333 while (PragmaTok.isNot(tok::eom)) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000334 if (PragmaTok.hasLeadingSpace())
335 OutputChar(' ');
336 std::string TokSpell = PP.getSpelling(PragmaTok);
337 OutputString(&TokSpell[0], TokSpell.size());
338 PP.LexUnexpandedToken(PragmaTok);
339 }
340 OutputChar('\n');
341 }
342};
343} // end anonymous namespace
344
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000345
/// AvoidConcatInfo - Flags, one set per token kind, telling AvoidConcat what
/// has to be checked when that kind of token is followed by another token.
enum AvoidConcatInfo {
  /// No flags: most tokens (e.g. ',', ')', etc) can be followed by anything
  /// without causing a problem.
  aci_never_avoid_concat = 0x0,

  /// AvoidConcat has custom code for this token, and that code needs to know
  /// the first character of the token that follows.
  aci_custom_firstchar   = 0x1,

  /// AvoidConcat has custom code for this token, but that code does not need
  /// the first character of the token that follows.
  aci_custom             = 0x2,

  /// This token cannot be safely followed by an '=' character.  For example,
  /// "<<" turns into "<<=" when followed by an =.
  aci_avoid_equal        = 0x4
};
365
366/// This array contains information for each token on what action to take when
367/// avoiding concatenation of tokens in the AvoidConcat method.
368static char TokenInfo[tok::NUM_TOKENS];
369
370/// InitAvoidConcatTokenInfo - Tokens that must avoid concatenation should be
371/// marked by this function.
372static void InitAvoidConcatTokenInfo() {
373 // These tokens have custom code in AvoidConcat.
374 TokenInfo[tok::identifier ] |= aci_custom;
375 TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
376 TokenInfo[tok::period ] |= aci_custom_firstchar;
377 TokenInfo[tok::amp ] |= aci_custom_firstchar;
378 TokenInfo[tok::plus ] |= aci_custom_firstchar;
379 TokenInfo[tok::minus ] |= aci_custom_firstchar;
380 TokenInfo[tok::slash ] |= aci_custom_firstchar;
381 TokenInfo[tok::less ] |= aci_custom_firstchar;
382 TokenInfo[tok::greater ] |= aci_custom_firstchar;
383 TokenInfo[tok::pipe ] |= aci_custom_firstchar;
384 TokenInfo[tok::percent ] |= aci_custom_firstchar;
385 TokenInfo[tok::colon ] |= aci_custom_firstchar;
386 TokenInfo[tok::hash ] |= aci_custom_firstchar;
387 TokenInfo[tok::arrow ] |= aci_custom_firstchar;
388
389 // These tokens change behavior if followed by an '='.
390 TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
391 TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
392 TokenInfo[tok::minus ] |= aci_avoid_equal; // -=
393 TokenInfo[tok::slash ] |= aci_avoid_equal; // /=
394 TokenInfo[tok::less ] |= aci_avoid_equal; // <=
395 TokenInfo[tok::greater ] |= aci_avoid_equal; // >=
396 TokenInfo[tok::pipe ] |= aci_avoid_equal; // |=
397 TokenInfo[tok::percent ] |= aci_avoid_equal; // %=
398 TokenInfo[tok::star ] |= aci_avoid_equal; // *=
399 TokenInfo[tok::exclaim ] |= aci_avoid_equal; // !=
400 TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<=
401 TokenInfo[tok::greaterequal] |= aci_avoid_equal; // >>=
402 TokenInfo[tok::caret ] |= aci_avoid_equal; // ^=
403 TokenInfo[tok::equal ] |= aci_avoid_equal; // ==
404}
405
Reid Spencer5f016e22007-07-11 17:01:13 +0000406/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
407/// the two individual tokens to be lexed as a single token, return true (which
408/// causes a space to be printed between them). This allows the output of -E
409/// mode to be lexed to the same token stream as lexing the input directly
410/// would.
411///
412/// This code must conservatively return true if it doesn't want to be 100%
413/// accurate. This will cause the output to include extra space characters, but
414/// the resulting output won't have incorrect concatenations going on. Examples
415/// include "..", which we print with a space between, because we don't want to
416/// track enough to tell "x.." from "...".
Chris Lattnerd2177732007-07-20 16:59:19 +0000417bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
418 const Token &Tok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000419 char Buffer[256];
420
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000421 tok::TokenKind PrevKind = PrevTok.getKind();
422 if (PrevTok.getIdentifierInfo()) // Language keyword or named operator.
423 PrevKind = tok::identifier;
424
425 // Look up information on when we should avoid concatenation with prevtok.
426 unsigned ConcatInfo = TokenInfo[PrevKind];
427
428 // If prevtok never causes a problem for anything after it, return quickly.
429 if (ConcatInfo == 0) return false;
Reid Spencer5f016e22007-07-11 17:01:13 +0000430
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000431 if (ConcatInfo & aci_avoid_equal) {
432 // If the next token is '=' or '==', avoid concatenation.
Chris Lattner057aaf62007-10-09 18:03:42 +0000433 if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000434 return true;
Chris Lattnerb638a302007-07-23 23:21:34 +0000435 ConcatInfo &= ~aci_avoid_equal;
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000436 }
437
438 if (ConcatInfo == 0) return false;
439
440
441
Reid Spencer5f016e22007-07-11 17:01:13 +0000442 // Basic algorithm: we look at the first character of the second token, and
443 // determine whether it, if appended to the first token, would form (or would
444 // contribute) to a larger token if concatenated.
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000445 char FirstChar = 0;
446 if (ConcatInfo & aci_custom) {
447 // If the token does not need to know the first character, don't get it.
448 } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000449 // Avoid spelling identifiers, the most common form of token.
450 FirstChar = II->getName()[0];
Chris Lattnerb19f5e82007-07-23 05:18:42 +0000451 } else if (!Tok.needsCleaning()) {
452 SourceManager &SrcMgr = PP.getSourceManager();
453 FirstChar =
454 *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()));
Reid Spencer5f016e22007-07-11 17:01:13 +0000455 } else if (Tok.getLength() < 256) {
456 const char *TokPtr = Buffer;
457 PP.getSpelling(Tok, TokPtr);
458 FirstChar = TokPtr[0];
459 } else {
460 FirstChar = PP.getSpelling(Tok)[0];
461 }
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000462
Reid Spencer5f016e22007-07-11 17:01:13 +0000463 switch (PrevKind) {
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000464 default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
Reid Spencer5f016e22007-07-11 17:01:13 +0000465 case tok::identifier: // id+id or id+number or id+L"foo".
Chris Lattner057aaf62007-10-09 18:03:42 +0000466 if (Tok.is(tok::numeric_constant) || Tok.getIdentifierInfo() ||
467 Tok.is(tok::wide_string_literal) /* ||
468 Tok.is(tok::wide_char_literal)*/)
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000469 return true;
Chris Lattner057aaf62007-10-09 18:03:42 +0000470 if (Tok.isNot(tok::char_constant))
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000471 return false;
472
473 // FIXME: need a wide_char_constant!
474 if (!Tok.needsCleaning()) {
475 SourceManager &SrcMgr = PP.getSourceManager();
476 return *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()))
477 == 'L';
478 } else if (Tok.getLength() < 256) {
479 const char *TokPtr = Buffer;
480 PP.getSpelling(Tok, TokPtr);
481 return TokPtr[0] == 'L';
482 } else {
483 return PP.getSpelling(Tok)[0] == 'L';
484 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000485 case tok::numeric_constant:
Chris Lattner057aaf62007-10-09 18:03:42 +0000486 return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
Reid Spencer5f016e22007-07-11 17:01:13 +0000487 FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
488 case tok::period: // ..., .*, .1234
489 return FirstChar == '.' || FirstChar == '*' || isdigit(FirstChar);
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000490 case tok::amp: // &&
491 return FirstChar == '&';
492 case tok::plus: // ++
493 return FirstChar == '+';
494 case tok::minus: // --, ->, ->*
495 return FirstChar == '-' || FirstChar == '>';
496 case tok::slash: //, /*, //
497 return FirstChar == '*' || FirstChar == '/';
498 case tok::less: // <<, <<=, <:, <%
499 return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
500 case tok::greater: // >>, >>=
501 return FirstChar == '>';
502 case tok::pipe: // ||
503 return FirstChar == '|';
504 case tok::percent: // %>, %:
505 return FirstChar == '>' || FirstChar == ':';
Reid Spencer5f016e22007-07-11 17:01:13 +0000506 case tok::colon: // ::, :>
507 return FirstChar == ':' || FirstChar == '>';
508 case tok::hash: // ##, #@, %:%:
509 return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
510 case tok::arrow: // ->*
511 return FirstChar == '*';
Reid Spencer5f016e22007-07-11 17:01:13 +0000512 }
513}
514
515/// DoPrintPreprocessedInput - This implements -E mode.
516///
517void clang::DoPrintPreprocessedInput(unsigned MainFileID, Preprocessor &PP,
518 const LangOptions &Options) {
519 // Inform the preprocessor whether we want it to retain comments or not, due
520 // to -C or -CC.
521 PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput);
522
523 InitOutputBuffer();
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000524 InitAvoidConcatTokenInfo();
Reid Spencer5f016e22007-07-11 17:01:13 +0000525
Chris Lattnerd2177732007-07-20 16:59:19 +0000526 Token Tok, PrevTok;
Reid Spencer5f016e22007-07-11 17:01:13 +0000527 char Buffer[256];
528 PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(PP);
529 PP.setPPCallbacks(Callbacks);
530
531 PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks));
532 PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",Callbacks));
533
534 // After we have configured the preprocessor, enter the main file.
535
536 // Start parsing the specified input file.
Chris Lattner53b0dab2007-10-09 22:10:18 +0000537 PP.EnterMainSourceFile(MainFileID);
Chris Lattner6f688e12007-10-10 20:45:16 +0000538
539 // Consume all of the tokens that come from the predefines buffer. Those
540 // should not be emitted into the output and are guaranteed to be at the
541 // start.
542 const SourceManager &SourceMgr = PP.getSourceManager();
543 do PP.Lex(Tok);
Chris Lattnera1a51782007-10-10 23:31:03 +0000544 while (Tok.isNot(tok::eof) && Tok.getLocation().isFileID() &&
Chris Lattner6f688e12007-10-10 20:45:16 +0000545 !strcmp(SourceMgr.getSourceName(Tok.getLocation()), "<predefines>"));
546
547 while (1) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000548
549 // If this token is at the start of a line, emit newlines if needed.
550 if (Tok.isAtStartOfLine()) {
551 Callbacks->HandleFirstTokOnLine(Tok);
552 } else if (Tok.hasLeadingSpace() ||
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000553 // If we haven't emitted a token on this line yet, PrevTok isn't
554 // useful to look at and no concatenation could happen anyway.
Chris Lattnerb638a302007-07-23 23:21:34 +0000555 (Callbacks->hasEmittedTokensOnThisLine() &&
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000556 // Don't print "-" next to "-", it would form "--".
557 Callbacks->AvoidConcat(PrevTok, Tok))) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000558 OutputChar(' ');
559 }
560
Chris Lattner2933f412007-07-23 06:14:36 +0000561 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
562 const char *Str = II->getName();
563 unsigned Len = Tok.needsCleaning() ? strlen(Str) : Tok.getLength();
564 OutputString(Str, Len);
565 } else if (Tok.getLength() < 256) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000566 const char *TokPtr = Buffer;
567 unsigned Len = PP.getSpelling(Tok, TokPtr);
568 OutputString(TokPtr, Len);
569 } else {
570 std::string S = PP.getSpelling(Tok);
571 OutputString(&S[0], S.size());
572 }
573 Callbacks->SetEmittedTokensOnThisLine();
Chris Lattner6f688e12007-10-10 20:45:16 +0000574
575 if (Tok.is(tok::eof)) break;
576
577 PrevTok = Tok;
578 PP.Lex(Tok);
579 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000580 OutputChar('\n');
581
582 CleanupOutputBuffer();
583}
584