blob: 3035b654643441a2242032b37f55097415f386ba [file] [log] [blame]
Chris Lattner4b009652007-07-25 00:24:17 +00001//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2//
3// The LLVM Compiler Infrastructure
4//
Chris Lattner959e5be2007-12-29 19:59:25 +00005// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
Chris Lattner4b009652007-07-25 00:24:17 +00007//
8//===----------------------------------------------------------------------===//
9//
10// This code simply runs the preprocessor on the input file and prints out the
11// result. This is the traditional behavior of the -E option.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang.h"
16#include "clang/Lex/PPCallbacks.h"
17#include "clang/Lex/Preprocessor.h"
18#include "clang/Lex/Pragma.h"
19#include "clang/Basic/SourceManager.h"
Chris Lattner6619f662008-04-08 04:16:20 +000020#include "clang/Basic/Diagnostic.h"
Chris Lattner4b009652007-07-25 00:24:17 +000021#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/StringExtras.h"
Chris Lattner6619f662008-04-08 04:16:20 +000023#include "llvm/System/Path.h"
24#include "llvm/Support/CommandLine.h"
Chris Lattner4b009652007-07-25 00:24:17 +000025#include "llvm/Config/config.h"
Chris Lattner93b4f302008-08-17 01:47:12 +000026#include "llvm/Support/raw_ostream.h"
Chris Lattner4b009652007-07-25 00:24:17 +000027#include <cstdio>
28using namespace clang;
29
Chris Lattner6619f662008-04-08 04:16:20 +000030static std::string OutputFilename;
Chris Lattner93b4f302008-08-17 01:47:12 +000031static llvm::raw_ostream *OutStream;
Chris Lattner4b009652007-07-25 00:24:17 +000032
33/// InitOutputBuffer - Initialize our output buffer.
34///
Chris Lattnerefd02a32008-01-27 23:55:11 +000035static void InitOutputBuffer(const std::string& Output) {
Chris Lattner93b4f302008-08-17 01:47:12 +000036 if (!Output.size() || Output == "-") {
37 OutputFilename = "<stdout>";
38 OutStream = new llvm::raw_stdout_ostream();
39 } else {
Chris Lattner6619f662008-04-08 04:16:20 +000040 OutputFilename = Output;
Chris Lattner93b4f302008-08-17 01:47:12 +000041 std::string Err;
42 OutStream = new llvm::raw_fd_ostream(Output.c_str(), Err);
Chris Lattnere5362152008-04-11 06:14:11 +000043
Chris Lattner93b4f302008-08-17 01:47:12 +000044 if (!Err.empty()) {
45 delete OutStream;
46 fprintf(stderr, "%s\n", Err.c_str());
Chris Lattnere5362152008-04-11 06:14:11 +000047 exit(1);
48 }
Chris Lattner6619f662008-04-08 04:16:20 +000049 }
Chris Lattner93b4f302008-08-17 01:47:12 +000050 OutStream->SetBufferSize(64*1024);
Chris Lattner4b009652007-07-25 00:24:17 +000051}
52
Chris Lattner4b009652007-07-25 00:24:17 +000053/// CleanupOutputBuffer - Finish up output.
54///
Chris Lattner6619f662008-04-08 04:16:20 +000055static void CleanupOutputBuffer(bool ErrorOccurred) {
Chris Lattner93b4f302008-08-17 01:47:12 +000056 delete OutStream;
Chris Lattner6619f662008-04-08 04:16:20 +000057
58 // If an error occurred, remove the output file.
59 if (ErrorOccurred && !OutputFilename.empty())
60 llvm::sys::Path(OutputFilename).eraseFromDisk();
Chris Lattner4b009652007-07-25 00:24:17 +000061}
62
Chris Lattner93b4f302008-08-17 01:47:12 +000063static inline void OutputChar(char c) {
64 *OutStream << c;
Chris Lattner4b009652007-07-25 00:24:17 +000065}
66
Chris Lattner93b4f302008-08-17 01:47:12 +000067static inline void OutputString(const char *Ptr, unsigned Size) {
68 OutStream->write(Ptr, Size);
Chris Lattner4b009652007-07-25 00:24:17 +000069}
70
71
72//===----------------------------------------------------------------------===//
73// Preprocessed token printer
74//===----------------------------------------------------------------------===//
75
76static llvm::cl::opt<bool>
77DisableLineMarkers("P", llvm::cl::desc("Disable linemarker output in -E mode"));
78static llvm::cl::opt<bool>
79EnableCommentOutput("C", llvm::cl::desc("Enable comment output in -E mode"));
80static llvm::cl::opt<bool>
81EnableMacroCommentOutput("CC",
82 llvm::cl::desc("Enable comment output in -E mode, "
83 "even from macro expansions"));
84
85namespace {
86class PrintPPOutputPPCallbacks : public PPCallbacks {
87 Preprocessor &PP;
88 unsigned CurLine;
89 bool EmittedTokensOnThisLine;
90 DirectoryLookup::DirType FileType;
91 llvm::SmallString<512> CurFilename;
92public:
93 PrintPPOutputPPCallbacks(Preprocessor &pp) : PP(pp) {
94 CurLine = 0;
95 CurFilename += "<uninit>";
96 EmittedTokensOnThisLine = false;
97 FileType = DirectoryLookup::NormalHeaderDir;
98 }
99
100 void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
101 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
102
103 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
104 DirectoryLookup::DirType FileType);
105 virtual void Ident(SourceLocation Loc, const std::string &str);
106
107
Chris Lattner6c451292007-12-09 21:11:08 +0000108 bool HandleFirstTokOnLine(Token &Tok);
109 bool MoveToLine(SourceLocation Loc);
Chris Lattner4b009652007-07-25 00:24:17 +0000110 bool AvoidConcat(const Token &PrevTok, const Token &Tok);
111};
Chris Lattner6619f662008-04-08 04:16:20 +0000112} // end anonymous namespace
Chris Lattner4b009652007-07-25 00:24:17 +0000113
/// UToStr - Write the decimal spelling of N backwards into a caller-provided
/// buffer.  EndPtr points one past the last usable byte of that buffer; the
/// null terminator is stored in the last byte and the digits grow downward
/// from there.  Returns a pointer to the first digit.
static char *UToStr(unsigned N, char *EndPtr) {
  char *Cursor = EndPtr;

  // Null terminate the buffer.
  *--Cursor = '\0';

  // Emit at least one digit so that zero is spelled "0".
  do {
    *--Cursor = static_cast<char>('0' + N % 10);
    N /= 10;
  } while (N != 0);

  return Cursor;
}
127
128
129/// MoveToLine - Move the output to the source line specified by the location
130/// object. We can do this by emitting some number of \n's, or be emitting a
Chris Lattner6c451292007-12-09 21:11:08 +0000131/// #line directive. This returns false if already at the specified line, true
132/// if some newlines were emitted.
133bool PrintPPOutputPPCallbacks::MoveToLine(SourceLocation Loc) {
Chris Lattner4b009652007-07-25 00:24:17 +0000134 if (DisableLineMarkers) {
Chris Lattner6c451292007-12-09 21:11:08 +0000135 unsigned LineNo = PP.getSourceManager().getLogicalLineNumber(Loc);
136 if (LineNo == CurLine) return false;
137
138 CurLine = LineNo;
139
140 if (!EmittedTokensOnThisLine)
141 return true;
142
143 OutputChar('\n');
144 EmittedTokensOnThisLine = false;
145 return true;
Chris Lattner4b009652007-07-25 00:24:17 +0000146 }
147
148 unsigned LineNo = PP.getSourceManager().getLogicalLineNumber(Loc);
149
150 // If this line is "close enough" to the original line, just print newlines,
151 // otherwise print a #line directive.
152 if (LineNo-CurLine < 8) {
153 if (LineNo-CurLine == 1)
154 OutputChar('\n');
Chris Lattner6c451292007-12-09 21:11:08 +0000155 else if (LineNo == CurLine)
156 return false; // Phys line moved, but logical line didn't.
Chris Lattner4b009652007-07-25 00:24:17 +0000157 else {
158 const char *NewLines = "\n\n\n\n\n\n\n\n";
159 OutputString(NewLines, LineNo-CurLine);
Chris Lattner4b009652007-07-25 00:24:17 +0000160 }
Chris Lattner45ac8172007-12-09 20:45:43 +0000161 CurLine = LineNo;
Chris Lattner4b009652007-07-25 00:24:17 +0000162 } else {
163 if (EmittedTokensOnThisLine) {
164 OutputChar('\n');
165 EmittedTokensOnThisLine = false;
166 }
167
168 CurLine = LineNo;
169
170 OutputChar('#');
171 OutputChar(' ');
172 char NumberBuffer[20];
173 const char *NumStr = UToStr(LineNo, NumberBuffer+20);
174 OutputString(NumStr, (NumberBuffer+20)-NumStr-1);
175 OutputChar(' ');
176 OutputChar('"');
177 OutputString(&CurFilename[0], CurFilename.size());
178 OutputChar('"');
179
180 if (FileType == DirectoryLookup::SystemHeaderDir)
181 OutputString(" 3", 2);
182 else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
183 OutputString(" 3 4", 4);
184 OutputChar('\n');
185 }
Chris Lattner6c451292007-12-09 21:11:08 +0000186 return true;
Chris Lattner4b009652007-07-25 00:24:17 +0000187}
188
189
190/// FileChanged - Whenever the preprocessor enters or exits a #include file
191/// it invokes this handler. Update our conception of the current source
192/// position.
193void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
194 FileChangeReason Reason,
195 DirectoryLookup::DirType FileType) {
Chris Lattner4b009652007-07-25 00:24:17 +0000196 // Unless we are exiting a #include, make sure to skip ahead to the line the
197 // #include directive was at.
198 SourceManager &SourceMgr = PP.getSourceManager();
199 if (Reason == PPCallbacks::EnterFile) {
200 MoveToLine(SourceMgr.getIncludeLoc(Loc));
201 } else if (Reason == PPCallbacks::SystemHeaderPragma) {
202 MoveToLine(Loc);
203
204 // TODO GCC emits the # directive for this directive on the line AFTER the
205 // directive and emits a bunch of spaces that aren't needed. Emulate this
206 // strange behavior.
207 }
208
209 Loc = SourceMgr.getLogicalLoc(Loc);
210 CurLine = SourceMgr.getLineNumber(Loc);
Chris Lattner6c451292007-12-09 21:11:08 +0000211
212 if (DisableLineMarkers) return;
213
Chris Lattner4b009652007-07-25 00:24:17 +0000214 CurFilename.clear();
215 CurFilename += SourceMgr.getSourceName(Loc);
216 Lexer::Stringify(CurFilename);
217 FileType = FileType;
218
219 if (EmittedTokensOnThisLine) {
220 OutputChar('\n');
221 EmittedTokensOnThisLine = false;
222 }
223
224 OutputChar('#');
225 OutputChar(' ');
226
227 char NumberBuffer[20];
228 const char *NumStr = UToStr(CurLine, NumberBuffer+20);
229 OutputString(NumStr, (NumberBuffer+20)-NumStr-1);
230 OutputChar(' ');
231 OutputChar('"');
232 OutputString(&CurFilename[0], CurFilename.size());
233 OutputChar('"');
234
235 switch (Reason) {
236 case PPCallbacks::EnterFile:
237 OutputString(" 1", 2);
238 break;
239 case PPCallbacks::ExitFile:
240 OutputString(" 2", 2);
241 break;
242 case PPCallbacks::SystemHeaderPragma: break;
243 case PPCallbacks::RenameFile: break;
244 }
245
246 if (FileType == DirectoryLookup::SystemHeaderDir)
247 OutputString(" 3", 2);
248 else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
249 OutputString(" 3 4", 4);
250
251 OutputChar('\n');
252}
253
254/// HandleIdent - Handle #ident directives when read by the preprocessor.
255///
256void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
257 MoveToLine(Loc);
258
259 OutputString("#ident ", strlen("#ident "));
260 OutputString(&S[0], S.size());
261 EmittedTokensOnThisLine = true;
262}
263
264/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
Chris Lattner6c451292007-12-09 21:11:08 +0000265/// is called for the first token on each new line. If this really is the start
266/// of a new logical line, handle it and return true, otherwise return false.
267/// This may not be the start of a logical line because the "start of line"
268/// marker is set for physical lines, not logical ones.
269bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
Chris Lattner4b009652007-07-25 00:24:17 +0000270 // Figure out what line we went to and insert the appropriate number of
271 // newline characters.
Chris Lattner6c451292007-12-09 21:11:08 +0000272 if (!MoveToLine(Tok.getLocation()))
273 return false;
Chris Lattner4b009652007-07-25 00:24:17 +0000274
275 // Print out space characters so that the first token on a line is
276 // indented for easy reading.
277 const SourceManager &SourceMgr = PP.getSourceManager();
278 unsigned ColNo = SourceMgr.getLogicalColumnNumber(Tok.getLocation());
279
280 // This hack prevents stuff like:
281 // #define HASH #
282 // HASH define foo bar
283 // From having the # character end up at column 1, which makes it so it
284 // is not handled as a #define next time through the preprocessor if in
285 // -fpreprocessed mode.
Chris Lattner3b494152007-10-09 18:03:42 +0000286 if (ColNo <= 1 && Tok.is(tok::hash))
Chris Lattner4b009652007-07-25 00:24:17 +0000287 OutputChar(' ');
288
289 // Otherwise, indent the appropriate number of spaces.
290 for (; ColNo > 1; --ColNo)
291 OutputChar(' ');
Chris Lattner6c451292007-12-09 21:11:08 +0000292
293 return true;
Chris Lattner4b009652007-07-25 00:24:17 +0000294}
295
296namespace {
297struct UnknownPragmaHandler : public PragmaHandler {
298 const char *Prefix;
299 PrintPPOutputPPCallbacks *Callbacks;
300
301 UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks)
302 : PragmaHandler(0), Prefix(prefix), Callbacks(callbacks) {}
303 virtual void HandlePragma(Preprocessor &PP, Token &PragmaTok) {
304 // Figure out what line we went to and insert the appropriate number of
305 // newline characters.
306 Callbacks->MoveToLine(PragmaTok.getLocation());
307 OutputString(Prefix, strlen(Prefix));
308
309 // Read and print all of the pragma tokens.
Chris Lattner3b494152007-10-09 18:03:42 +0000310 while (PragmaTok.isNot(tok::eom)) {
Chris Lattner4b009652007-07-25 00:24:17 +0000311 if (PragmaTok.hasLeadingSpace())
312 OutputChar(' ');
313 std::string TokSpell = PP.getSpelling(PragmaTok);
314 OutputString(&TokSpell[0], TokSpell.size());
315 PP.LexUnexpandedToken(PragmaTok);
316 }
317 OutputChar('\n');
318 }
319};
320} // end anonymous namespace
321
322
/// AvoidConcatInfo - Bit flags stored per token kind that tell AvoidConcat
/// what has to be checked when the token is the *first* of two adjacent
/// tokens.
enum AvoidConcatInfo {
  /// No flags: the token never needs to avoid concatenation.  Most tokens
  /// (e.g. ',', ')', etc) don't cause a problem when concatenated.
  aci_never_avoid_concat = 0,

  /// AvoidConcat has custom code for this token, and that code needs the
  /// first character of the following token.
  aci_custom_firstchar   = 1 << 0,

  /// AvoidConcat has custom code for this token, and that code does not need
  /// the first character of the following token.
  aci_custom             = 1 << 1,

  /// The token cannot be safely followed by an '=' character.  For example,
  /// "<<" turns into "<<=" when followed by an =.
  aci_avoid_equal        = 1 << 2
};
342
343/// This array contains information for each token on what action to take when
344/// avoiding concatenation of tokens in the AvoidConcat method.
345static char TokenInfo[tok::NUM_TOKENS];
346
347/// InitAvoidConcatTokenInfo - Tokens that must avoid concatenation should be
348/// marked by this function.
349static void InitAvoidConcatTokenInfo() {
350 // These tokens have custom code in AvoidConcat.
351 TokenInfo[tok::identifier ] |= aci_custom;
352 TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
353 TokenInfo[tok::period ] |= aci_custom_firstchar;
354 TokenInfo[tok::amp ] |= aci_custom_firstchar;
355 TokenInfo[tok::plus ] |= aci_custom_firstchar;
356 TokenInfo[tok::minus ] |= aci_custom_firstchar;
357 TokenInfo[tok::slash ] |= aci_custom_firstchar;
358 TokenInfo[tok::less ] |= aci_custom_firstchar;
359 TokenInfo[tok::greater ] |= aci_custom_firstchar;
360 TokenInfo[tok::pipe ] |= aci_custom_firstchar;
361 TokenInfo[tok::percent ] |= aci_custom_firstchar;
362 TokenInfo[tok::colon ] |= aci_custom_firstchar;
363 TokenInfo[tok::hash ] |= aci_custom_firstchar;
364 TokenInfo[tok::arrow ] |= aci_custom_firstchar;
365
366 // These tokens change behavior if followed by an '='.
367 TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
368 TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
369 TokenInfo[tok::minus ] |= aci_avoid_equal; // -=
370 TokenInfo[tok::slash ] |= aci_avoid_equal; // /=
371 TokenInfo[tok::less ] |= aci_avoid_equal; // <=
372 TokenInfo[tok::greater ] |= aci_avoid_equal; // >=
373 TokenInfo[tok::pipe ] |= aci_avoid_equal; // |=
374 TokenInfo[tok::percent ] |= aci_avoid_equal; // %=
375 TokenInfo[tok::star ] |= aci_avoid_equal; // *=
376 TokenInfo[tok::exclaim ] |= aci_avoid_equal; // !=
377 TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<=
378 TokenInfo[tok::greaterequal] |= aci_avoid_equal; // >>=
379 TokenInfo[tok::caret ] |= aci_avoid_equal; // ^=
380 TokenInfo[tok::equal ] |= aci_avoid_equal; // ==
381}
382
Chris Lattnerafa40122008-01-15 05:22:14 +0000383/// StartsWithL - Return true if the spelling of this token starts with 'L'.
Chris Lattner400f0242008-01-15 05:14:19 +0000384static bool StartsWithL(const Token &Tok, Preprocessor &PP) {
Chris Lattner400f0242008-01-15 05:14:19 +0000385 if (!Tok.needsCleaning()) {
386 SourceManager &SrcMgr = PP.getSourceManager();
387 return *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()))
388 == 'L';
389 }
390
391 if (Tok.getLength() < 256) {
Chris Lattnerafa40122008-01-15 05:22:14 +0000392 char Buffer[256];
Chris Lattner400f0242008-01-15 05:14:19 +0000393 const char *TokPtr = Buffer;
394 PP.getSpelling(Tok, TokPtr);
395 return TokPtr[0] == 'L';
396 }
397
398 return PP.getSpelling(Tok)[0] == 'L';
399}
400
Chris Lattnerafa40122008-01-15 05:22:14 +0000401/// IsIdentifierL - Return true if the spelling of this token is literally 'L'.
402static bool IsIdentifierL(const Token &Tok, Preprocessor &PP) {
403 if (!Tok.needsCleaning()) {
404 if (Tok.getLength() != 1)
405 return false;
406 SourceManager &SrcMgr = PP.getSourceManager();
407 return *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()))
408 == 'L';
409 }
410
411 if (Tok.getLength() < 256) {
412 char Buffer[256];
413 const char *TokPtr = Buffer;
414 if (PP.getSpelling(Tok, TokPtr) != 1)
415 return false;
416 return TokPtr[0] == 'L';
417 }
418
419 return PP.getSpelling(Tok) == "L";
420}
421
422
Chris Lattner4b009652007-07-25 00:24:17 +0000423/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
424/// the two individual tokens to be lexed as a single token, return true (which
425/// causes a space to be printed between them). This allows the output of -E
426/// mode to be lexed to the same token stream as lexing the input directly
427/// would.
428///
429/// This code must conservatively return true if it doesn't want to be 100%
430/// accurate. This will cause the output to include extra space characters, but
431/// the resulting output won't have incorrect concatenations going on. Examples
432/// include "..", which we print with a space between, because we don't want to
433/// track enough to tell "x.." from "...".
434bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
435 const Token &Tok) {
436 char Buffer[256];
437
438 tok::TokenKind PrevKind = PrevTok.getKind();
439 if (PrevTok.getIdentifierInfo()) // Language keyword or named operator.
440 PrevKind = tok::identifier;
441
442 // Look up information on when we should avoid concatenation with prevtok.
443 unsigned ConcatInfo = TokenInfo[PrevKind];
444
445 // If prevtok never causes a problem for anything after it, return quickly.
446 if (ConcatInfo == 0) return false;
447
448 if (ConcatInfo & aci_avoid_equal) {
449 // If the next token is '=' or '==', avoid concatenation.
Chris Lattner3b494152007-10-09 18:03:42 +0000450 if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
Chris Lattner4b009652007-07-25 00:24:17 +0000451 return true;
452 ConcatInfo &= ~aci_avoid_equal;
453 }
454
455 if (ConcatInfo == 0) return false;
456
457
458
459 // Basic algorithm: we look at the first character of the second token, and
460 // determine whether it, if appended to the first token, would form (or would
461 // contribute) to a larger token if concatenated.
462 char FirstChar = 0;
463 if (ConcatInfo & aci_custom) {
464 // If the token does not need to know the first character, don't get it.
465 } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
466 // Avoid spelling identifiers, the most common form of token.
467 FirstChar = II->getName()[0];
468 } else if (!Tok.needsCleaning()) {
469 SourceManager &SrcMgr = PP.getSourceManager();
470 FirstChar =
471 *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()));
472 } else if (Tok.getLength() < 256) {
473 const char *TokPtr = Buffer;
474 PP.getSpelling(Tok, TokPtr);
475 FirstChar = TokPtr[0];
476 } else {
477 FirstChar = PP.getSpelling(Tok)[0];
478 }
479
480 switch (PrevKind) {
481 default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
482 case tok::identifier: // id+id or id+number or id+L"foo".
Chris Lattner3b494152007-10-09 18:03:42 +0000483 if (Tok.is(tok::numeric_constant) || Tok.getIdentifierInfo() ||
484 Tok.is(tok::wide_string_literal) /* ||
485 Tok.is(tok::wide_char_literal)*/)
Chris Lattner4b009652007-07-25 00:24:17 +0000486 return true;
Chris Lattner400f0242008-01-15 05:14:19 +0000487
488 // If this isn't identifier + string, we're done.
489 if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
Chris Lattner4b009652007-07-25 00:24:17 +0000490 return false;
491
492 // FIXME: need a wide_char_constant!
Chris Lattner400f0242008-01-15 05:14:19 +0000493
494 // If the string was a wide string L"foo" or wide char L'f', it would concat
495 // with the previous identifier into fooL"bar". Avoid this.
496 if (StartsWithL(Tok, PP))
497 return true;
498
Chris Lattnerafa40122008-01-15 05:22:14 +0000499 // Otherwise, this is a narrow character or string. If the *identifier* is
500 // a literal 'L', avoid pasting L "foo" -> L"foo".
501 return IsIdentifierL(PrevTok, PP);
Chris Lattner4b009652007-07-25 00:24:17 +0000502 case tok::numeric_constant:
Chris Lattner3b494152007-10-09 18:03:42 +0000503 return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
Chris Lattner4b009652007-07-25 00:24:17 +0000504 FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
505 case tok::period: // ..., .*, .1234
506 return FirstChar == '.' || FirstChar == '*' || isdigit(FirstChar);
507 case tok::amp: // &&
508 return FirstChar == '&';
509 case tok::plus: // ++
510 return FirstChar == '+';
511 case tok::minus: // --, ->, ->*
512 return FirstChar == '-' || FirstChar == '>';
513 case tok::slash: //, /*, //
514 return FirstChar == '*' || FirstChar == '/';
515 case tok::less: // <<, <<=, <:, <%
516 return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
517 case tok::greater: // >>, >>=
518 return FirstChar == '>';
519 case tok::pipe: // ||
520 return FirstChar == '|';
521 case tok::percent: // %>, %:
522 return FirstChar == '>' || FirstChar == ':';
523 case tok::colon: // ::, :>
524 return FirstChar == ':' || FirstChar == '>';
525 case tok::hash: // ##, #@, %:%:
526 return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
527 case tok::arrow: // ->*
528 return FirstChar == '*';
529 }
530}
531
532/// DoPrintPreprocessedInput - This implements -E mode.
533///
Chris Lattner6619f662008-04-08 04:16:20 +0000534void clang::DoPrintPreprocessedInput(Preprocessor &PP,
535 const std::string &OutFile) {
Chris Lattner4b009652007-07-25 00:24:17 +0000536 // Inform the preprocessor whether we want it to retain comments or not, due
537 // to -C or -CC.
538 PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput);
539
Chris Lattnerefd02a32008-01-27 23:55:11 +0000540 InitOutputBuffer(OutFile);
Chris Lattner4b009652007-07-25 00:24:17 +0000541 InitAvoidConcatTokenInfo();
542
543 Token Tok, PrevTok;
544 char Buffer[256];
545 PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(PP);
546 PP.setPPCallbacks(Callbacks);
547
548 PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks));
549 PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",Callbacks));
550
551 // After we have configured the preprocessor, enter the main file.
552
553 // Start parsing the specified input file.
Ted Kremenek17861c52007-12-19 22:51:13 +0000554 PP.EnterMainSourceFile();
Chris Lattner3eddc862007-10-10 20:45:16 +0000555
556 // Consume all of the tokens that come from the predefines buffer. Those
557 // should not be emitted into the output and are guaranteed to be at the
558 // start.
559 const SourceManager &SourceMgr = PP.getSourceManager();
560 do PP.Lex(Tok);
Chris Lattner890c5932007-10-10 23:31:03 +0000561 while (Tok.isNot(tok::eof) && Tok.getLocation().isFileID() &&
Chris Lattner3eddc862007-10-10 20:45:16 +0000562 !strcmp(SourceMgr.getSourceName(Tok.getLocation()), "<predefines>"));
563
564 while (1) {
Chris Lattner4b009652007-07-25 00:24:17 +0000565
566 // If this token is at the start of a line, emit newlines if needed.
Chris Lattner6c451292007-12-09 21:11:08 +0000567 if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
568 // done.
Chris Lattner4b009652007-07-25 00:24:17 +0000569 } else if (Tok.hasLeadingSpace() ||
570 // If we haven't emitted a token on this line yet, PrevTok isn't
571 // useful to look at and no concatenation could happen anyway.
572 (Callbacks->hasEmittedTokensOnThisLine() &&
573 // Don't print "-" next to "-", it would form "--".
574 Callbacks->AvoidConcat(PrevTok, Tok))) {
575 OutputChar(' ');
576 }
577
578 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
579 const char *Str = II->getName();
580 unsigned Len = Tok.needsCleaning() ? strlen(Str) : Tok.getLength();
581 OutputString(Str, Len);
582 } else if (Tok.getLength() < 256) {
583 const char *TokPtr = Buffer;
584 unsigned Len = PP.getSpelling(Tok, TokPtr);
585 OutputString(TokPtr, Len);
586 } else {
587 std::string S = PP.getSpelling(Tok);
588 OutputString(&S[0], S.size());
589 }
590 Callbacks->SetEmittedTokensOnThisLine();
Chris Lattner3eddc862007-10-10 20:45:16 +0000591
592 if (Tok.is(tok::eof)) break;
593
594 PrevTok = Tok;
595 PP.Lex(Tok);
596 }
Chris Lattner4b009652007-07-25 00:24:17 +0000597 OutputChar('\n');
598
Chris Lattner6619f662008-04-08 04:16:20 +0000599 CleanupOutputBuffer(PP.getDiagnostics().hasErrorOccurred());
Chris Lattner4b009652007-07-25 00:24:17 +0000600}
601