blob: 5044250fd5c553a9d66cda6092ad90c50203d751 [file] [log] [blame]
//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This code simply runs the preprocessor on the input file and prints out the
// result.  This is the traditional behavior of the -E option.
//
//===----------------------------------------------------------------------===//
14
15#include "clang.h"
16#include "clang/Lex/PPCallbacks.h"
17#include "clang/Lex/Preprocessor.h"
18#include "clang/Lex/Pragma.h"
19#include "clang/Basic/SourceManager.h"
Chris Lattner6619f662008-04-08 04:16:20 +000020#include "clang/Basic/Diagnostic.h"
Chris Lattner4b009652007-07-25 00:24:17 +000021#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/StringExtras.h"
Chris Lattner6619f662008-04-08 04:16:20 +000023#include "llvm/System/Path.h"
24#include "llvm/Support/CommandLine.h"
Chris Lattner4b009652007-07-25 00:24:17 +000025#include "llvm/Config/config.h"
Chris Lattner93b4f302008-08-17 01:47:12 +000026#include "llvm/Support/raw_ostream.h"
Chris Lattner4b009652007-07-25 00:24:17 +000027#include <cstdio>
28using namespace clang;
29
//===----------------------------------------------------------------------===//
// Preprocessed token printer
//===----------------------------------------------------------------------===//
33
34static llvm::cl::opt<bool>
35DisableLineMarkers("P", llvm::cl::desc("Disable linemarker output in -E mode"));
36static llvm::cl::opt<bool>
37EnableCommentOutput("C", llvm::cl::desc("Enable comment output in -E mode"));
38static llvm::cl::opt<bool>
39EnableMacroCommentOutput("CC",
40 llvm::cl::desc("Enable comment output in -E mode, "
41 "even from macro expansions"));
42
43namespace {
44class PrintPPOutputPPCallbacks : public PPCallbacks {
45 Preprocessor &PP;
Chris Lattner21494222008-08-17 03:12:02 +000046public:
47 llvm::raw_ostream &OS;
48private:
Chris Lattner4b009652007-07-25 00:24:17 +000049 unsigned CurLine;
50 bool EmittedTokensOnThisLine;
51 DirectoryLookup::DirType FileType;
52 llvm::SmallString<512> CurFilename;
53public:
Chris Lattner21494222008-08-17 03:12:02 +000054 PrintPPOutputPPCallbacks(Preprocessor &pp, llvm::raw_ostream &os)
55 : PP(pp), OS(os) {
Chris Lattner4b009652007-07-25 00:24:17 +000056 CurLine = 0;
57 CurFilename += "<uninit>";
58 EmittedTokensOnThisLine = false;
59 FileType = DirectoryLookup::NormalHeaderDir;
60 }
61
62 void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
63 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
64
65 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
66 DirectoryLookup::DirType FileType);
67 virtual void Ident(SourceLocation Loc, const std::string &str);
68
69
Chris Lattner6c451292007-12-09 21:11:08 +000070 bool HandleFirstTokOnLine(Token &Tok);
71 bool MoveToLine(SourceLocation Loc);
Chris Lattner4b009652007-07-25 00:24:17 +000072 bool AvoidConcat(const Token &PrevTok, const Token &Tok);
73};
Chris Lattner6619f662008-04-08 04:16:20 +000074} // end anonymous namespace
Chris Lattner4b009652007-07-25 00:24:17 +000075
/// UToStr - Format N in decimal, writing backwards from EndPtr.
///
/// EndPtr points one past the last usable byte of the caller's buffer.  A
/// terminating NUL is stored in the last byte and the digits are stored in
/// front of it.  Returns a pointer to the first digit.
static char *UToStr(unsigned N, char *EndPtr) {
  *--EndPtr = '\0';

  // A do/while always produces at least one digit, so zero needs no special
  // case.
  do {
    *--EndPtr = '0' + char(N % 10);
    N /= 10;
  } while (N != 0);

  return EndPtr;
}
89
90
91/// MoveToLine - Move the output to the source line specified by the location
92/// object. We can do this by emitting some number of \n's, or be emitting a
Chris Lattner6c451292007-12-09 21:11:08 +000093/// #line directive. This returns false if already at the specified line, true
94/// if some newlines were emitted.
95bool PrintPPOutputPPCallbacks::MoveToLine(SourceLocation Loc) {
Chris Lattner4b009652007-07-25 00:24:17 +000096 if (DisableLineMarkers) {
Chris Lattner6c451292007-12-09 21:11:08 +000097 unsigned LineNo = PP.getSourceManager().getLogicalLineNumber(Loc);
98 if (LineNo == CurLine) return false;
99
100 CurLine = LineNo;
101
102 if (!EmittedTokensOnThisLine)
103 return true;
104
Chris Lattner21494222008-08-17 03:12:02 +0000105 OS << '\n';
Chris Lattner6c451292007-12-09 21:11:08 +0000106 EmittedTokensOnThisLine = false;
107 return true;
Chris Lattner4b009652007-07-25 00:24:17 +0000108 }
109
110 unsigned LineNo = PP.getSourceManager().getLogicalLineNumber(Loc);
111
112 // If this line is "close enough" to the original line, just print newlines,
113 // otherwise print a #line directive.
114 if (LineNo-CurLine < 8) {
115 if (LineNo-CurLine == 1)
Chris Lattner21494222008-08-17 03:12:02 +0000116 OS << '\n';
Chris Lattner6c451292007-12-09 21:11:08 +0000117 else if (LineNo == CurLine)
118 return false; // Phys line moved, but logical line didn't.
Chris Lattner4b009652007-07-25 00:24:17 +0000119 else {
120 const char *NewLines = "\n\n\n\n\n\n\n\n";
Chris Lattner21494222008-08-17 03:12:02 +0000121 OS.write(NewLines, LineNo-CurLine);
Chris Lattner4b009652007-07-25 00:24:17 +0000122 }
Chris Lattner45ac8172007-12-09 20:45:43 +0000123 CurLine = LineNo;
Chris Lattner4b009652007-07-25 00:24:17 +0000124 } else {
125 if (EmittedTokensOnThisLine) {
Chris Lattner21494222008-08-17 03:12:02 +0000126 OS << '\n';
Chris Lattner4b009652007-07-25 00:24:17 +0000127 EmittedTokensOnThisLine = false;
128 }
129
130 CurLine = LineNo;
131
Chris Lattner21494222008-08-17 03:12:02 +0000132 OS << '#' << ' ';
Chris Lattner4b009652007-07-25 00:24:17 +0000133 char NumberBuffer[20];
134 const char *NumStr = UToStr(LineNo, NumberBuffer+20);
Chris Lattner21494222008-08-17 03:12:02 +0000135 OS.write(NumStr, (NumberBuffer+20)-NumStr-1);
136 OS << ' ';
137 OS << '"';
138 OS.write(&CurFilename[0], CurFilename.size());
139 OS << '"';
Chris Lattner4b009652007-07-25 00:24:17 +0000140
141 if (FileType == DirectoryLookup::SystemHeaderDir)
Chris Lattner21494222008-08-17 03:12:02 +0000142 OS.write(" 3", 2);
Chris Lattner4b009652007-07-25 00:24:17 +0000143 else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
Chris Lattner21494222008-08-17 03:12:02 +0000144 OS.write(" 3 4", 4);
145 OS << '\n';
Chris Lattner4b009652007-07-25 00:24:17 +0000146 }
Chris Lattner6c451292007-12-09 21:11:08 +0000147 return true;
Chris Lattner4b009652007-07-25 00:24:17 +0000148}
149
150
151/// FileChanged - Whenever the preprocessor enters or exits a #include file
152/// it invokes this handler. Update our conception of the current source
153/// position.
154void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
155 FileChangeReason Reason,
156 DirectoryLookup::DirType FileType) {
Chris Lattner4b009652007-07-25 00:24:17 +0000157 // Unless we are exiting a #include, make sure to skip ahead to the line the
158 // #include directive was at.
159 SourceManager &SourceMgr = PP.getSourceManager();
160 if (Reason == PPCallbacks::EnterFile) {
161 MoveToLine(SourceMgr.getIncludeLoc(Loc));
162 } else if (Reason == PPCallbacks::SystemHeaderPragma) {
163 MoveToLine(Loc);
164
165 // TODO GCC emits the # directive for this directive on the line AFTER the
166 // directive and emits a bunch of spaces that aren't needed. Emulate this
167 // strange behavior.
168 }
169
170 Loc = SourceMgr.getLogicalLoc(Loc);
171 CurLine = SourceMgr.getLineNumber(Loc);
Chris Lattner6c451292007-12-09 21:11:08 +0000172
173 if (DisableLineMarkers) return;
174
Chris Lattner4b009652007-07-25 00:24:17 +0000175 CurFilename.clear();
176 CurFilename += SourceMgr.getSourceName(Loc);
177 Lexer::Stringify(CurFilename);
178 FileType = FileType;
179
180 if (EmittedTokensOnThisLine) {
Chris Lattner21494222008-08-17 03:12:02 +0000181 OS << '\n';
Chris Lattner4b009652007-07-25 00:24:17 +0000182 EmittedTokensOnThisLine = false;
183 }
184
Chris Lattner21494222008-08-17 03:12:02 +0000185 OS << '#' << ' ';
Chris Lattner4b009652007-07-25 00:24:17 +0000186
187 char NumberBuffer[20];
188 const char *NumStr = UToStr(CurLine, NumberBuffer+20);
Chris Lattner21494222008-08-17 03:12:02 +0000189 OS.write(NumStr, (NumberBuffer+20)-NumStr-1);
190 OS << ' ' << '"';
191 OS.write(&CurFilename[0], CurFilename.size());
192 OS << '"';
Chris Lattner4b009652007-07-25 00:24:17 +0000193
194 switch (Reason) {
195 case PPCallbacks::EnterFile:
Chris Lattner21494222008-08-17 03:12:02 +0000196 OS.write(" 1", 2);
Chris Lattner4b009652007-07-25 00:24:17 +0000197 break;
198 case PPCallbacks::ExitFile:
Chris Lattner21494222008-08-17 03:12:02 +0000199 OS.write(" 2", 2);
Chris Lattner4b009652007-07-25 00:24:17 +0000200 break;
201 case PPCallbacks::SystemHeaderPragma: break;
202 case PPCallbacks::RenameFile: break;
203 }
204
205 if (FileType == DirectoryLookup::SystemHeaderDir)
Chris Lattner21494222008-08-17 03:12:02 +0000206 OS.write(" 3", 2);
Chris Lattner4b009652007-07-25 00:24:17 +0000207 else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
Chris Lattner21494222008-08-17 03:12:02 +0000208 OS.write(" 3 4", 4);
Chris Lattner4b009652007-07-25 00:24:17 +0000209
Chris Lattner21494222008-08-17 03:12:02 +0000210 OS << '\n';
Chris Lattner4b009652007-07-25 00:24:17 +0000211}
212
213/// HandleIdent - Handle #ident directives when read by the preprocessor.
214///
215void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
216 MoveToLine(Loc);
217
Chris Lattner21494222008-08-17 03:12:02 +0000218 OS.write("#ident ", strlen("#ident "));
219 OS.write(&S[0], S.size());
Chris Lattner4b009652007-07-25 00:24:17 +0000220 EmittedTokensOnThisLine = true;
221}
222
223/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
Chris Lattner6c451292007-12-09 21:11:08 +0000224/// is called for the first token on each new line. If this really is the start
225/// of a new logical line, handle it and return true, otherwise return false.
226/// This may not be the start of a logical line because the "start of line"
227/// marker is set for physical lines, not logical ones.
228bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
Chris Lattner4b009652007-07-25 00:24:17 +0000229 // Figure out what line we went to and insert the appropriate number of
230 // newline characters.
Chris Lattner6c451292007-12-09 21:11:08 +0000231 if (!MoveToLine(Tok.getLocation()))
232 return false;
Chris Lattner4b009652007-07-25 00:24:17 +0000233
234 // Print out space characters so that the first token on a line is
235 // indented for easy reading.
236 const SourceManager &SourceMgr = PP.getSourceManager();
237 unsigned ColNo = SourceMgr.getLogicalColumnNumber(Tok.getLocation());
238
239 // This hack prevents stuff like:
240 // #define HASH #
241 // HASH define foo bar
242 // From having the # character end up at column 1, which makes it so it
243 // is not handled as a #define next time through the preprocessor if in
244 // -fpreprocessed mode.
Chris Lattner3b494152007-10-09 18:03:42 +0000245 if (ColNo <= 1 && Tok.is(tok::hash))
Chris Lattner21494222008-08-17 03:12:02 +0000246 OS << ' ';
Chris Lattner4b009652007-07-25 00:24:17 +0000247
248 // Otherwise, indent the appropriate number of spaces.
249 for (; ColNo > 1; --ColNo)
Chris Lattner21494222008-08-17 03:12:02 +0000250 OS << ' ';
Chris Lattner6c451292007-12-09 21:11:08 +0000251
252 return true;
Chris Lattner4b009652007-07-25 00:24:17 +0000253}
254
255namespace {
256struct UnknownPragmaHandler : public PragmaHandler {
257 const char *Prefix;
258 PrintPPOutputPPCallbacks *Callbacks;
259
260 UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks)
261 : PragmaHandler(0), Prefix(prefix), Callbacks(callbacks) {}
262 virtual void HandlePragma(Preprocessor &PP, Token &PragmaTok) {
263 // Figure out what line we went to and insert the appropriate number of
264 // newline characters.
265 Callbacks->MoveToLine(PragmaTok.getLocation());
Chris Lattner21494222008-08-17 03:12:02 +0000266 Callbacks->OS.write(Prefix, strlen(Prefix));
Chris Lattner4b009652007-07-25 00:24:17 +0000267
268 // Read and print all of the pragma tokens.
Chris Lattner3b494152007-10-09 18:03:42 +0000269 while (PragmaTok.isNot(tok::eom)) {
Chris Lattner4b009652007-07-25 00:24:17 +0000270 if (PragmaTok.hasLeadingSpace())
Chris Lattner21494222008-08-17 03:12:02 +0000271 Callbacks->OS << ' ';
Chris Lattner4b009652007-07-25 00:24:17 +0000272 std::string TokSpell = PP.getSpelling(PragmaTok);
Chris Lattner21494222008-08-17 03:12:02 +0000273 Callbacks->OS.write(&TokSpell[0], TokSpell.size());
Chris Lattner4b009652007-07-25 00:24:17 +0000274 PP.LexUnexpandedToken(PragmaTok);
275 }
Chris Lattner21494222008-08-17 03:12:02 +0000276 Callbacks->OS << '\n';
Chris Lattner4b009652007-07-25 00:24:17 +0000277 }
278};
279} // end anonymous namespace
280
281
/// AvoidConcatInfo - Bit flags stored per token kind in TokenInfo, describing
/// what AvoidConcat has to check when that kind is the *previous* token.
enum AvoidConcatInfo {
  /// No flags: the token never needs to avoid concatenation.  Most tokens
  /// (e.g. ',', ')', etc) don't cause a problem when concatenated.
  aci_never_avoid_concat = 0,

  /// AvoidConcat has custom code for this token and that code needs the first
  /// character of the following token.
  aci_custom_firstchar = 1 << 0,

  /// AvoidConcat has custom code for this token, but it does not need the
  /// first character of the following token.
  aci_custom = 1 << 1,

  /// The token cannot be safely followed by an '=' character.  For example,
  /// "<<" turns into "<<=" when followed by an =.
  aci_avoid_equal = 1 << 2
};
301
302/// This array contains information for each token on what action to take when
303/// avoiding concatenation of tokens in the AvoidConcat method.
304static char TokenInfo[tok::NUM_TOKENS];
305
306/// InitAvoidConcatTokenInfo - Tokens that must avoid concatenation should be
307/// marked by this function.
308static void InitAvoidConcatTokenInfo() {
309 // These tokens have custom code in AvoidConcat.
310 TokenInfo[tok::identifier ] |= aci_custom;
311 TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
312 TokenInfo[tok::period ] |= aci_custom_firstchar;
313 TokenInfo[tok::amp ] |= aci_custom_firstchar;
314 TokenInfo[tok::plus ] |= aci_custom_firstchar;
315 TokenInfo[tok::minus ] |= aci_custom_firstchar;
316 TokenInfo[tok::slash ] |= aci_custom_firstchar;
317 TokenInfo[tok::less ] |= aci_custom_firstchar;
318 TokenInfo[tok::greater ] |= aci_custom_firstchar;
319 TokenInfo[tok::pipe ] |= aci_custom_firstchar;
320 TokenInfo[tok::percent ] |= aci_custom_firstchar;
321 TokenInfo[tok::colon ] |= aci_custom_firstchar;
322 TokenInfo[tok::hash ] |= aci_custom_firstchar;
323 TokenInfo[tok::arrow ] |= aci_custom_firstchar;
324
325 // These tokens change behavior if followed by an '='.
326 TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
327 TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
328 TokenInfo[tok::minus ] |= aci_avoid_equal; // -=
329 TokenInfo[tok::slash ] |= aci_avoid_equal; // /=
330 TokenInfo[tok::less ] |= aci_avoid_equal; // <=
331 TokenInfo[tok::greater ] |= aci_avoid_equal; // >=
332 TokenInfo[tok::pipe ] |= aci_avoid_equal; // |=
333 TokenInfo[tok::percent ] |= aci_avoid_equal; // %=
334 TokenInfo[tok::star ] |= aci_avoid_equal; // *=
335 TokenInfo[tok::exclaim ] |= aci_avoid_equal; // !=
336 TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<=
337 TokenInfo[tok::greaterequal] |= aci_avoid_equal; // >>=
338 TokenInfo[tok::caret ] |= aci_avoid_equal; // ^=
339 TokenInfo[tok::equal ] |= aci_avoid_equal; // ==
340}
341
Chris Lattnerafa40122008-01-15 05:22:14 +0000342/// StartsWithL - Return true if the spelling of this token starts with 'L'.
Chris Lattner400f0242008-01-15 05:14:19 +0000343static bool StartsWithL(const Token &Tok, Preprocessor &PP) {
Chris Lattner400f0242008-01-15 05:14:19 +0000344 if (!Tok.needsCleaning()) {
345 SourceManager &SrcMgr = PP.getSourceManager();
346 return *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()))
347 == 'L';
348 }
349
350 if (Tok.getLength() < 256) {
Chris Lattnerafa40122008-01-15 05:22:14 +0000351 char Buffer[256];
Chris Lattner400f0242008-01-15 05:14:19 +0000352 const char *TokPtr = Buffer;
353 PP.getSpelling(Tok, TokPtr);
354 return TokPtr[0] == 'L';
355 }
356
357 return PP.getSpelling(Tok)[0] == 'L';
358}
359
Chris Lattnerafa40122008-01-15 05:22:14 +0000360/// IsIdentifierL - Return true if the spelling of this token is literally 'L'.
361static bool IsIdentifierL(const Token &Tok, Preprocessor &PP) {
362 if (!Tok.needsCleaning()) {
363 if (Tok.getLength() != 1)
364 return false;
365 SourceManager &SrcMgr = PP.getSourceManager();
366 return *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()))
367 == 'L';
368 }
369
370 if (Tok.getLength() < 256) {
371 char Buffer[256];
372 const char *TokPtr = Buffer;
373 if (PP.getSpelling(Tok, TokPtr) != 1)
374 return false;
375 return TokPtr[0] == 'L';
376 }
377
378 return PP.getSpelling(Tok) == "L";
379}
380
381
Chris Lattner4b009652007-07-25 00:24:17 +0000382/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
383/// the two individual tokens to be lexed as a single token, return true (which
384/// causes a space to be printed between them). This allows the output of -E
385/// mode to be lexed to the same token stream as lexing the input directly
386/// would.
387///
388/// This code must conservatively return true if it doesn't want to be 100%
389/// accurate. This will cause the output to include extra space characters, but
390/// the resulting output won't have incorrect concatenations going on. Examples
391/// include "..", which we print with a space between, because we don't want to
392/// track enough to tell "x.." from "...".
393bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
394 const Token &Tok) {
395 char Buffer[256];
396
397 tok::TokenKind PrevKind = PrevTok.getKind();
398 if (PrevTok.getIdentifierInfo()) // Language keyword or named operator.
399 PrevKind = tok::identifier;
400
401 // Look up information on when we should avoid concatenation with prevtok.
402 unsigned ConcatInfo = TokenInfo[PrevKind];
403
404 // If prevtok never causes a problem for anything after it, return quickly.
405 if (ConcatInfo == 0) return false;
406
407 if (ConcatInfo & aci_avoid_equal) {
408 // If the next token is '=' or '==', avoid concatenation.
Chris Lattner3b494152007-10-09 18:03:42 +0000409 if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
Chris Lattner4b009652007-07-25 00:24:17 +0000410 return true;
411 ConcatInfo &= ~aci_avoid_equal;
412 }
413
414 if (ConcatInfo == 0) return false;
415
416
417
418 // Basic algorithm: we look at the first character of the second token, and
419 // determine whether it, if appended to the first token, would form (or would
420 // contribute) to a larger token if concatenated.
421 char FirstChar = 0;
422 if (ConcatInfo & aci_custom) {
423 // If the token does not need to know the first character, don't get it.
424 } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
425 // Avoid spelling identifiers, the most common form of token.
426 FirstChar = II->getName()[0];
427 } else if (!Tok.needsCleaning()) {
428 SourceManager &SrcMgr = PP.getSourceManager();
429 FirstChar =
430 *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()));
431 } else if (Tok.getLength() < 256) {
432 const char *TokPtr = Buffer;
433 PP.getSpelling(Tok, TokPtr);
434 FirstChar = TokPtr[0];
435 } else {
436 FirstChar = PP.getSpelling(Tok)[0];
437 }
438
439 switch (PrevKind) {
440 default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
441 case tok::identifier: // id+id or id+number or id+L"foo".
Chris Lattner3b494152007-10-09 18:03:42 +0000442 if (Tok.is(tok::numeric_constant) || Tok.getIdentifierInfo() ||
443 Tok.is(tok::wide_string_literal) /* ||
444 Tok.is(tok::wide_char_literal)*/)
Chris Lattner4b009652007-07-25 00:24:17 +0000445 return true;
Chris Lattner400f0242008-01-15 05:14:19 +0000446
447 // If this isn't identifier + string, we're done.
448 if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
Chris Lattner4b009652007-07-25 00:24:17 +0000449 return false;
450
451 // FIXME: need a wide_char_constant!
Chris Lattner400f0242008-01-15 05:14:19 +0000452
453 // If the string was a wide string L"foo" or wide char L'f', it would concat
454 // with the previous identifier into fooL"bar". Avoid this.
455 if (StartsWithL(Tok, PP))
456 return true;
457
Chris Lattnerafa40122008-01-15 05:22:14 +0000458 // Otherwise, this is a narrow character or string. If the *identifier* is
459 // a literal 'L', avoid pasting L "foo" -> L"foo".
460 return IsIdentifierL(PrevTok, PP);
Chris Lattner4b009652007-07-25 00:24:17 +0000461 case tok::numeric_constant:
Chris Lattner3b494152007-10-09 18:03:42 +0000462 return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
Chris Lattner4b009652007-07-25 00:24:17 +0000463 FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
464 case tok::period: // ..., .*, .1234
465 return FirstChar == '.' || FirstChar == '*' || isdigit(FirstChar);
466 case tok::amp: // &&
467 return FirstChar == '&';
468 case tok::plus: // ++
469 return FirstChar == '+';
470 case tok::minus: // --, ->, ->*
471 return FirstChar == '-' || FirstChar == '>';
472 case tok::slash: //, /*, //
473 return FirstChar == '*' || FirstChar == '/';
474 case tok::less: // <<, <<=, <:, <%
475 return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
476 case tok::greater: // >>, >>=
477 return FirstChar == '>';
478 case tok::pipe: // ||
479 return FirstChar == '|';
480 case tok::percent: // %>, %:
481 return FirstChar == '>' || FirstChar == ':';
482 case tok::colon: // ::, :>
483 return FirstChar == ':' || FirstChar == '>';
484 case tok::hash: // ##, #@, %:%:
485 return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
486 case tok::arrow: // ->*
487 return FirstChar == '*';
488 }
489}
490
491/// DoPrintPreprocessedInput - This implements -E mode.
492///
Chris Lattner6619f662008-04-08 04:16:20 +0000493void clang::DoPrintPreprocessedInput(Preprocessor &PP,
494 const std::string &OutFile) {
Chris Lattner4b009652007-07-25 00:24:17 +0000495 // Inform the preprocessor whether we want it to retain comments or not, due
496 // to -C or -CC.
497 PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput);
Chris Lattner4b009652007-07-25 00:24:17 +0000498 InitAvoidConcatTokenInfo();
Chris Lattner21494222008-08-17 03:12:02 +0000499
500
501 // Open the output buffer.
502 static llvm::raw_ostream *OutStream;
503
504 if (!OutFile.size() || OutFile == "-") {
505 OutStream = new llvm::raw_stdout_ostream();
506 } else {
507 std::string Err;
508 OutStream = new llvm::raw_fd_ostream(OutFile.c_str(), Err);
509
510 if (!Err.empty()) {
511 delete OutStream;
512 fprintf(stderr, "%s\n", Err.c_str());
513 exit(1);
514 }
515 }
516 OutStream->SetBufferSize(64*1024);
517
518 llvm::raw_ostream &OS = *OutStream;
Chris Lattner4b009652007-07-25 00:24:17 +0000519
520 Token Tok, PrevTok;
521 char Buffer[256];
Chris Lattner21494222008-08-17 03:12:02 +0000522 PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(PP, OS);
Chris Lattner4b009652007-07-25 00:24:17 +0000523 PP.setPPCallbacks(Callbacks);
524
525 PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks));
526 PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",Callbacks));
527
528 // After we have configured the preprocessor, enter the main file.
529
530 // Start parsing the specified input file.
Ted Kremenek17861c52007-12-19 22:51:13 +0000531 PP.EnterMainSourceFile();
Chris Lattner3eddc862007-10-10 20:45:16 +0000532
533 // Consume all of the tokens that come from the predefines buffer. Those
534 // should not be emitted into the output and are guaranteed to be at the
535 // start.
536 const SourceManager &SourceMgr = PP.getSourceManager();
537 do PP.Lex(Tok);
Chris Lattner890c5932007-10-10 23:31:03 +0000538 while (Tok.isNot(tok::eof) && Tok.getLocation().isFileID() &&
Chris Lattner3eddc862007-10-10 20:45:16 +0000539 !strcmp(SourceMgr.getSourceName(Tok.getLocation()), "<predefines>"));
540
541 while (1) {
Chris Lattner4b009652007-07-25 00:24:17 +0000542
543 // If this token is at the start of a line, emit newlines if needed.
Chris Lattner6c451292007-12-09 21:11:08 +0000544 if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
545 // done.
Chris Lattner4b009652007-07-25 00:24:17 +0000546 } else if (Tok.hasLeadingSpace() ||
547 // If we haven't emitted a token on this line yet, PrevTok isn't
548 // useful to look at and no concatenation could happen anyway.
549 (Callbacks->hasEmittedTokensOnThisLine() &&
550 // Don't print "-" next to "-", it would form "--".
551 Callbacks->AvoidConcat(PrevTok, Tok))) {
Chris Lattner21494222008-08-17 03:12:02 +0000552 OS << ' ';
Chris Lattner4b009652007-07-25 00:24:17 +0000553 }
554
555 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
556 const char *Str = II->getName();
557 unsigned Len = Tok.needsCleaning() ? strlen(Str) : Tok.getLength();
Chris Lattner21494222008-08-17 03:12:02 +0000558 OS.write(Str, Len);
Chris Lattner4b009652007-07-25 00:24:17 +0000559 } else if (Tok.getLength() < 256) {
560 const char *TokPtr = Buffer;
561 unsigned Len = PP.getSpelling(Tok, TokPtr);
Chris Lattner21494222008-08-17 03:12:02 +0000562 OS.write(TokPtr, Len);
Chris Lattner4b009652007-07-25 00:24:17 +0000563 } else {
564 std::string S = PP.getSpelling(Tok);
Chris Lattner21494222008-08-17 03:12:02 +0000565 OS.write(&S[0], S.size());
Chris Lattner4b009652007-07-25 00:24:17 +0000566 }
567 Callbacks->SetEmittedTokensOnThisLine();
Chris Lattner3eddc862007-10-10 20:45:16 +0000568
569 if (Tok.is(tok::eof)) break;
570
571 PrevTok = Tok;
572 PP.Lex(Tok);
573 }
Chris Lattner21494222008-08-17 03:12:02 +0000574 OS << '\n';
Chris Lattner4b009652007-07-25 00:24:17 +0000575
Chris Lattner21494222008-08-17 03:12:02 +0000576 // Flush and free the ostream.
577 delete &OS;
578
579 // If an error occurred, remove the output file.
580 if (PP.getDiagnostics().hasErrorOccurred() && !OutFile.empty())
581 llvm::sys::Path(OutFile).eraseFromDisk();
Chris Lattner4b009652007-07-25 00:24:17 +0000582}
583