blob: 105e99e9c481f16a497ae7b729eadde1cc99e858 [file] [log] [blame]
Reid Spencer5f016e22007-07-11 17:01:13 +00001//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
2//
3// The LLVM Compiler Infrastructure
4//
Chris Lattner0bc735f2007-12-29 19:59:25 +00005// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
Reid Spencer5f016e22007-07-11 17:01:13 +00007//
8//===----------------------------------------------------------------------===//
9//
10// This code simply runs the preprocessor on the input file and prints out the
11// result. This is the traditional behavior of the -E option.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang.h"
16#include "clang/Lex/PPCallbacks.h"
17#include "clang/Lex/Preprocessor.h"
18#include "clang/Lex/Pragma.h"
19#include "clang/Basic/SourceManager.h"
Chris Lattner5db17c92008-04-08 04:16:20 +000020#include "clang/Basic/Diagnostic.h"
Chris Lattnerd8e30832007-07-24 06:57:14 +000021#include "llvm/ADT/SmallString.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000022#include "llvm/ADT/StringExtras.h"
Chris Lattner5db17c92008-04-08 04:16:20 +000023#include "llvm/System/Path.h"
24#include "llvm/Support/CommandLine.h"
Reid Spencer5f016e22007-07-11 17:01:13 +000025#include "llvm/Config/config.h"
26#include <cstdio>
27using namespace clang;
28
29//===----------------------------------------------------------------------===//
30// Simple buffered I/O
31//===----------------------------------------------------------------------===//
32//
33// Empirically, iostream is over 30% slower than stdio for this workload, and
34// stdio itself isn't very well suited. The problem with stdio is use of
35// putchar_unlocked. We have many newline characters that need to be emitted,
36// but stdio needs to do extra checks to handle line buffering mode. These
37// extra checks make putchar_unlocked fall off its inlined code path, hitting
38// slow system code. In practice, using 'write' directly makes 'clang -E -P'
39// about 10% faster than using the stdio path on darwin.
40
Chris Lattnere988bc22008-01-27 23:55:11 +000041#if defined(HAVE_UNISTD_H) && defined(HAVE_FCNTL_H)
Reid Spencer5f016e22007-07-11 17:01:13 +000042#include <unistd.h>
Chris Lattnere988bc22008-01-27 23:55:11 +000043#include <fcntl.h>
Reid Spencer5f016e22007-07-11 17:01:13 +000044#else
45#define USE_STDIO 1
46#endif
47
/// Path of the output file when one was opened by name.  CleanupOutputBuffer
/// uses it to remove the file if an error occurred.
static std::string OutputFilename;

#ifdef USE_STDIO
/// Stream that all output is sent to in the stdio configuration.
static FILE *OutputFILE;
#else
/// Descriptor that buffered output is written to.
static int OutputFD;

/// Hand-rolled output buffer: storage starts at OutBufStart and ends at
/// OutBufEnd; OutBufCur is where the next byte goes.
static char *OutBufStart = 0;
static char *OutBufEnd;
static char *OutBufCur;
#endif
Reid Spencer5f016e22007-07-11 17:01:13 +000055
56/// InitOutputBuffer - Initialize our output buffer.
57///
Chris Lattnere988bc22008-01-27 23:55:11 +000058static void InitOutputBuffer(const std::string& Output) {
59#ifdef USE_STDIO
60 if (!Output.size() || Output == "-")
61 OutputFILE = stdout;
Chris Lattner5db17c92008-04-08 04:16:20 +000062 else {
Chris Lattner5db17c92008-04-08 04:16:20 +000063 OutputFilename = Output;
Chris Lattner8808f002008-04-11 06:14:11 +000064 OutputFILE = fopen(Output.c_str(), "w+");
65
66 if (OutputFILE == 0) {
67 fprintf(stderr, "Error opening output file '%s'.\n", Output.c_str());
68 exit(1);
69 }
70
Chris Lattner5db17c92008-04-08 04:16:20 +000071 }
Chris Lattnere988bc22008-01-27 23:55:11 +000072
73 assert(OutputFILE && "failed to open output file");
74#else
Reid Spencer5f016e22007-07-11 17:01:13 +000075 OutBufStart = new char[64*1024];
76 OutBufEnd = OutBufStart+64*1024;
77 OutBufCur = OutBufStart;
Chris Lattnere988bc22008-01-27 23:55:11 +000078
79 if (!Output.size() || Output == "-")
80 OutputFD = STDOUT_FILENO;
Chris Lattner5db17c92008-04-08 04:16:20 +000081 else {
Chris Lattner5db17c92008-04-08 04:16:20 +000082 OutputFilename = Output;
Chris Lattner8808f002008-04-11 06:14:11 +000083 OutputFD = open(Output.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0644);
84 if (OutputFD < 0) {
85 fprintf(stderr, "Error opening output file '%s'.\n", Output.c_str());
86 exit(1);
87 }
Chris Lattner5db17c92008-04-08 04:16:20 +000088 }
Reid Spencer5f016e22007-07-11 17:01:13 +000089#endif
90}
91
Chris Lattnere988bc22008-01-27 23:55:11 +000092#ifndef USE_STDIO
Reid Spencer5f016e22007-07-11 17:01:13 +000093/// FlushBuffer - Write the accumulated bytes to the output stream.
94///
95static void FlushBuffer() {
Chris Lattnere988bc22008-01-27 23:55:11 +000096 write(OutputFD, OutBufStart, OutBufCur-OutBufStart);
Reid Spencer5f016e22007-07-11 17:01:13 +000097 OutBufCur = OutBufStart;
Reid Spencer5f016e22007-07-11 17:01:13 +000098}
Chris Lattnere988bc22008-01-27 23:55:11 +000099#endif
Reid Spencer5f016e22007-07-11 17:01:13 +0000100
101/// CleanupOutputBuffer - Finish up output.
102///
Chris Lattner5db17c92008-04-08 04:16:20 +0000103static void CleanupOutputBuffer(bool ErrorOccurred) {
104#ifdef USE_STDIO
105 if (OutputFILE != stdout)
106 fclose(OutputFILE);
107#else
Reid Spencer5f016e22007-07-11 17:01:13 +0000108 FlushBuffer();
109 delete [] OutBufStart;
Chris Lattner5db17c92008-04-08 04:16:20 +0000110 if (OutputFD != STDOUT_FILENO)
111 close(OutputFD);
Reid Spencer5f016e22007-07-11 17:01:13 +0000112#endif
Chris Lattner5db17c92008-04-08 04:16:20 +0000113
114 // If an error occurred, remove the output file.
115 if (ErrorOccurred && !OutputFilename.empty())
116 llvm::sys::Path(OutputFilename).eraseFromDisk();
Reid Spencer5f016e22007-07-11 17:01:13 +0000117}
118
119static void OutputChar(char c) {
Chris Lattner6a4545e2007-09-03 18:24:56 +0000120#if defined(_MSC_VER)
Chris Lattnere988bc22008-01-27 23:55:11 +0000121 putc(c, OutputFILE);
Chris Lattner6a4545e2007-09-03 18:24:56 +0000122#elif defined(USE_STDIO)
Chris Lattnere988bc22008-01-27 23:55:11 +0000123 putc_unlocked(c, OutputFILE);
Reid Spencer5f016e22007-07-11 17:01:13 +0000124#else
125 if (OutBufCur >= OutBufEnd)
126 FlushBuffer();
127 *OutBufCur++ = c;
128#endif
129}
130
131static void OutputString(const char *Ptr, unsigned Size) {
132#ifdef USE_STDIO
Chris Lattnere988bc22008-01-27 23:55:11 +0000133 fwrite(Ptr, Size, 1, OutputFILE);
Reid Spencer5f016e22007-07-11 17:01:13 +0000134#else
135 if (OutBufCur+Size >= OutBufEnd)
136 FlushBuffer();
Chris Lattnere225e372007-07-23 06:23:07 +0000137
138 switch (Size) {
139 default:
140 memcpy(OutBufCur, Ptr, Size);
141 break;
142 case 3:
143 OutBufCur[2] = Ptr[2];
144 case 2:
145 OutBufCur[1] = Ptr[1];
146 case 1:
147 OutBufCur[0] = Ptr[0];
148 case 0:
149 break;
150 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000151 OutBufCur += Size;
152#endif
153}
154
155
156//===----------------------------------------------------------------------===//
157// Preprocessed token printer
158//===----------------------------------------------------------------------===//
159
160static llvm::cl::opt<bool>
161DisableLineMarkers("P", llvm::cl::desc("Disable linemarker output in -E mode"));
162static llvm::cl::opt<bool>
163EnableCommentOutput("C", llvm::cl::desc("Enable comment output in -E mode"));
164static llvm::cl::opt<bool>
165EnableMacroCommentOutput("CC",
166 llvm::cl::desc("Enable comment output in -E mode, "
167 "even from macro expansions"));
168
169namespace {
170class PrintPPOutputPPCallbacks : public PPCallbacks {
171 Preprocessor &PP;
172 unsigned CurLine;
Reid Spencer5f016e22007-07-11 17:01:13 +0000173 bool EmittedTokensOnThisLine;
174 DirectoryLookup::DirType FileType;
Chris Lattnerd8e30832007-07-24 06:57:14 +0000175 llvm::SmallString<512> CurFilename;
Reid Spencer5f016e22007-07-11 17:01:13 +0000176public:
177 PrintPPOutputPPCallbacks(Preprocessor &pp) : PP(pp) {
178 CurLine = 0;
Chris Lattnerd8e30832007-07-24 06:57:14 +0000179 CurFilename += "<uninit>";
Reid Spencer5f016e22007-07-11 17:01:13 +0000180 EmittedTokensOnThisLine = false;
181 FileType = DirectoryLookup::NormalHeaderDir;
182 }
183
184 void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000185 bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
Reid Spencer5f016e22007-07-11 17:01:13 +0000186
187 virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
188 DirectoryLookup::DirType FileType);
189 virtual void Ident(SourceLocation Loc, const std::string &str);
190
191
Chris Lattner5f180322007-12-09 21:11:08 +0000192 bool HandleFirstTokOnLine(Token &Tok);
193 bool MoveToLine(SourceLocation Loc);
Chris Lattnerd2177732007-07-20 16:59:19 +0000194 bool AvoidConcat(const Token &PrevTok, const Token &Tok);
Reid Spencer5f016e22007-07-11 17:01:13 +0000195};
Chris Lattner5db17c92008-04-08 04:16:20 +0000196} // end anonymous namespace
Reid Spencer5f016e22007-07-11 17:01:13 +0000197
/// UToStr - Format N as a decimal, null-terminated string at the tail of a
/// caller-provided buffer.  EndPtr points one past the last byte of that
/// buffer; the digits are written backwards from there and a pointer to the
/// first digit is returned.
static char *UToStr(unsigned N, char *EndPtr) {
  char *Cur = EndPtr;

  // The terminator goes in the very last slot.
  *--Cur = '\0';

  // Peel off digits least-significant first.  Running the body at least once
  // makes zero come out as "0".
  do {
    *--Cur = '0' + char(N % 10);
    N /= 10;
  } while (N != 0);

  return Cur;
}
211
212
Reid Spencer5f016e22007-07-11 17:01:13 +0000213/// MoveToLine - Move the output to the source line specified by the location
214/// object. We can do this by emitting some number of \n's, or be emitting a
Chris Lattner5f180322007-12-09 21:11:08 +0000215/// #line directive. This returns false if already at the specified line, true
216/// if some newlines were emitted.
217bool PrintPPOutputPPCallbacks::MoveToLine(SourceLocation Loc) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000218 if (DisableLineMarkers) {
Chris Lattner5f180322007-12-09 21:11:08 +0000219 unsigned LineNo = PP.getSourceManager().getLogicalLineNumber(Loc);
220 if (LineNo == CurLine) return false;
221
222 CurLine = LineNo;
223
224 if (!EmittedTokensOnThisLine)
225 return true;
226
227 OutputChar('\n');
228 EmittedTokensOnThisLine = false;
229 return true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000230 }
231
Chris Lattner9dc1f532007-07-20 16:37:10 +0000232 unsigned LineNo = PP.getSourceManager().getLogicalLineNumber(Loc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000233
234 // If this line is "close enough" to the original line, just print newlines,
235 // otherwise print a #line directive.
236 if (LineNo-CurLine < 8) {
Chris Lattner822f9402007-07-23 05:14:05 +0000237 if (LineNo-CurLine == 1)
Reid Spencer5f016e22007-07-11 17:01:13 +0000238 OutputChar('\n');
Chris Lattner5f180322007-12-09 21:11:08 +0000239 else if (LineNo == CurLine)
240 return false; // Phys line moved, but logical line didn't.
Chris Lattner822f9402007-07-23 05:14:05 +0000241 else {
242 const char *NewLines = "\n\n\n\n\n\n\n\n";
243 OutputString(NewLines, LineNo-CurLine);
Chris Lattner822f9402007-07-23 05:14:05 +0000244 }
Chris Lattner5c0887c2007-12-09 20:45:43 +0000245 CurLine = LineNo;
Reid Spencer5f016e22007-07-11 17:01:13 +0000246 } else {
247 if (EmittedTokensOnThisLine) {
248 OutputChar('\n');
249 EmittedTokensOnThisLine = false;
250 }
251
252 CurLine = LineNo;
253
254 OutputChar('#');
255 OutputChar(' ');
Chris Lattnerf0637212007-07-23 06:31:11 +0000256 char NumberBuffer[20];
257 const char *NumStr = UToStr(LineNo, NumberBuffer+20);
258 OutputString(NumStr, (NumberBuffer+20)-NumStr-1);
Reid Spencer5f016e22007-07-11 17:01:13 +0000259 OutputChar(' ');
Chris Lattner0cbc4b52007-07-22 06:38:50 +0000260 OutputChar('"');
Reid Spencer5f016e22007-07-11 17:01:13 +0000261 OutputString(&CurFilename[0], CurFilename.size());
Chris Lattner0cbc4b52007-07-22 06:38:50 +0000262 OutputChar('"');
Reid Spencer5f016e22007-07-11 17:01:13 +0000263
264 if (FileType == DirectoryLookup::SystemHeaderDir)
265 OutputString(" 3", 2);
266 else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
267 OutputString(" 3 4", 4);
268 OutputChar('\n');
269 }
Chris Lattner5f180322007-12-09 21:11:08 +0000270 return true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000271}
272
273
274/// FileChanged - Whenever the preprocessor enters or exits a #include file
275/// it invokes this handler. Update our conception of the current source
276/// position.
277void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
278 FileChangeReason Reason,
279 DirectoryLookup::DirType FileType) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000280 // Unless we are exiting a #include, make sure to skip ahead to the line the
281 // #include directive was at.
282 SourceManager &SourceMgr = PP.getSourceManager();
283 if (Reason == PPCallbacks::EnterFile) {
Chris Lattner9dc1f532007-07-20 16:37:10 +0000284 MoveToLine(SourceMgr.getIncludeLoc(Loc));
Reid Spencer5f016e22007-07-11 17:01:13 +0000285 } else if (Reason == PPCallbacks::SystemHeaderPragma) {
286 MoveToLine(Loc);
287
288 // TODO GCC emits the # directive for this directive on the line AFTER the
289 // directive and emits a bunch of spaces that aren't needed. Emulate this
290 // strange behavior.
291 }
292
Chris Lattner9dc1f532007-07-20 16:37:10 +0000293 Loc = SourceMgr.getLogicalLoc(Loc);
Reid Spencer5f016e22007-07-11 17:01:13 +0000294 CurLine = SourceMgr.getLineNumber(Loc);
Chris Lattner5f180322007-12-09 21:11:08 +0000295
296 if (DisableLineMarkers) return;
297
Chris Lattnerd8e30832007-07-24 06:57:14 +0000298 CurFilename.clear();
299 CurFilename += SourceMgr.getSourceName(Loc);
300 Lexer::Stringify(CurFilename);
Reid Spencer5f016e22007-07-11 17:01:13 +0000301 FileType = FileType;
302
303 if (EmittedTokensOnThisLine) {
304 OutputChar('\n');
305 EmittedTokensOnThisLine = false;
306 }
307
Reid Spencer5f016e22007-07-11 17:01:13 +0000308 OutputChar('#');
309 OutputChar(' ');
Chris Lattner51431962007-07-24 06:59:01 +0000310
311 char NumberBuffer[20];
312 const char *NumStr = UToStr(CurLine, NumberBuffer+20);
313 OutputString(NumStr, (NumberBuffer+20)-NumStr-1);
Reid Spencer5f016e22007-07-11 17:01:13 +0000314 OutputChar(' ');
Chris Lattner0cbc4b52007-07-22 06:38:50 +0000315 OutputChar('"');
Reid Spencer5f016e22007-07-11 17:01:13 +0000316 OutputString(&CurFilename[0], CurFilename.size());
Chris Lattner0cbc4b52007-07-22 06:38:50 +0000317 OutputChar('"');
Reid Spencer5f016e22007-07-11 17:01:13 +0000318
319 switch (Reason) {
320 case PPCallbacks::EnterFile:
321 OutputString(" 1", 2);
322 break;
323 case PPCallbacks::ExitFile:
324 OutputString(" 2", 2);
325 break;
326 case PPCallbacks::SystemHeaderPragma: break;
327 case PPCallbacks::RenameFile: break;
328 }
329
330 if (FileType == DirectoryLookup::SystemHeaderDir)
331 OutputString(" 3", 2);
332 else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
333 OutputString(" 3 4", 4);
334
335 OutputChar('\n');
336}
337
338/// HandleIdent - Handle #ident directives when read by the preprocessor.
339///
340void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
341 MoveToLine(Loc);
342
343 OutputString("#ident ", strlen("#ident "));
344 OutputString(&S[0], S.size());
345 EmittedTokensOnThisLine = true;
346}
347
348/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
Chris Lattner5f180322007-12-09 21:11:08 +0000349/// is called for the first token on each new line. If this really is the start
350/// of a new logical line, handle it and return true, otherwise return false.
351/// This may not be the start of a logical line because the "start of line"
352/// marker is set for physical lines, not logical ones.
353bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000354 // Figure out what line we went to and insert the appropriate number of
355 // newline characters.
Chris Lattner5f180322007-12-09 21:11:08 +0000356 if (!MoveToLine(Tok.getLocation()))
357 return false;
Reid Spencer5f016e22007-07-11 17:01:13 +0000358
359 // Print out space characters so that the first token on a line is
360 // indented for easy reading.
Chris Lattner9dc1f532007-07-20 16:37:10 +0000361 const SourceManager &SourceMgr = PP.getSourceManager();
362 unsigned ColNo = SourceMgr.getLogicalColumnNumber(Tok.getLocation());
Reid Spencer5f016e22007-07-11 17:01:13 +0000363
364 // This hack prevents stuff like:
365 // #define HASH #
366 // HASH define foo bar
367 // From having the # character end up at column 1, which makes it so it
368 // is not handled as a #define next time through the preprocessor if in
369 // -fpreprocessed mode.
Chris Lattner057aaf62007-10-09 18:03:42 +0000370 if (ColNo <= 1 && Tok.is(tok::hash))
Reid Spencer5f016e22007-07-11 17:01:13 +0000371 OutputChar(' ');
372
373 // Otherwise, indent the appropriate number of spaces.
374 for (; ColNo > 1; --ColNo)
375 OutputChar(' ');
Chris Lattner5f180322007-12-09 21:11:08 +0000376
377 return true;
Reid Spencer5f016e22007-07-11 17:01:13 +0000378}
379
380namespace {
381struct UnknownPragmaHandler : public PragmaHandler {
382 const char *Prefix;
383 PrintPPOutputPPCallbacks *Callbacks;
384
385 UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks)
386 : PragmaHandler(0), Prefix(prefix), Callbacks(callbacks) {}
Chris Lattnerd2177732007-07-20 16:59:19 +0000387 virtual void HandlePragma(Preprocessor &PP, Token &PragmaTok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000388 // Figure out what line we went to and insert the appropriate number of
389 // newline characters.
390 Callbacks->MoveToLine(PragmaTok.getLocation());
391 OutputString(Prefix, strlen(Prefix));
392
393 // Read and print all of the pragma tokens.
Chris Lattner057aaf62007-10-09 18:03:42 +0000394 while (PragmaTok.isNot(tok::eom)) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000395 if (PragmaTok.hasLeadingSpace())
396 OutputChar(' ');
397 std::string TokSpell = PP.getSpelling(PragmaTok);
398 OutputString(&TokSpell[0], TokSpell.size());
399 PP.LexUnexpandedToken(PragmaTok);
400 }
401 OutputChar('\n');
402 }
403};
404} // end anonymous namespace
405
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000406
/// AvoidConcatInfo - Per-token flags that say how AvoidConcat must treat a
/// token when deciding whether a space is needed after it.  The non-zero
/// values are distinct bits so that they can be or'd together.
enum AvoidConcatInfo {
  /// No flag set: the token never needs to avoid concatenation.  Most tokens
  /// (e.g. ',', ')', etc) don't cause a problem when concatenated.
  aci_never_avoid_concat = 0x0,

  /// AvoidConcat has custom code for this token, and that code needs to know
  /// the first character of the token that follows.
  aci_custom_firstchar = 0x1,

  /// AvoidConcat has custom code for this token, but that code does not need
  /// the first character of the token that follows.
  aci_custom = 0x2,

  /// The token cannot be safely followed by an '=' character.  For example,
  /// "<<" turns into "<<=" when followed by an =.
  aci_avoid_equal = 0x4
};
426
427/// This array contains information for each token on what action to take when
428/// avoiding concatenation of tokens in the AvoidConcat method.
429static char TokenInfo[tok::NUM_TOKENS];
430
431/// InitAvoidConcatTokenInfo - Tokens that must avoid concatenation should be
432/// marked by this function.
433static void InitAvoidConcatTokenInfo() {
434 // These tokens have custom code in AvoidConcat.
435 TokenInfo[tok::identifier ] |= aci_custom;
436 TokenInfo[tok::numeric_constant] |= aci_custom_firstchar;
437 TokenInfo[tok::period ] |= aci_custom_firstchar;
438 TokenInfo[tok::amp ] |= aci_custom_firstchar;
439 TokenInfo[tok::plus ] |= aci_custom_firstchar;
440 TokenInfo[tok::minus ] |= aci_custom_firstchar;
441 TokenInfo[tok::slash ] |= aci_custom_firstchar;
442 TokenInfo[tok::less ] |= aci_custom_firstchar;
443 TokenInfo[tok::greater ] |= aci_custom_firstchar;
444 TokenInfo[tok::pipe ] |= aci_custom_firstchar;
445 TokenInfo[tok::percent ] |= aci_custom_firstchar;
446 TokenInfo[tok::colon ] |= aci_custom_firstchar;
447 TokenInfo[tok::hash ] |= aci_custom_firstchar;
448 TokenInfo[tok::arrow ] |= aci_custom_firstchar;
449
450 // These tokens change behavior if followed by an '='.
451 TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
452 TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
453 TokenInfo[tok::minus ] |= aci_avoid_equal; // -=
454 TokenInfo[tok::slash ] |= aci_avoid_equal; // /=
455 TokenInfo[tok::less ] |= aci_avoid_equal; // <=
456 TokenInfo[tok::greater ] |= aci_avoid_equal; // >=
457 TokenInfo[tok::pipe ] |= aci_avoid_equal; // |=
458 TokenInfo[tok::percent ] |= aci_avoid_equal; // %=
459 TokenInfo[tok::star ] |= aci_avoid_equal; // *=
460 TokenInfo[tok::exclaim ] |= aci_avoid_equal; // !=
461 TokenInfo[tok::lessless ] |= aci_avoid_equal; // <<=
462 TokenInfo[tok::greaterequal] |= aci_avoid_equal; // >>=
463 TokenInfo[tok::caret ] |= aci_avoid_equal; // ^=
464 TokenInfo[tok::equal ] |= aci_avoid_equal; // ==
465}
466
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000467/// StartsWithL - Return true if the spelling of this token starts with 'L'.
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000468static bool StartsWithL(const Token &Tok, Preprocessor &PP) {
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000469 if (!Tok.needsCleaning()) {
470 SourceManager &SrcMgr = PP.getSourceManager();
471 return *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()))
472 == 'L';
473 }
474
475 if (Tok.getLength() < 256) {
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000476 char Buffer[256];
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000477 const char *TokPtr = Buffer;
478 PP.getSpelling(Tok, TokPtr);
479 return TokPtr[0] == 'L';
480 }
481
482 return PP.getSpelling(Tok)[0] == 'L';
483}
484
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000485/// IsIdentifierL - Return true if the spelling of this token is literally 'L'.
486static bool IsIdentifierL(const Token &Tok, Preprocessor &PP) {
487 if (!Tok.needsCleaning()) {
488 if (Tok.getLength() != 1)
489 return false;
490 SourceManager &SrcMgr = PP.getSourceManager();
491 return *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()))
492 == 'L';
493 }
494
495 if (Tok.getLength() < 256) {
496 char Buffer[256];
497 const char *TokPtr = Buffer;
498 if (PP.getSpelling(Tok, TokPtr) != 1)
499 return false;
500 return TokPtr[0] == 'L';
501 }
502
503 return PP.getSpelling(Tok) == "L";
504}
505
506
Reid Spencer5f016e22007-07-11 17:01:13 +0000507/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
508/// the two individual tokens to be lexed as a single token, return true (which
509/// causes a space to be printed between them). This allows the output of -E
510/// mode to be lexed to the same token stream as lexing the input directly
511/// would.
512///
513/// This code must conservatively return true if it doesn't want to be 100%
514/// accurate. This will cause the output to include extra space characters, but
515/// the resulting output won't have incorrect concatenations going on. Examples
516/// include "..", which we print with a space between, because we don't want to
517/// track enough to tell "x.." from "...".
Chris Lattnerd2177732007-07-20 16:59:19 +0000518bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
519 const Token &Tok) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000520 char Buffer[256];
521
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000522 tok::TokenKind PrevKind = PrevTok.getKind();
523 if (PrevTok.getIdentifierInfo()) // Language keyword or named operator.
524 PrevKind = tok::identifier;
525
526 // Look up information on when we should avoid concatenation with prevtok.
527 unsigned ConcatInfo = TokenInfo[PrevKind];
528
529 // If prevtok never causes a problem for anything after it, return quickly.
530 if (ConcatInfo == 0) return false;
Reid Spencer5f016e22007-07-11 17:01:13 +0000531
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000532 if (ConcatInfo & aci_avoid_equal) {
533 // If the next token is '=' or '==', avoid concatenation.
Chris Lattner057aaf62007-10-09 18:03:42 +0000534 if (Tok.is(tok::equal) || Tok.is(tok::equalequal))
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000535 return true;
Chris Lattnerb638a302007-07-23 23:21:34 +0000536 ConcatInfo &= ~aci_avoid_equal;
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000537 }
538
539 if (ConcatInfo == 0) return false;
540
541
542
Reid Spencer5f016e22007-07-11 17:01:13 +0000543 // Basic algorithm: we look at the first character of the second token, and
544 // determine whether it, if appended to the first token, would form (or would
545 // contribute) to a larger token if concatenated.
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000546 char FirstChar = 0;
547 if (ConcatInfo & aci_custom) {
548 // If the token does not need to know the first character, don't get it.
549 } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000550 // Avoid spelling identifiers, the most common form of token.
551 FirstChar = II->getName()[0];
Chris Lattnerb19f5e82007-07-23 05:18:42 +0000552 } else if (!Tok.needsCleaning()) {
553 SourceManager &SrcMgr = PP.getSourceManager();
554 FirstChar =
555 *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()));
Reid Spencer5f016e22007-07-11 17:01:13 +0000556 } else if (Tok.getLength() < 256) {
557 const char *TokPtr = Buffer;
558 PP.getSpelling(Tok, TokPtr);
559 FirstChar = TokPtr[0];
560 } else {
561 FirstChar = PP.getSpelling(Tok)[0];
562 }
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000563
Reid Spencer5f016e22007-07-11 17:01:13 +0000564 switch (PrevKind) {
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000565 default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
Reid Spencer5f016e22007-07-11 17:01:13 +0000566 case tok::identifier: // id+id or id+number or id+L"foo".
Chris Lattner057aaf62007-10-09 18:03:42 +0000567 if (Tok.is(tok::numeric_constant) || Tok.getIdentifierInfo() ||
568 Tok.is(tok::wide_string_literal) /* ||
569 Tok.is(tok::wide_char_literal)*/)
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000570 return true;
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000571
572 // If this isn't identifier + string, we're done.
573 if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000574 return false;
575
576 // FIXME: need a wide_char_constant!
Chris Lattnerfdc0d3c2008-01-15 05:14:19 +0000577
578 // If the string was a wide string L"foo" or wide char L'f', it would concat
579 // with the previous identifier into fooL"bar". Avoid this.
580 if (StartsWithL(Tok, PP))
581 return true;
582
Chris Lattnerb1a17ae2008-01-15 05:22:14 +0000583 // Otherwise, this is a narrow character or string. If the *identifier* is
584 // a literal 'L', avoid pasting L "foo" -> L"foo".
585 return IsIdentifierL(PrevTok, PP);
Reid Spencer5f016e22007-07-11 17:01:13 +0000586 case tok::numeric_constant:
Chris Lattner057aaf62007-10-09 18:03:42 +0000587 return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
Reid Spencer5f016e22007-07-11 17:01:13 +0000588 FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
589 case tok::period: // ..., .*, .1234
590 return FirstChar == '.' || FirstChar == '*' || isdigit(FirstChar);
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000591 case tok::amp: // &&
592 return FirstChar == '&';
593 case tok::plus: // ++
594 return FirstChar == '+';
595 case tok::minus: // --, ->, ->*
596 return FirstChar == '-' || FirstChar == '>';
597 case tok::slash: //, /*, //
598 return FirstChar == '*' || FirstChar == '/';
599 case tok::less: // <<, <<=, <:, <%
600 return FirstChar == '<' || FirstChar == ':' || FirstChar == '%';
601 case tok::greater: // >>, >>=
602 return FirstChar == '>';
603 case tok::pipe: // ||
604 return FirstChar == '|';
605 case tok::percent: // %>, %:
606 return FirstChar == '>' || FirstChar == ':';
Reid Spencer5f016e22007-07-11 17:01:13 +0000607 case tok::colon: // ::, :>
608 return FirstChar == ':' || FirstChar == '>';
609 case tok::hash: // ##, #@, %:%:
610 return FirstChar == '#' || FirstChar == '@' || FirstChar == '%';
611 case tok::arrow: // ->*
612 return FirstChar == '*';
Reid Spencer5f016e22007-07-11 17:01:13 +0000613 }
614}
615
616/// DoPrintPreprocessedInput - This implements -E mode.
617///
Chris Lattner5db17c92008-04-08 04:16:20 +0000618void clang::DoPrintPreprocessedInput(Preprocessor &PP,
619 const std::string &OutFile) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000620 // Inform the preprocessor whether we want it to retain comments or not, due
621 // to -C or -CC.
622 PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput);
623
Chris Lattnere988bc22008-01-27 23:55:11 +0000624 InitOutputBuffer(OutFile);
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000625 InitAvoidConcatTokenInfo();
Reid Spencer5f016e22007-07-11 17:01:13 +0000626
Chris Lattnerd2177732007-07-20 16:59:19 +0000627 Token Tok, PrevTok;
Reid Spencer5f016e22007-07-11 17:01:13 +0000628 char Buffer[256];
629 PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(PP);
630 PP.setPPCallbacks(Callbacks);
631
632 PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks));
633 PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",Callbacks));
634
635 // After we have configured the preprocessor, enter the main file.
636
637 // Start parsing the specified input file.
Ted Kremenek95041a22007-12-19 22:51:13 +0000638 PP.EnterMainSourceFile();
Chris Lattner6f688e12007-10-10 20:45:16 +0000639
640 // Consume all of the tokens that come from the predefines buffer. Those
641 // should not be emitted into the output and are guaranteed to be at the
642 // start.
643 const SourceManager &SourceMgr = PP.getSourceManager();
644 do PP.Lex(Tok);
Chris Lattnera1a51782007-10-10 23:31:03 +0000645 while (Tok.isNot(tok::eof) && Tok.getLocation().isFileID() &&
Chris Lattner6f688e12007-10-10 20:45:16 +0000646 !strcmp(SourceMgr.getSourceName(Tok.getLocation()), "<predefines>"));
647
648 while (1) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000649
650 // If this token is at the start of a line, emit newlines if needed.
Chris Lattner5f180322007-12-09 21:11:08 +0000651 if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
652 // done.
Reid Spencer5f016e22007-07-11 17:01:13 +0000653 } else if (Tok.hasLeadingSpace() ||
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000654 // If we haven't emitted a token on this line yet, PrevTok isn't
655 // useful to look at and no concatenation could happen anyway.
Chris Lattnerb638a302007-07-23 23:21:34 +0000656 (Callbacks->hasEmittedTokensOnThisLine() &&
Chris Lattnerf0f2b292007-07-23 06:09:34 +0000657 // Don't print "-" next to "-", it would form "--".
658 Callbacks->AvoidConcat(PrevTok, Tok))) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000659 OutputChar(' ');
660 }
661
Chris Lattner2933f412007-07-23 06:14:36 +0000662 if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
663 const char *Str = II->getName();
664 unsigned Len = Tok.needsCleaning() ? strlen(Str) : Tok.getLength();
665 OutputString(Str, Len);
666 } else if (Tok.getLength() < 256) {
Reid Spencer5f016e22007-07-11 17:01:13 +0000667 const char *TokPtr = Buffer;
668 unsigned Len = PP.getSpelling(Tok, TokPtr);
669 OutputString(TokPtr, Len);
670 } else {
671 std::string S = PP.getSpelling(Tok);
672 OutputString(&S[0], S.size());
673 }
674 Callbacks->SetEmittedTokensOnThisLine();
Chris Lattner6f688e12007-10-10 20:45:16 +0000675
676 if (Tok.is(tok::eof)) break;
677
678 PrevTok = Tok;
679 PP.Lex(Tok);
680 }
Reid Spencer5f016e22007-07-11 17:01:13 +0000681 OutputChar('\n');
682
Chris Lattner5db17c92008-04-08 04:16:20 +0000683 CleanupOutputBuffer(PP.getDiagnostics().hasErrorOccurred());
Reid Spencer5f016e22007-07-11 17:01:13 +0000684}
685