Blame - Driver/PrintPreprocessedOutput.cpp - fp2-dev/platform/external/clang

blob: 105e99e9c481f16a497ae7b729eadde1cc99e858 [file] [log] [blame]

Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	1	//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
Chris Lattner	0bc735f	2007-12-29 19:59:25 +0000	[diff] [blame]	5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This code simply runs the preprocessor on the input file and prints out the
				11	// result. This is the traditional behavior of the -E option.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "clang.h"
				16	#include "clang/Lex/PPCallbacks.h"
				17	#include "clang/Lex/Preprocessor.h"
				18	#include "clang/Lex/Pragma.h"
				19	#include "clang/Basic/SourceManager.h"
Chris Lattner	5db17c9	2008-04-08 04:16:20 +0000	[diff] [blame]	20	#include "clang/Basic/Diagnostic.h"
Chris Lattner	d8e3083	2007-07-24 06:57:14 +0000	[diff] [blame]	21	#include "llvm/ADT/SmallString.h"
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	22	#include "llvm/ADT/StringExtras.h"
Chris Lattner	5db17c9	2008-04-08 04:16:20 +0000	[diff] [blame]	23	#include "llvm/System/Path.h"
				24	#include "llvm/Support/CommandLine.h"
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	25	#include "llvm/Config/config.h"
				26	#include <cstdio>
				27	using namespace clang;
				28
				29	//===----------------------------------------------------------------------===//
				30	// Simple buffered I/O
				31	//===----------------------------------------------------------------------===//
				32	//
				33	// Empirically, iostream is over 30% slower than stdio for this workload, and
				34	// stdio itself isn't very well suited. The problem with stdio is use of
				35	// putchar_unlocked. We have many newline characters that need to be emitted,
				36	// but stdio needs to do extra checks to handle line buffering mode. These
				37	// extra checks make putchar_unlocked fall off its inlined code path, hitting
				38	// slow system code. In practice, using 'write' directly makes 'clang -E -P'
				39	// about 10% faster than using the stdio path on darwin.
				40
Chris Lattner	e988bc2	2008-01-27 23:55:11 +0000	[diff] [blame]	41	#if defined(HAVE_UNISTD_H) && defined(HAVE_FCNTL_H)
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	42	#include <unistd.h>
Chris Lattner	e988bc2	2008-01-27 23:55:11 +0000	[diff] [blame]	43	#include <fcntl.h>
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	44	#else
				45	#define USE_STDIO 1
				46	#endif
				47
/// Name of the file being written. InitOutputBuffer sets it when output goes
/// to a named file; CleanupOutputBuffer uses it to remove that file on error.
static std::string OutputFilename;
#ifdef USE_STDIO
/// Destination stream used by the stdio fallback path.
static FILE *OutputFILE;
#else
/// Destination file descriptor used by the write() path.
static int OutputFD;
/// Start, end (one past the last byte) and current fill position of the
/// output buffer. All three are pointers: InitOutputBuffer assigns them from
/// new[] and the output routines do pointer arithmetic on them.
static char *OutBufStart = 0, *OutBufEnd, *OutBufCur;
#endif
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	55
				56	/// InitOutputBuffer - Initialize our output buffer.
				57	///
Chris Lattner	e988bc2	2008-01-27 23:55:11 +0000	[diff] [blame]	58	static void InitOutputBuffer(const std::string& Output) {
				59	#ifdef USE_STDIO
				60	if (!Output.size() \|\| Output == "-")
				61	OutputFILE = stdout;
Chris Lattner	5db17c9	2008-04-08 04:16:20 +0000	[diff] [blame]	62	else {
Chris Lattner	5db17c9	2008-04-08 04:16:20 +0000	[diff] [blame]	63	OutputFilename = Output;
Chris Lattner	8808f00	2008-04-11 06:14:11 +0000	[diff] [blame]	64	OutputFILE = fopen(Output.c_str(), "w+");
				65
				66	if (OutputFILE == 0) {
				67	fprintf(stderr, "Error opening output file '%s'.\n", Output.c_str());
				68	exit(1);
				69	}
				70
Chris Lattner	5db17c9	2008-04-08 04:16:20 +0000	[diff] [blame]	71	}
Chris Lattner	e988bc2	2008-01-27 23:55:11 +0000	[diff] [blame]	72
				73	assert(OutputFILE && "failed to open output file");
				74	#else
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	75	OutBufStart = new char[64*1024];
				76	OutBufEnd = OutBufStart+64*1024;
				77	OutBufCur = OutBufStart;
Chris Lattner	e988bc2	2008-01-27 23:55:11 +0000	[diff] [blame]	78
				79	if (!Output.size() \|\| Output == "-")
				80	OutputFD = STDOUT_FILENO;
Chris Lattner	5db17c9	2008-04-08 04:16:20 +0000	[diff] [blame]	81	else {
Chris Lattner	5db17c9	2008-04-08 04:16:20 +0000	[diff] [blame]	82	OutputFilename = Output;
Chris Lattner	8808f00	2008-04-11 06:14:11 +0000	[diff] [blame]	83	OutputFD = open(Output.c_str(), O_WRONLY\|O_CREAT\|O_TRUNC, 0644);
				84	if (OutputFD < 0) {
				85	fprintf(stderr, "Error opening output file '%s'.\n", Output.c_str());
				86	exit(1);
				87	}
Chris Lattner	5db17c9	2008-04-08 04:16:20 +0000	[diff] [blame]	88	}
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	89	#endif
				90	}
				91
Chris Lattner	e988bc2	2008-01-27 23:55:11 +0000	[diff] [blame]	92	#ifndef USE_STDIO
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	93	/// FlushBuffer - Write the accumulated bytes to the output stream.
				94	///
				95	static void FlushBuffer() {
Chris Lattner	e988bc2	2008-01-27 23:55:11 +0000	[diff] [blame]	96	write(OutputFD, OutBufStart, OutBufCur-OutBufStart);
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	97	OutBufCur = OutBufStart;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	98	}
Chris Lattner	e988bc2	2008-01-27 23:55:11 +0000	[diff] [blame]	99	#endif
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	100
				101	/// CleanupOutputBuffer - Finish up output.
				102	///
Chris Lattner	5db17c9	2008-04-08 04:16:20 +0000	[diff] [blame]	103	static void CleanupOutputBuffer(bool ErrorOccurred) {
				104	#ifdef USE_STDIO
				105	if (OutputFILE != stdout)
				106	fclose(OutputFILE);
				107	#else
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	108	FlushBuffer();
				109	delete [] OutBufStart;
Chris Lattner	5db17c9	2008-04-08 04:16:20 +0000	[diff] [blame]	110	if (OutputFD != STDOUT_FILENO)
				111	close(OutputFD);
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	112	#endif
Chris Lattner	5db17c9	2008-04-08 04:16:20 +0000	[diff] [blame]	113
				114	// If an error occurred, remove the output file.
				115	if (ErrorOccurred && !OutputFilename.empty())
				116	llvm::sys::Path(OutputFilename).eraseFromDisk();
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	117	}
				118
				119	static void OutputChar(char c) {
Chris Lattner	6a4545e	2007-09-03 18:24:56 +0000	[diff] [blame]	120	#if defined(_MSC_VER)
Chris Lattner	e988bc2	2008-01-27 23:55:11 +0000	[diff] [blame]	121	putc(c, OutputFILE);
Chris Lattner	6a4545e	2007-09-03 18:24:56 +0000	[diff] [blame]	122	#elif defined(USE_STDIO)
Chris Lattner	e988bc2	2008-01-27 23:55:11 +0000	[diff] [blame]	123	putc_unlocked(c, OutputFILE);
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	124	#else
				125	if (OutBufCur >= OutBufEnd)
				126	FlushBuffer();
				127	*OutBufCur++ = c;
				128	#endif
				129	}
				130
				131	static void OutputString(const char *Ptr, unsigned Size) {
				132	#ifdef USE_STDIO
Chris Lattner	e988bc2	2008-01-27 23:55:11 +0000	[diff] [blame]	133	fwrite(Ptr, Size, 1, OutputFILE);
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	134	#else
				135	if (OutBufCur+Size >= OutBufEnd)
				136	FlushBuffer();
Chris Lattner	e225e37	2007-07-23 06:23:07 +0000	[diff] [blame]	137
				138	switch (Size) {
				139	default:
				140	memcpy(OutBufCur, Ptr, Size);
				141	break;
				142	case 3:
				143	OutBufCur[2] = Ptr[2];
				144	case 2:
				145	OutBufCur[1] = Ptr[1];
				146	case 1:
				147	OutBufCur[0] = Ptr[0];
				148	case 0:
				149	break;
				150	}
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	151	OutBufCur += Size;
				152	#endif
				153	}
				154
				155
				156	//===----------------------------------------------------------------------===//
				157	// Preprocessed token printer
				158	//===----------------------------------------------------------------------===//
				159
				160	static llvm::cl::opt<bool>
				161	DisableLineMarkers("P", llvm::cl::desc("Disable linemarker output in -E mode"));
				162	static llvm::cl::opt<bool>
				163	EnableCommentOutput("C", llvm::cl::desc("Enable comment output in -E mode"));
				164	static llvm::cl::opt<bool>
				165	EnableMacroCommentOutput("CC",
				166	llvm::cl::desc("Enable comment output in -E mode, "
				167	"even from macro expansions"));
				168
				169	namespace {
				170	class PrintPPOutputPPCallbacks : public PPCallbacks {
				171	Preprocessor &PP;
				172	unsigned CurLine;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	173	bool EmittedTokensOnThisLine;
				174	DirectoryLookup::DirType FileType;
Chris Lattner	d8e3083	2007-07-24 06:57:14 +0000	[diff] [blame]	175	llvm::SmallString<512> CurFilename;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	176	public:
				177	PrintPPOutputPPCallbacks(Preprocessor &pp) : PP(pp) {
				178	CurLine = 0;
Chris Lattner	d8e3083	2007-07-24 06:57:14 +0000	[diff] [blame]	179	CurFilename += "<uninit>";
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	180	EmittedTokensOnThisLine = false;
				181	FileType = DirectoryLookup::NormalHeaderDir;
				182	}
				183
				184	void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
Chris Lattner	f0f2b29	2007-07-23 06:09:34 +0000	[diff] [blame]	185	bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	186
				187	virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
				188	DirectoryLookup::DirType FileType);
				189	virtual void Ident(SourceLocation Loc, const std::string &str);
				190
				191
Chris Lattner	5f18032	2007-12-09 21:11:08 +0000	[diff] [blame]	192	bool HandleFirstTokOnLine(Token &Tok);
				193	bool MoveToLine(SourceLocation Loc);
Chris Lattner	d217773	2007-07-20 16:59:19 +0000	[diff] [blame]	194	bool AvoidConcat(const Token &PrevTok, const Token &Tok);
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	195	};
Chris Lattner	5db17c9	2008-04-08 04:16:20 +0000	[diff] [blame]	196	} // end anonymous namespace
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	197
/// UToStr - Do itoa on the specified number, in-place in the specified buffer.
///
/// \param N       Value to format in decimal.
/// \param EndPtr  Points one past the last byte of the caller's buffer. The
///                terminating NUL and then the digits are written backwards
///                from here, so the buffer must have room for them.
/// \returns a pointer to the first digit of the NUL-terminated result, which
///          lies inside the caller's buffer.
static char *UToStr(unsigned N, char *EndPtr) {
  // Null terminate the buffer.
  *--EndPtr = '\0';
  // A do/while emits a single '0' for zero without a special case.
  do {
    *--EndPtr = '0' + char(N % 10);
    N /= 10;
  } while (N);
  return EndPtr;
}
				211
				212
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	213	/// MoveToLine - Move the output to the source line specified by the location
				214	/// object. We can do this by emitting some number of \n's, or be emitting a
Chris Lattner	5f18032	2007-12-09 21:11:08 +0000	[diff] [blame]	215	/// #line directive. This returns false if already at the specified line, true
				216	/// if some newlines were emitted.
				217	bool PrintPPOutputPPCallbacks::MoveToLine(SourceLocation Loc) {
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	218	if (DisableLineMarkers) {
Chris Lattner	5f18032	2007-12-09 21:11:08 +0000	[diff] [blame]	219	unsigned LineNo = PP.getSourceManager().getLogicalLineNumber(Loc);
				220	if (LineNo == CurLine) return false;
				221
				222	CurLine = LineNo;
				223
				224	if (!EmittedTokensOnThisLine)
				225	return true;
				226
				227	OutputChar('\n');
				228	EmittedTokensOnThisLine = false;
				229	return true;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	230	}
				231
Chris Lattner	9dc1f53	2007-07-20 16:37:10 +0000	[diff] [blame]	232	unsigned LineNo = PP.getSourceManager().getLogicalLineNumber(Loc);
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	233
				234	// If this line is "close enough" to the original line, just print newlines,
				235	// otherwise print a #line directive.
				236	if (LineNo-CurLine < 8) {
Chris Lattner	822f940	2007-07-23 05:14:05 +0000	[diff] [blame]	237	if (LineNo-CurLine == 1)
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	238	OutputChar('\n');
Chris Lattner	5f18032	2007-12-09 21:11:08 +0000	[diff] [blame]	239	else if (LineNo == CurLine)
				240	return false; // Phys line moved, but logical line didn't.
Chris Lattner	822f940	2007-07-23 05:14:05 +0000	[diff] [blame]	241	else {
				242	const char *NewLines = "\n\n\n\n\n\n\n\n";
				243	OutputString(NewLines, LineNo-CurLine);
Chris Lattner	822f940	2007-07-23 05:14:05 +0000	[diff] [blame]	244	}
Chris Lattner	5c0887c	2007-12-09 20:45:43 +0000	[diff] [blame]	245	CurLine = LineNo;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	246	} else {
				247	if (EmittedTokensOnThisLine) {
				248	OutputChar('\n');
				249	EmittedTokensOnThisLine = false;
				250	}
				251
				252	CurLine = LineNo;
				253
				254	OutputChar('#');
				255	OutputChar(' ');
Chris Lattner	f063721	2007-07-23 06:31:11 +0000	[diff] [blame]	256	char NumberBuffer[20];
				257	const char *NumStr = UToStr(LineNo, NumberBuffer+20);
				258	OutputString(NumStr, (NumberBuffer+20)-NumStr-1);
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	259	OutputChar(' ');
Chris Lattner	0cbc4b5	2007-07-22 06:38:50 +0000	[diff] [blame]	260	OutputChar('"');
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	261	OutputString(&CurFilename[0], CurFilename.size());
Chris Lattner	0cbc4b5	2007-07-22 06:38:50 +0000	[diff] [blame]	262	OutputChar('"');
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	263
				264	if (FileType == DirectoryLookup::SystemHeaderDir)
				265	OutputString(" 3", 2);
				266	else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
				267	OutputString(" 3 4", 4);
				268	OutputChar('\n');
				269	}
Chris Lattner	5f18032	2007-12-09 21:11:08 +0000	[diff] [blame]	270	return true;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	271	}
				272
				273
				274	/// FileChanged - Whenever the preprocessor enters or exits a #include file
				275	/// it invokes this handler. Update our conception of the current source
				276	/// position.
				277	void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
				278	FileChangeReason Reason,
				279	DirectoryLookup::DirType FileType) {
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	280	// Unless we are exiting a #include, make sure to skip ahead to the line the
				281	// #include directive was at.
				282	SourceManager &SourceMgr = PP.getSourceManager();
				283	if (Reason == PPCallbacks::EnterFile) {
Chris Lattner	9dc1f53	2007-07-20 16:37:10 +0000	[diff] [blame]	284	MoveToLine(SourceMgr.getIncludeLoc(Loc));
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	285	} else if (Reason == PPCallbacks::SystemHeaderPragma) {
				286	MoveToLine(Loc);
				287
				288	// TODO GCC emits the # directive for this directive on the line AFTER the
				289	// directive and emits a bunch of spaces that aren't needed. Emulate this
				290	// strange behavior.
				291	}
				292
Chris Lattner	9dc1f53	2007-07-20 16:37:10 +0000	[diff] [blame]	293	Loc = SourceMgr.getLogicalLoc(Loc);
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	294	CurLine = SourceMgr.getLineNumber(Loc);
Chris Lattner	5f18032	2007-12-09 21:11:08 +0000	[diff] [blame]	295
				296	if (DisableLineMarkers) return;
				297
Chris Lattner	d8e3083	2007-07-24 06:57:14 +0000	[diff] [blame]	298	CurFilename.clear();
				299	CurFilename += SourceMgr.getSourceName(Loc);
				300	Lexer::Stringify(CurFilename);
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	301	FileType = FileType;
				302
				303	if (EmittedTokensOnThisLine) {
				304	OutputChar('\n');
				305	EmittedTokensOnThisLine = false;
				306	}
				307
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	308	OutputChar('#');
				309	OutputChar(' ');
Chris Lattner	5143196	2007-07-24 06:59:01 +0000	[diff] [blame]	310
				311	char NumberBuffer[20];
				312	const char *NumStr = UToStr(CurLine, NumberBuffer+20);
				313	OutputString(NumStr, (NumberBuffer+20)-NumStr-1);
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	314	OutputChar(' ');
Chris Lattner	0cbc4b5	2007-07-22 06:38:50 +0000	[diff] [blame]	315	OutputChar('"');
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	316	OutputString(&CurFilename[0], CurFilename.size());
Chris Lattner	0cbc4b5	2007-07-22 06:38:50 +0000	[diff] [blame]	317	OutputChar('"');
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	318
				319	switch (Reason) {
				320	case PPCallbacks::EnterFile:
				321	OutputString(" 1", 2);
				322	break;
				323	case PPCallbacks::ExitFile:
				324	OutputString(" 2", 2);
				325	break;
				326	case PPCallbacks::SystemHeaderPragma: break;
				327	case PPCallbacks::RenameFile: break;
				328	}
				329
				330	if (FileType == DirectoryLookup::SystemHeaderDir)
				331	OutputString(" 3", 2);
				332	else if (FileType == DirectoryLookup::ExternCSystemHeaderDir)
				333	OutputString(" 3 4", 4);
				334
				335	OutputChar('\n');
				336	}
				337
				338	/// HandleIdent - Handle #ident directives when read by the preprocessor.
				339	///
				340	void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
				341	MoveToLine(Loc);
				342
				343	OutputString("#ident ", strlen("#ident "));
				344	OutputString(&S[0], S.size());
				345	EmittedTokensOnThisLine = true;
				346	}
				347
				348	/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
Chris Lattner	5f18032	2007-12-09 21:11:08 +0000	[diff] [blame]	349	/// is called for the first token on each new line. If this really is the start
				350	/// of a new logical line, handle it and return true, otherwise return false.
				351	/// This may not be the start of a logical line because the "start of line"
				352	/// marker is set for physical lines, not logical ones.
				353	bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	354	// Figure out what line we went to and insert the appropriate number of
				355	// newline characters.
Chris Lattner	5f18032	2007-12-09 21:11:08 +0000	[diff] [blame]	356	if (!MoveToLine(Tok.getLocation()))
				357	return false;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	358
				359	// Print out space characters so that the first token on a line is
				360	// indented for easy reading.
Chris Lattner	9dc1f53	2007-07-20 16:37:10 +0000	[diff] [blame]	361	const SourceManager &SourceMgr = PP.getSourceManager();
				362	unsigned ColNo = SourceMgr.getLogicalColumnNumber(Tok.getLocation());
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	363
				364	// This hack prevents stuff like:
				365	// #define HASH #
				366	// HASH define foo bar
				367	// From having the # character end up at column 1, which makes it so it
				368	// is not handled as a #define next time through the preprocessor if in
				369	// -fpreprocessed mode.
Chris Lattner	057aaf6	2007-10-09 18:03:42 +0000	[diff] [blame]	370	if (ColNo <= 1 && Tok.is(tok::hash))
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	371	OutputChar(' ');
				372
				373	// Otherwise, indent the appropriate number of spaces.
				374	for (; ColNo > 1; --ColNo)
				375	OutputChar(' ');
Chris Lattner	5f18032	2007-12-09 21:11:08 +0000	[diff] [blame]	376
				377	return true;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	378	}
				379
				380	namespace {
				381	struct UnknownPragmaHandler : public PragmaHandler {
				382	const char *Prefix;
				383	PrintPPOutputPPCallbacks *Callbacks;
				384
				385	UnknownPragmaHandler(const char prefix, PrintPPOutputPPCallbacks callbacks)
				386	: PragmaHandler(0), Prefix(prefix), Callbacks(callbacks) {}
Chris Lattner	d217773	2007-07-20 16:59:19 +0000	[diff] [blame]	387	virtual void HandlePragma(Preprocessor &PP, Token &PragmaTok) {
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	388	// Figure out what line we went to and insert the appropriate number of
				389	// newline characters.
				390	Callbacks->MoveToLine(PragmaTok.getLocation());
				391	OutputString(Prefix, strlen(Prefix));
				392
				393	// Read and print all of the pragma tokens.
Chris Lattner	057aaf6	2007-10-09 18:03:42 +0000	[diff] [blame]	394	while (PragmaTok.isNot(tok::eom)) {
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	395	if (PragmaTok.hasLeadingSpace())
				396	OutputChar(' ');
				397	std::string TokSpell = PP.getSpelling(PragmaTok);
				398	OutputString(&TokSpell[0], TokSpell.size());
				399	PP.LexUnexpandedToken(PragmaTok);
				400	}
				401	OutputChar('\n');
				402	}
				403	};
				404	} // end anonymous namespace
				405
Chris Lattner	f0f2b29	2007-07-23 06:09:34 +0000	[diff] [blame]	406
				407	enum AvoidConcatInfo {
				408	/// By default, a token never needs to avoid concatenation. Most tokens (e.g.
				409	/// ',', ')', etc) don't cause a problem when concatenated.
				410	aci_never_avoid_concat = 0,
				411
				412	/// aci_custom_firstchar - AvoidConcat contains custom code to handle this
				413	/// token's requirements, and it needs to know the first character of the
				414	/// token.
				415	aci_custom_firstchar = 1,
				416
				417	/// aci_custom - AvoidConcat contains custom code to handle this token's
				418	/// requirements, but it doesn't need to know the first character of the
				419	/// token.
				420	aci_custom = 2,
				421
				422	/// aci_avoid_equal - Many tokens cannot be safely followed by an '='
				423	/// character. For example, "<<" turns into "<<=" when followed by an =.
				424	aci_avoid_equal = 4
				425	};
				426
				427	/// This array contains information for each token on what action to take when
				428	/// avoiding concatenation of tokens in the AvoidConcat method.
				429	static char TokenInfo[tok::NUM_TOKENS];
				430
				431	/// InitAvoidConcatTokenInfo - Tokens that must avoid concatenation should be
				432	/// marked by this function.
				433	static void InitAvoidConcatTokenInfo() {
				434	// These tokens have custom code in AvoidConcat.
				435	TokenInfo[tok::identifier ] \|= aci_custom;
				436	TokenInfo[tok::numeric_constant] \|= aci_custom_firstchar;
				437	TokenInfo[tok::period ] \|= aci_custom_firstchar;
				438	TokenInfo[tok::amp ] \|= aci_custom_firstchar;
				439	TokenInfo[tok::plus ] \|= aci_custom_firstchar;
				440	TokenInfo[tok::minus ] \|= aci_custom_firstchar;
				441	TokenInfo[tok::slash ] \|= aci_custom_firstchar;
				442	TokenInfo[tok::less ] \|= aci_custom_firstchar;
				443	TokenInfo[tok::greater ] \|= aci_custom_firstchar;
				444	TokenInfo[tok::pipe ] \|= aci_custom_firstchar;
				445	TokenInfo[tok::percent ] \|= aci_custom_firstchar;
				446	TokenInfo[tok::colon ] \|= aci_custom_firstchar;
				447	TokenInfo[tok::hash ] \|= aci_custom_firstchar;
				448	TokenInfo[tok::arrow ] \|= aci_custom_firstchar;
				449
				450	// These tokens change behavior if followed by an '='.
				451	TokenInfo[tok::amp ] \|= aci_avoid_equal; // &=
				452	TokenInfo[tok::plus ] \|= aci_avoid_equal; // +=
				453	TokenInfo[tok::minus ] \|= aci_avoid_equal; // -=
				454	TokenInfo[tok::slash ] \|= aci_avoid_equal; // /=
				455	TokenInfo[tok::less ] \|= aci_avoid_equal; // <=
				456	TokenInfo[tok::greater ] \|= aci_avoid_equal; // >=
				457	TokenInfo[tok::pipe ] \|= aci_avoid_equal; // \|=
				458	TokenInfo[tok::percent ] \|= aci_avoid_equal; // %=
				459	TokenInfo[tok::star ] \|= aci_avoid_equal; // *=
				460	TokenInfo[tok::exclaim ] \|= aci_avoid_equal; // !=
				461	TokenInfo[tok::lessless ] \|= aci_avoid_equal; // <<=
				462	TokenInfo[tok::greaterequal] \|= aci_avoid_equal; // >>=
				463	TokenInfo[tok::caret ] \|= aci_avoid_equal; // ^=
				464	TokenInfo[tok::equal ] \|= aci_avoid_equal; // ==
				465	}
				466
Chris Lattner	b1a17ae	2008-01-15 05:22:14 +0000	[diff] [blame]	467	/// StartsWithL - Return true if the spelling of this token starts with 'L'.
Chris Lattner	fdc0d3c	2008-01-15 05:14:19 +0000	[diff] [blame]	468	static bool StartsWithL(const Token &Tok, Preprocessor &PP) {
Chris Lattner	fdc0d3c	2008-01-15 05:14:19 +0000	[diff] [blame]	469	if (!Tok.needsCleaning()) {
				470	SourceManager &SrcMgr = PP.getSourceManager();
				471	return *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()))
				472	== 'L';
				473	}
				474
				475	if (Tok.getLength() < 256) {
Chris Lattner	b1a17ae	2008-01-15 05:22:14 +0000	[diff] [blame]	476	char Buffer[256];
Chris Lattner	fdc0d3c	2008-01-15 05:14:19 +0000	[diff] [blame]	477	const char *TokPtr = Buffer;
				478	PP.getSpelling(Tok, TokPtr);
				479	return TokPtr[0] == 'L';
				480	}
				481
				482	return PP.getSpelling(Tok)[0] == 'L';
				483	}
				484
Chris Lattner	b1a17ae	2008-01-15 05:22:14 +0000	[diff] [blame]	485	/// IsIdentifierL - Return true if the spelling of this token is literally 'L'.
				486	static bool IsIdentifierL(const Token &Tok, Preprocessor &PP) {
				487	if (!Tok.needsCleaning()) {
				488	if (Tok.getLength() != 1)
				489	return false;
				490	SourceManager &SrcMgr = PP.getSourceManager();
				491	return *SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()))
				492	== 'L';
				493	}
				494
				495	if (Tok.getLength() < 256) {
				496	char Buffer[256];
				497	const char *TokPtr = Buffer;
				498	if (PP.getSpelling(Tok, TokPtr) != 1)
				499	return false;
				500	return TokPtr[0] == 'L';
				501	}
				502
				503	return PP.getSpelling(Tok) == "L";
				504	}
				505
				506
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	507	/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause
				508	/// the two individual tokens to be lexed as a single token, return true (which
				509	/// causes a space to be printed between them). This allows the output of -E
				510	/// mode to be lexed to the same token stream as lexing the input directly
				511	/// would.
				512	///
				513	/// This code must conservatively return true if it doesn't want to be 100%
				514	/// accurate. This will cause the output to include extra space characters, but
				515	/// the resulting output won't have incorrect concatenations going on. Examples
				516	/// include "..", which we print with a space between, because we don't want to
				517	/// track enough to tell "x.." from "...".
Chris Lattner	d217773	2007-07-20 16:59:19 +0000	[diff] [blame]	518	bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
				519	const Token &Tok) {
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	520	char Buffer[256];
				521
Chris Lattner	f0f2b29	2007-07-23 06:09:34 +0000	[diff] [blame]	522	tok::TokenKind PrevKind = PrevTok.getKind();
				523	if (PrevTok.getIdentifierInfo()) // Language keyword or named operator.
				524	PrevKind = tok::identifier;
				525
				526	// Look up information on when we should avoid concatenation with prevtok.
				527	unsigned ConcatInfo = TokenInfo[PrevKind];
				528
				529	// If prevtok never causes a problem for anything after it, return quickly.
				530	if (ConcatInfo == 0) return false;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	531
Chris Lattner	f0f2b29	2007-07-23 06:09:34 +0000	[diff] [blame]	532	if (ConcatInfo & aci_avoid_equal) {
				533	// If the next token is '=' or '==', avoid concatenation.
Chris Lattner	057aaf6	2007-10-09 18:03:42 +0000	[diff] [blame]	534	if (Tok.is(tok::equal) \|\| Tok.is(tok::equalequal))
Chris Lattner	f0f2b29	2007-07-23 06:09:34 +0000	[diff] [blame]	535	return true;
Chris Lattner	b638a30	2007-07-23 23:21:34 +0000	[diff] [blame]	536	ConcatInfo &= ~aci_avoid_equal;
Chris Lattner	f0f2b29	2007-07-23 06:09:34 +0000	[diff] [blame]	537	}
				538
				539	if (ConcatInfo == 0) return false;
				540
				541
				542
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	543	// Basic algorithm: we look at the first character of the second token, and
				544	// determine whether it, if appended to the first token, would form (or would
				545	// contribute) to a larger token if concatenated.
Chris Lattner	f0f2b29	2007-07-23 06:09:34 +0000	[diff] [blame]	546	char FirstChar = 0;
				547	if (ConcatInfo & aci_custom) {
				548	// If the token does not need to know the first character, don't get it.
				549	} else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	550	// Avoid spelling identifiers, the most common form of token.
				551	FirstChar = II->getName()[0];
Chris Lattner	b19f5e8	2007-07-23 05:18:42 +0000	[diff] [blame]	552	} else if (!Tok.needsCleaning()) {
				553	SourceManager &SrcMgr = PP.getSourceManager();
				554	FirstChar =
				555	*SrcMgr.getCharacterData(SrcMgr.getPhysicalLoc(Tok.getLocation()));
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	556	} else if (Tok.getLength() < 256) {
				557	const char *TokPtr = Buffer;
				558	PP.getSpelling(Tok, TokPtr);
				559	FirstChar = TokPtr[0];
				560	} else {
				561	FirstChar = PP.getSpelling(Tok)[0];
				562	}
Chris Lattner	f0f2b29	2007-07-23 06:09:34 +0000	[diff] [blame]	563
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	564	switch (PrevKind) {
Chris Lattner	f0f2b29	2007-07-23 06:09:34 +0000	[diff] [blame]	565	default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	566	case tok::identifier: // id+id or id+number or id+L"foo".
Chris Lattner	057aaf6	2007-10-09 18:03:42 +0000	[diff] [blame]	567	if (Tok.is(tok::numeric_constant) \|\| Tok.getIdentifierInfo() \|\|
				568	Tok.is(tok::wide_string_literal) /* \|\|
				569	Tok.is(tok::wide_char_literal)*/)
Chris Lattner	f0f2b29	2007-07-23 06:09:34 +0000	[diff] [blame]	570	return true;
Chris Lattner	fdc0d3c	2008-01-15 05:14:19 +0000	[diff] [blame]	571
				572	// If this isn't identifier + string, we're done.
				573	if (Tok.isNot(tok::char_constant) && Tok.isNot(tok::string_literal))
Chris Lattner	f0f2b29	2007-07-23 06:09:34 +0000	[diff] [blame]	574	return false;
				575
				576	// FIXME: need a wide_char_constant!
Chris Lattner	fdc0d3c	2008-01-15 05:14:19 +0000	[diff] [blame]	577
				578	// If the string was a wide string L"foo" or wide char L'f', it would concat
				579	// with the previous identifier into fooL"bar". Avoid this.
				580	if (StartsWithL(Tok, PP))
				581	return true;
				582
Chris Lattner	b1a17ae	2008-01-15 05:22:14 +0000	[diff] [blame]	583	// Otherwise, this is a narrow character or string. If the identifier is
				584	// a literal 'L', avoid pasting L "foo" -> L"foo".
				585	return IsIdentifierL(PrevTok, PP);
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	586	case tok::numeric_constant:
Chris Lattner	057aaf6	2007-10-09 18:03:42 +0000	[diff] [blame]	587	return isalnum(FirstChar) \|\| Tok.is(tok::numeric_constant) \|\|
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	588	FirstChar == '+' \|\| FirstChar == '-' \|\| FirstChar == '.';
				589	case tok::period: // ..., .*, .1234
				590	return FirstChar == '.' \|\| FirstChar == '*' \|\| isdigit(FirstChar);
Chris Lattner	f0f2b29	2007-07-23 06:09:34 +0000	[diff] [blame]	591	case tok::amp: // &&
				592	return FirstChar == '&';
				593	case tok::plus: // ++
				594	return FirstChar == '+';
				595	case tok::minus: // --, ->, ->*
				596	return FirstChar == '-' \|\| FirstChar == '>';
				597	case tok::slash: //, /*, //
				598	return FirstChar == '*' \|\| FirstChar == '/';
				599	case tok::less: // <<, <<=, <:, <%
				600	return FirstChar == '<' \|\| FirstChar == ':' \|\| FirstChar == '%';
				601	case tok::greater: // >>, >>=
				602	return FirstChar == '>';
				603	case tok::pipe: // \|\|
				604	return FirstChar == '\|';
				605	case tok::percent: // %>, %:
				606	return FirstChar == '>' \|\| FirstChar == ':';
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	607	case tok::colon: // ::, :>
				608	return FirstChar == ':' \|\| FirstChar == '>';
				609	case tok::hash: // ##, #@, %:%:
				610	return FirstChar == '#' \|\| FirstChar == '@' \|\| FirstChar == '%';
				611	case tok::arrow: // ->*
				612	return FirstChar == '*';
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	613	}
				614	}
				615
				616	/// DoPrintPreprocessedInput - This implements -E mode.
				617	///
Chris Lattner	5db17c9	2008-04-08 04:16:20 +0000	[diff] [blame]	618	void clang::DoPrintPreprocessedInput(Preprocessor &PP,
				619	const std::string &OutFile) {
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	620	// Inform the preprocessor whether we want it to retain comments or not, due
				621	// to -C or -CC.
				622	PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput);
				623
Chris Lattner	e988bc2	2008-01-27 23:55:11 +0000	[diff] [blame]	624	InitOutputBuffer(OutFile);
Chris Lattner	f0f2b29	2007-07-23 06:09:34 +0000	[diff] [blame]	625	InitAvoidConcatTokenInfo();
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	626
Chris Lattner	d217773	2007-07-20 16:59:19 +0000	[diff] [blame]	627	Token Tok, PrevTok;
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	628	char Buffer[256];
				629	PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(PP);
				630	PP.setPPCallbacks(Callbacks);
				631
				632	PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks));
				633	PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",Callbacks));
				634
				635	// After we have configured the preprocessor, enter the main file.
				636
				637	// Start parsing the specified input file.
Ted Kremenek	95041a2	2007-12-19 22:51:13 +0000	[diff] [blame]	638	PP.EnterMainSourceFile();
Chris Lattner	6f688e1	2007-10-10 20:45:16 +0000	[diff] [blame]	639
				640	// Consume all of the tokens that come from the predefines buffer. Those
				641	// should not be emitted into the output and are guaranteed to be at the
				642	// start.
				643	const SourceManager &SourceMgr = PP.getSourceManager();
				644	do PP.Lex(Tok);
Chris Lattner	a1a5178	2007-10-10 23:31:03 +0000	[diff] [blame]	645	while (Tok.isNot(tok::eof) && Tok.getLocation().isFileID() &&
Chris Lattner	6f688e1	2007-10-10 20:45:16 +0000	[diff] [blame]	646	!strcmp(SourceMgr.getSourceName(Tok.getLocation()), "<predefines>"));
				647
				648	while (1) {
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	649
				650	// If this token is at the start of a line, emit newlines if needed.
Chris Lattner	5f18032	2007-12-09 21:11:08 +0000	[diff] [blame]	651	if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
				652	// done.
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	653	} else if (Tok.hasLeadingSpace() \|\|
Chris Lattner	f0f2b29	2007-07-23 06:09:34 +0000	[diff] [blame]	654	// If we haven't emitted a token on this line yet, PrevTok isn't
				655	// useful to look at and no concatenation could happen anyway.
Chris Lattner	b638a30	2007-07-23 23:21:34 +0000	[diff] [blame]	656	(Callbacks->hasEmittedTokensOnThisLine() &&
Chris Lattner	f0f2b29	2007-07-23 06:09:34 +0000	[diff] [blame]	657	// Don't print "-" next to "-", it would form "--".
				658	Callbacks->AvoidConcat(PrevTok, Tok))) {
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	659	OutputChar(' ');
				660	}
				661
Chris Lattner	2933f41	2007-07-23 06:14:36 +0000	[diff] [blame]	662	if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
				663	const char *Str = II->getName();
				664	unsigned Len = Tok.needsCleaning() ? strlen(Str) : Tok.getLength();
				665	OutputString(Str, Len);
				666	} else if (Tok.getLength() < 256) {
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	667	const char *TokPtr = Buffer;
				668	unsigned Len = PP.getSpelling(Tok, TokPtr);
				669	OutputString(TokPtr, Len);
				670	} else {
				671	std::string S = PP.getSpelling(Tok);
				672	OutputString(&S[0], S.size());
				673	}
				674	Callbacks->SetEmittedTokensOnThisLine();
Chris Lattner	6f688e1	2007-10-10 20:45:16 +0000	[diff] [blame]	675
				676	if (Tok.is(tok::eof)) break;
				677
				678	PrevTok = Tok;
				679	PP.Lex(Tok);
				680	}
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	681	OutputChar('\n');
				682
Chris Lattner	5db17c9	2008-04-08 04:16:20 +0000	[diff] [blame]	683	CleanupOutputBuffer(PP.getDiagnostics().hasErrorOccurred());
Reid Spencer	5f016e2	2007-07-11 17:01:13 +0000	[diff] [blame]	684	}
				685