Blame - clang/Lex/Preprocessor.cpp - toolchain/llvm-project

blob: 7398e7e181006d1ac55b8ac2bcc1f3e93b04123b [file] [log] [blame]

Chris Lattner	22eb972	2006-06-18 05:43:12 +0000	[diff] [blame^]	1	//===--- Preprocessor.cpp - C Language Family Preprocessor Implementation -===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file was developed by Chris Lattner and is distributed under
				6	// the University of Illinois Open Source License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file implements the Preprocessor interface.
				11	//
				12	//===----------------------------------------------------------------------===//
				13	//
				14	// TODO: GCC Diagnostics emitted by the lexer:
				15	//
				16	// ERROR : __VA_ARGS__ can only appear in the expansion of a C99 variadic macro
				17	//
				18	// Options to support:
				19	// -H - Print the name of each header file used.
				20	// -C -CC - Do not discard comments for cpp.
				21	// -P - Do not emit #line directives.
				22	// -d[MDNI] - Dump various things.
				23	// -fworking-directory - #line's with preprocessor's working dir.
				24	// -fpreprocessed
				25	// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
				26	// -W*
				27	// -w
				28	//
				29	// Messages to emit:
				30	// "Multiple include guards may be useful for:\n"
				31	//
				32	// TODO: Implement the include guard optimization.
				33	//
				34	//===----------------------------------------------------------------------===//
				35
				36	#include "clang/Lex/Preprocessor.h"
				37	#include "clang/Lex/MacroInfo.h"
				38	#include "clang/Basic/Diagnostic.h"
				39	#include "clang/Basic/FileManager.h"
				40	#include "clang/Basic/SourceManager.h"
				41	#include <iostream>
				42	using namespace llvm;
				43	using namespace clang;
				44
				45	//===----------------------------------------------------------------------===//
				46
				47	Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts,
				48	FileManager &FM, SourceManager &SM)
				49	: Diags(diags), Features(opts), FileMgr(FM), SourceMgr(SM),
				50	SystemDirIdx(0), NoCurDirSearch(false),
				51	CurLexer(0), CurNextDirLookup(0), CurMacroExpander(0) {
				52	// Clear stats.
				53	NumDirectives = NumIncluded = NumDefined = NumUndefined = NumPragma = 0;
				54	NumIf = NumElse = NumEndif = 0;
				55	NumEnteredSourceFiles = NumMacroExpanded = NumFastMacroExpanded = 0;
				56	MaxIncludeStackDepth = MaxMacroStackDepth = 0;
				57	NumSkipped = 0;
				58
				59	// Macro expansion is enabled.
				60	DisableMacroExpansion = false;
				61	SkippingContents = false;
				62	}
				63
				64	Preprocessor::~Preprocessor() {
				65	// Free any active lexers.
				66	delete CurLexer;
				67
				68	while (!IncludeStack.empty()) {
				69	delete IncludeStack.back().TheLexer;
				70	IncludeStack.pop_back();
				71	}
				72	}
				73
				74	/// getFileInfo - Return the PerFileInfo structure for the specified
				75	/// FileEntry.
				76	Preprocessor::PerFileInfo &Preprocessor::getFileInfo(const FileEntry *FE) {
				77	if (FE->getUID() >= FileInfo.size())
				78	FileInfo.resize(FE->getUID()+1);
				79	return FileInfo[FE->getUID()];
				80	}
				81
				82
				83	/// AddKeywords - Add all keywords to the symbol table.
				84	///
				85	void Preprocessor::AddKeywords() {
				86	enum {
				87	C90Shift = 0,
				88	EXTC90 = 1 << C90Shift,
				89	NOTC90 = 2 << C90Shift,
				90	C99Shift = 2,
				91	EXTC99 = 1 << C99Shift,
				92	NOTC99 = 2 << C99Shift,
				93	CPPShift = 4,
				94	EXTCPP = 1 << CPPShift,
				95	NOTCPP = 2 << CPPShift,
				96	Mask = 3
				97	};
				98
				99	// Add keywords and tokens for the current language.
				100	#define KEYWORD(NAME, FLAGS) \
				101	AddKeyword(#NAME+1, tok::kw##NAME, \
				102	(FLAGS >> C90Shift) & Mask, \
				103	(FLAGS >> C99Shift) & Mask, \
				104	(FLAGS >> CPPShift) & Mask);
				105	#define ALIAS(NAME, TOK) \
				106	AddKeyword(NAME, tok::kw_ ## TOK, 0, 0, 0);
				107	#include "clang/Basic/TokenKinds.def"
				108	}
				109
				110	/// Diag - Forwarding function for diagnostics. This emits a diagnostic at
				111	/// the specified LexerToken's location, translating the token's start
				112	/// position in the current buffer into a SourcePosition object for rendering.
				113	bool Preprocessor::Diag(SourceLocation Loc, unsigned DiagID,
				114	const std::string &Msg) {
				115	// If we are in a '#if 0' block, don't emit any diagnostics for notes,
				116	// warnings or extensions.
				117	if (isSkipping() && Diagnostic::isNoteWarningOrExtension(DiagID))
				118	return false;
				119
				120	return Diags.Report(Loc, DiagID, Msg);
				121	}
				122	bool Preprocessor::Diag(const LexerToken &Tok, unsigned DiagID,
				123	const std::string &Msg) {
				124	// If we are in a '#if 0' block, don't emit any diagnostics for notes,
				125	// warnings or extensions.
				126	if (isSkipping() && Diagnostic::isNoteWarningOrExtension(DiagID))
				127	return false;
				128
				129	return Diag(Tok.getSourceLocation(), DiagID, Msg);
				130	}
				131
				132	void Preprocessor::PrintStats() {
				133	std::cerr << "\n*** Preprocessor Stats:\n";
				134	std::cerr << FileInfo.size() << " files tracked.\n";
				135	unsigned NumOnceOnlyFiles = 0, MaxNumIncludes = 0, NumSingleIncludedFiles = 0;
				136	for (unsigned i = 0, e = FileInfo.size(); i != e; ++i) {
				137	NumOnceOnlyFiles += FileInfo[i].isImport;
				138	if (MaxNumIncludes < FileInfo[i].NumIncludes)
				139	MaxNumIncludes = FileInfo[i].NumIncludes;
				140	NumSingleIncludedFiles += FileInfo[i].NumIncludes == 1;
				141	}
				142	std::cerr << " " << NumOnceOnlyFiles << " #import/#pragma once files.\n";
				143	std::cerr << " " << NumSingleIncludedFiles << " included exactly once.\n";
				144	std::cerr << " " << MaxNumIncludes << " max times a file is included.\n";
				145
				146	std::cerr << NumDirectives << " directives found:\n";
				147	std::cerr << " " << NumDefined << " #define.\n";
				148	std::cerr << " " << NumUndefined << " #undef.\n";
				149	std::cerr << " " << NumIncluded << " #include/#include_next/#import.\n";
				150	std::cerr << " " << NumEnteredSourceFiles << " source files entered.\n";
				151	std::cerr << " " << MaxIncludeStackDepth << " max include stack depth\n";
				152	std::cerr << " " << NumIf << " #if/#ifndef/#ifdef.\n";
				153	std::cerr << " " << NumElse << " #else/#elif.\n";
				154	std::cerr << " " << NumEndif << " #endif.\n";
				155	std::cerr << " " << NumPragma << " #pragma.\n";
				156	std::cerr << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
				157
				158	std::cerr << NumMacroExpanded << " macros expanded, "
				159	<< NumFastMacroExpanded << " on the fast path.\n";
				160	if (MaxMacroStackDepth > 1)
				161	std::cerr << " " << MaxMacroStackDepth << " max macroexpand stack depth\n";
				162	}
				163
				164	//===----------------------------------------------------------------------===//
				165	// Source File Location Methods.
				166	//===----------------------------------------------------------------------===//
				167
				168
				169	/// LookupFile - Given a "foo" or <foo> reference, look up the indicated file,
				170	/// return null on failure. isAngled indicates whether the file reference is
				171	/// for system #include's or not (i.e. using <> instead of "").
				172	const FileEntry *Preprocessor::LookupFile(const std::string &Filename,
				173	bool isSystem,
				174	const DirectoryLookup *FromDir,
				175	const DirectoryLookup *&NextDir) {
				176	assert(CurLexer && "Cannot enter a #include inside a macro expansion!");
				177	NextDir = 0;
				178
				179	// If 'Filename' is absolute, check to see if it exists and no searching.
				180	// FIXME: this should be a sys::Path interface, this doesn't handle things
				181	// like C:\foo.txt right, nor win32 \\network\device\blah.
				182	if (Filename[0] == '/') {
				183	// If this was an #include_next "/absolute/file", fail.
				184	if (FromDir) return 0;
				185
				186	// Otherwise, just return the file.
				187	return FileMgr.getFile(Filename);
				188	}
				189
				190	// Step #0, unless disabled, check to see if the file is in the #includer's
				191	// directory. This search is not done for <> headers.
				192	if (!isSystem && !FromDir && !NoCurDirSearch) {
				193	const FileEntry *CurFE =
				194	SourceMgr.getFileEntryForFileID(CurLexer->getCurFileID());
				195	if (CurFE) {
				196	if (const FileEntry *FE =
				197	FileMgr.getFile(CurFE->getDir()->getName()+"/"+Filename)) {
				198	if (CurNextDirLookup)
				199	NextDir = CurNextDirLookup;
				200	else
				201	NextDir = &SearchDirs[0];
				202	return FE;
				203	}
				204	}
				205	}
				206
				207	// If this is a system #include, ignore the user #include locs.
				208	unsigned i = isSystem ? SystemDirIdx : 0;
				209
				210	// If this is a #include_next request, start searching after the directory the
				211	// file was found in.
				212	if (FromDir)
				213	i = FromDir-&SearchDirs[0];
				214
				215	// Check each directory in sequence to see if it contains this file.
				216	for (; i != SearchDirs.size(); ++i) {
				217	// Concatenate the requested file onto the directory.
				218	// FIXME: should be in sys::Path.
				219	if (const FileEntry *FE =
				220	FileMgr.getFile(SearchDirs[i].getDir()->getName()+"/"+Filename)) {
				221	NextDir = &SearchDirs[i+1];
				222	return FE;
				223	}
				224	}
				225
				226	// Otherwise, didn't find it.
				227	return 0;
				228	}
				229
				230	/// EnterSourceFile - Add a source file to the top of the include stack and
				231	/// start lexing tokens from it instead of the current buffer. Return true
				232	/// on failure.
				233	void Preprocessor::EnterSourceFile(unsigned FileID,
				234	const DirectoryLookup *NextDir) {
				235	++NumEnteredSourceFiles;
				236
				237	// Add the current lexer to the include stack.
				238	if (CurLexer) {
				239	IncludeStack.push_back(IncludeStackInfo(CurLexer, CurNextDirLookup));
				240	} else {
				241	assert(CurMacroExpander == 0 && "Cannot #include a file inside a macro!");
				242	}
				243
				244	if (MaxIncludeStackDepth < IncludeStack.size())
				245	MaxIncludeStackDepth = IncludeStack.size();
				246
				247	const SourceBuffer *Buffer = SourceMgr.getBuffer(FileID);
				248
				249	CurLexer = new Lexer(Buffer, FileID, *this);
				250	CurNextDirLookup = NextDir;
				251	}
				252
				253	/// EnterMacro - Add a Macro to the top of the include stack and start lexing
				254	/// tokens from it instead of the current buffer. Return true on failure.
				255	bool Preprocessor::EnterMacro(LexerToken &Tok) {
				256	IdentifierTokenInfo *Identifier = Tok.getIdentifierInfo();
				257	MacroInfo &MI = *Identifier->getMacroInfo();
				258	SourceLocation ExpandLoc = Tok.getSourceLocation();
				259	unsigned MacroID = SourceMgr.getMacroID(Identifier, ExpandLoc);
				260	if (CurLexer) {
				261	IncludeStack.push_back(IncludeStackInfo(CurLexer, CurNextDirLookup));
				262	CurLexer = 0;
				263	CurNextDirLookup = 0;
				264	} else if (CurMacroExpander) {
				265	MacroStack.push_back(CurMacroExpander);
				266	}
				267
				268	if (MaxMacroStackDepth < MacroStack.size())
				269	MaxMacroStackDepth = MacroStack.size();
				270
				271	// TODO: Figure out arguments.
				272
				273	// Mark the macro as currently disabled, so that it is not recursively
				274	// expanded.
				275	MI.DisableMacro();
				276
				277	CurMacroExpander = new MacroExpander(MI, MacroID, *this,
				278	Tok.isAtStartOfLine(),
				279	Tok.hasLeadingSpace());
				280	return false;
				281	}
				282
				283
				284	//===----------------------------------------------------------------------===//
				285	// Lexer Event Handling.
				286	//===----------------------------------------------------------------------===//
				287
				288	/// HandleIdentifier - This callback is invoked when the lexer reads an
				289	/// identifier. This callback looks up the identifier in the map and/or
				290	/// potentially macro expands it or turns it into a named token (like 'for').
				291	bool Preprocessor::HandleIdentifier(LexerToken &Identifier) {
				292	if (Identifier.getIdentifierInfo() == 0) {
				293	// If we are skipping tokens (because we are in a #if 0 block), there will
				294	// be no identifier info, just return the token.
				295	assert(isSkipping() && "Token isn't an identifier?");
				296	return false;
				297	}
				298	IdentifierTokenInfo &ITI = *Identifier.getIdentifierInfo();
				299
				300	// FIXME: Check for poisoning in ITI?
				301
				302	if (MacroInfo *MI = ITI.getMacroInfo()) {
				303	if (MI->isEnabled() && !DisableMacroExpansion) {
				304	++NumMacroExpanded;
				305	// If we started lexing a macro, enter the macro expansion body.
				306	// FIXME: Read/Validate the argument list here!
				307
				308	// If this macro expands to no tokens, don't bother to push it onto the
				309	// expansion stack, only to take it right back off.
				310	if (MI->getNumTokens() == 0) {
				311	// Ignore this macro use, just return the next token in the current
				312	// buffer.
				313	bool HadLeadingSpace = Identifier.hasLeadingSpace();
				314	bool IsAtStartOfLine = Identifier.isAtStartOfLine();
				315
				316	if (Lex(Identifier)) return true;
				317
				318	// If the identifier isn't on some OTHER line, inherit the leading
				319	// whitespace/first-on-a-line property of this token. This handles
				320	// stuff like "! XX," -> "! ," and " XX," -> " ,", when XX is
				321	// empty.
				322	if (!Identifier.isAtStartOfLine()) {
				323	if (IsAtStartOfLine) Identifier.SetFlag(LexerToken::StartOfLine);
				324	if (HadLeadingSpace) Identifier.SetFlag(LexerToken::LeadingSpace);
				325	}
				326	++NumFastMacroExpanded;
				327	return false;
				328
				329	} else if (MI->getNumTokens() == 1 &&
				330	// Don't handle identifiers, which might need recursive
				331	// expansion.
				332	MI->getReplacementToken(0).getIdentifierInfo() == 0) {
				333	// FIXME: Function-style macros only if no arguments?
				334
				335	// Otherwise, if this macro expands into a single trivially-expanded
				336	// token: expand it now. This handles common cases like
				337	// "#define VAL 42".
				338
				339	// Propagate the isAtStartOfLine/hasLeadingSpace markers of the macro
				340	// identifier to the expanded token.
				341	bool isAtStartOfLine = Identifier.isAtStartOfLine();
				342	bool hasLeadingSpace = Identifier.hasLeadingSpace();
				343
				344	// Replace the result token.
				345	Identifier = MI->getReplacementToken(0);
				346
				347	// Restore the StartOfLine/LeadingSpace markers.
				348	Identifier.SetFlagValue(LexerToken::StartOfLine , isAtStartOfLine);
				349	Identifier.SetFlagValue(LexerToken::LeadingSpace, hasLeadingSpace);
				350
				351	// FIXME: Get correct macro expansion stack location info!
				352
				353	// Since this is not an identifier token, it can't be macro expanded, so
				354	// we're done.
				355	++NumFastMacroExpanded;
				356	return false;
				357	}
				358
				359	// Start expanding the macro (FIXME, pass arguments).
				360	if (EnterMacro(Identifier))
				361	return true;
				362
				363	// Now that the macro is at the top of the include stack, ask the
				364	// preprocessor to read the next token from it.
				365	return Lex(Identifier);
				366	}
				367	}
				368
				369	// Change the kind of this identifier to the appropriate token kind, e.g.
				370	// turning "for" into a keyword.
				371	Identifier.SetKind(ITI.getTokenID());
				372
				373	// If this is an extension token, diagnose its use.
				374	if (ITI.isExtensionToken() && Diag(Identifier, diag::ext_token_used))
				375	return true;
				376	return false;
				377	}
				378
				379	/// HandleEndOfFile - This callback is invoked when the lexer hits the end of
				380	/// the current file. This either returns the EOF token or pops a level off
				381	/// the include stack and keeps going.
				382	bool Preprocessor::HandleEndOfFile(LexerToken &Result) {
				383	assert(!CurMacroExpander &&
				384	"Ending a file when currently in a macro!");
				385
				386	// If we are in a #if 0 block skipping tokens, and we see the end of the file,
				387	// this is an error condition. Just return the EOF token up to
				388	// SkipExcludedConditionalBlock. The Lexer will have already have issued
				389	// errors for the unterminated #if's on the conditional stack.
				390	if (isSkipping()) {
				391	Result.StartToken(CurLexer);
				392	Result.SetKind(tok::eof);
				393	Result.SetStart(CurLexer->BufferEnd);
				394	Result.SetEnd(CurLexer->BufferEnd);
				395	return false;
				396	}
				397
				398	// If this is a #include'd file, pop it off the include stack and continue
				399	// lexing the #includer file.
				400	if (!IncludeStack.empty()) {
				401	// We're done with the #included file.
				402	delete CurLexer;
				403	CurLexer = IncludeStack.back().TheLexer;
				404	CurNextDirLookup = IncludeStack.back().TheDirLookup;
				405	IncludeStack.pop_back();
				406	return Lex(Result);
				407	}
				408
				409	Result.StartToken(CurLexer);
				410	Result.SetKind(tok::eof);
				411	Result.SetStart(CurLexer->BufferEnd);
				412	Result.SetEnd(CurLexer->BufferEnd);
				413
				414	// We're done with the #included file.
				415	delete CurLexer;
				416	CurLexer = 0;
				417	return false;
				418	}
				419
				420	/// HandleEndOfMacro - This callback is invoked when the lexer hits the end of
				421	/// the current macro. This either returns the EOF token or pops a level off
				422	/// the include stack and keeps going.
				423	bool Preprocessor::HandleEndOfMacro(LexerToken &Result) {
				424	assert(CurMacroExpander && !CurLexer &&
				425	"Ending a macro when currently in a #include file!");
				426
				427	// Mark macro not ignored now that it is no longer being expanded.
				428	CurMacroExpander->getMacro().EnableMacro();
				429	delete CurMacroExpander;
				430
				431	if (!MacroStack.empty()) {
				432	// In a nested macro invocation, continue lexing from the macro.
				433	CurMacroExpander = MacroStack.back();
				434	MacroStack.pop_back();
				435	return Lex(Result);
				436	} else {
				437	CurMacroExpander = 0;
				438	// Handle this like a #include file being popped off the stack.
				439	return HandleEndOfFile(Result);
				440	}
				441	}
				442
				443
				444	//===----------------------------------------------------------------------===//
				445	// Utility Methods for Preprocessor Directive Handling.
				446	//===----------------------------------------------------------------------===//
				447
				448	/// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the
				449	/// current line until the tok::eom token is found.
				450	bool Preprocessor::DiscardUntilEndOfDirective() {
				451	LexerToken Tmp;
				452	do {
				453	if (LexUnexpandedToken(Tmp)) return true;
				454	} while (Tmp.getKind() != tok::eom);
				455	return false;
				456	}
				457
				458	/// ReadMacroName - Lex and validate a macro name, which occurs after a
				459	/// #define or #undef. This sets the token kind to eom and discards the rest
				460	/// of the macro line if the macro name is invalid.
				461	bool Preprocessor::ReadMacroName(LexerToken &MacroNameTok) {
				462	// Read the token, don't allow macro expansion on it.
				463	if (LexUnexpandedToken(MacroNameTok))
				464	return true;
				465
				466	// Missing macro name?
				467	if (MacroNameTok.getKind() == tok::eom)
				468	return Diag(MacroNameTok, diag::err_pp_missing_macro_name);
				469
				470	if (MacroNameTok.getIdentifierInfo() == 0) {
				471	if (Diag(MacroNameTok, diag::err_pp_macro_not_identifier))
				472	return true;
				473	// Fall through on error.
				474	} else if (0) {
				475	// FIXME: Error if defining a C++ named operator.
				476
				477	} else if (0) {
				478	// FIXME: Error if defining "defined", "__DATE__", and other predef macros
				479	// in C99 6.10.8.4.
				480	} else {
				481	// Okay, we got a good identifier node. Return it.
				482	return false;
				483	}
				484
				485
				486	// Invalid macro name, read and discard the rest of the line. Then set the
				487	// token kind to tok::eom.
				488	MacroNameTok.SetKind(tok::eom);
				489	return DiscardUntilEndOfDirective();
				490	}
				491
				492	/// CheckEndOfDirective - Ensure that the next token is a tok::eom token. If
				493	/// not, emit a diagnostic and consume up until the eom.
				494	bool Preprocessor::CheckEndOfDirective(const char *DirType) {
				495	LexerToken Tmp;
				496	if (Lex(Tmp)) return true;
				497	// There should be no tokens after the directive, but we allow them as an
				498	// extension.
				499	if (Tmp.getKind() != tok::eom) {
				500	if (Diag(Tmp, diag::ext_pp_extra_tokens_at_eol, DirType) \|\|
				501	DiscardUntilEndOfDirective())
				502	return true;
				503	}
				504	return false;
				505	}
				506
				507
				508
				509	/// SkipExcludedConditionalBlock - We just read a #if or related directive and
				510	/// decided that the subsequent tokens are in the #if'd out portion of the
				511	/// file. Lex the rest of the file, until we see an #endif. If
				512	/// FoundNonSkipPortion is true, then we have already emitted code for part of
				513	/// this #if directive, so #else/#elif blocks should never be entered. If ElseOk
				514	/// is true, then #else directives are ok, if not, then we have already seen one
				515	/// so a #else directive is a duplicate. When this returns, the caller can lex
				516	/// the first valid token.
				517	bool Preprocessor::SkipExcludedConditionalBlock(const char *IfTokenLoc,
				518	bool FoundNonSkipPortion,
				519	bool FoundElse) {
				520	++NumSkipped;
				521	assert(MacroStack.empty() && CurMacroExpander == 0 && CurLexer &&
				522	"Lexing a macro, not a file?");
				523
				524	CurLexer->pushConditionalLevel(IfTokenLoc, /isSkipping/false,
				525	FoundNonSkipPortion, FoundElse);
				526
				527	// Know that we are going to be skipping tokens. Set this flag to indicate
				528	// this, which has a couple of effects:
				529	// 1. If EOF of the current lexer is found, the include stack isn't popped.
				530	// 2. Identifier information is not looked up for identifier tokens. As an
				531	// effect of this, implicit macro expansion is naturally disabled.
				532	// 3. "#" tokens at the start of a line are treated as normal tokens, not
				533	// implicitly transformed by the lexer.
				534	// 4. All notes, warnings, and extension messages are disabled.
				535	//
				536	SkippingContents = true;
				537	LexerToken Tok;
				538	while (1) {
				539	if (CurLexer->Lex(Tok)) return true;
				540
				541	// If this is the end of the buffer, we have an error. The lexer will have
				542	// already handled this error condition, so just return and let the caller
				543	// lex after this #include.
				544	if (Tok.getKind() == tok::eof) break;
				545
				546	// If this token is not a preprocessor directive, just skip it.
				547	if (Tok.getKind() != tok::hash \|\| !Tok.isAtStartOfLine())
				548	continue;
				549
				550	// We just parsed a # character at the start of a line, so we're in
				551	// directive mode. Tell the lexer this so any newlines we see will be
				552	// converted into an EOM token (this terminates the macro).
				553	CurLexer->ParsingPreprocessorDirective = true;
				554
				555	// Read the next token, the directive flavor.
				556	if (LexUnexpandedToken(Tok)) return true;
				557
				558	// If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
				559	// something bogus), skip it.
				560	if (Tok.getKind() != tok::identifier) {
				561	CurLexer->ParsingPreprocessorDirective = false;
				562	continue;
				563	}
				564
				565	// If the first letter isn't i or e, it isn't intesting to us. We know that
				566	// this is safe in the face of spelling differences, because there is no way
				567	// to spell an i/e in a strange way that is another letter. Skipping this
				568	// allows us to avoid computing the spelling for #define/#undef and other
				569	// common directives.
				570	char FirstChar = Tok.getStart()[0];
				571	if (FirstChar >= 'a' && FirstChar <= 'z' &&
				572	FirstChar != 'i' && FirstChar != 'e') {
				573	CurLexer->ParsingPreprocessorDirective = false;
				574	continue;
				575	}
				576
				577	// Strip out trigraphs and embedded newlines.
				578	std::string Directive = Lexer::getSpelling(Tok, Features);
				579	FirstChar = Directive[0];
				580	if (FirstChar == 'i' && Directive[1] == 'f') {
				581	if (Directive == "if" \|\| Directive == "ifdef" \|\| Directive == "ifndef") {
				582	// We know the entire #if/#ifdef/#ifndef block will be skipped, don't
				583	// bother parsing the condition.
				584	if (DiscardUntilEndOfDirective()) return true;
				585	CurLexer->pushConditionalLevel(Tok.getStart(), /wasskipping/true,
				586	/foundnonskip/false,/fnddelse/false);
				587	}
				588	} else if (FirstChar == 'e') {
				589	if (Directive == "endif") {
				590	if (CheckEndOfDirective("#endif")) return true;
				591	PPConditionalInfo CondInfo;
				592	CondInfo.WasSkipping = true; // Silence bogus warning.
				593	bool InCond = CurLexer->popConditionalLevel(CondInfo);
				594	assert(!InCond && "Can't be skipping if not in a conditional!");
				595
				596	// If we popped the outermost skipping block, we're done skipping!
				597	if (!CondInfo.WasSkipping)
				598	break;
				599	} else if (Directive == "else") {
				600	// #else directive in a skipping conditional. If not in some other
				601	// skipping conditional, and if #else hasn't already been seen, enter it
				602	// as a non-skipping conditional.
				603	if (CheckEndOfDirective("#else")) return true;
				604	PPConditionalInfo &CondInfo = CurLexer->peekConditionalLevel();
				605
				606	// If this is a #else with a #else before it, report the error.
				607	if (CondInfo.FoundElse && Diag(Tok, diag::pp_err_else_after_else))
				608	return true;
				609
				610	// Note that we've seen a #else in this conditional.
				611	CondInfo.FoundElse = true;
				612
				613	// If the conditional is at the top level, and the #if block wasn't
				614	// entered, enter the #else block now.
				615	if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) {
				616	CondInfo.FoundNonSkip = true;
				617	break;
				618	}
				619	} else if (Directive == "elif") {
				620	PPConditionalInfo &CondInfo = CurLexer->peekConditionalLevel();
				621
				622	bool ShouldEnter;
				623	// If this is in a skipping block or if we're already handled this #if
				624	// block, don't bother parsing the condition.
				625	if (CondInfo.WasSkipping \|\| CondInfo.FoundNonSkip) {
				626	if (DiscardUntilEndOfDirective()) return true;
				627	ShouldEnter = false;
				628	} else {
				629	// Evaluate the #elif condition!
				630	const char *Start = CurLexer->BufferPtr;
				631
				632	// Restore the value of SkippingContents so that identifiers are
				633	// looked up, etc, inside the #elif expression.
				634	assert(SkippingContents && "We have to be skipping here!");
				635	SkippingContents = false;
				636	if (EvaluateDirectiveExpression(ShouldEnter))
				637	return true;
				638	SkippingContents = true;
				639	}
				640
				641	// If this is a #elif with a #else before it, report the error.
				642	if (CondInfo.FoundElse && Diag(Tok, diag::pp_err_elif_after_else))
				643	return true;
				644
				645	// If this condition is true, enter it!
				646	if (ShouldEnter) {
				647	CondInfo.FoundNonSkip = true;
				648	break;
				649	}
				650	}
				651	}
				652
				653	CurLexer->ParsingPreprocessorDirective = false;
				654	}
				655
				656	// Finally, if we are out of the conditional (saw an #endif or ran off the end
				657	// of the file, just stop skipping and return to lexing whatever came after
				658	// the #if block.
				659	SkippingContents = false;
				660
				661	return false;
				662	}
				663
				664	//===----------------------------------------------------------------------===//
				665	// Preprocessor Directive Handling.
				666	//===----------------------------------------------------------------------===//
				667
				668	/// HandleDirective - This callback is invoked when the lexer sees a # token
				669	/// at the start of a line. This consumes the directive, modifies the
				670	/// lexer/preprocessor state, and advances the lexer(s) so that the next token
				671	/// read is the correct one.
				672	bool Preprocessor::HandleDirective(LexerToken &Result) {
				673	// FIXME: TRADITIONAL: # with whitespace before it not recognized by K&R?
				674
				675	// We just parsed a # character at the start of a line, so we're in directive
				676	// mode. Tell the lexer this so any newlines we see will be converted into an
				677	// EOM token (this terminates the macro).
				678	CurLexer->ParsingPreprocessorDirective = true;
				679
				680	++NumDirectives;
				681
				682	// Read the next token, the directive flavor.
				683	if (LexUnexpandedToken(Result))
				684	return true; // Bail out.
				685
				686	switch (Result.getKind()) {
				687	default: break;
				688	case tok::eom:
				689	return false; // null directive.
				690
				691	#if 0
				692	case tok::numeric_constant:
				693	// FIXME: implement # 7 line numbers!
				694	break;
				695	#endif
				696	case tok::kw_else:
				697	return HandleElseDirective(Result);
				698	case tok::kw_if:
				699	return HandleIfDirective(Result);
				700	case tok::identifier:
				701	// Strip out trigraphs and embedded newlines.
				702	std::string Directive = Lexer::getSpelling(Result, Features);
				703	bool isExtension = false;
				704	switch (Directive.size()) {
				705	case 4:
				706	if (Directive == "line")
				707	;
				708	if (Directive == "elif")
				709	return HandleElifDirective(Result);
				710	if (Directive == "sccs") {
				711	isExtension = true;
				712	// SCCS is the same as #ident.
				713	}
				714	break;
				715	case 5:
				716	if (Directive == "endif")
				717	return HandleEndifDirective(Result);
				718	if (Directive == "ifdef")
				719	return HandleIfdefDirective(Result, false);
				720	if (Directive == "undef")
				721	return HandleUndefDirective(Result);
				722	if (Directive == "error")
				723	return HandleUserDiagnosticDirective(Result, false);
				724	if (Directive == "ident")
				725	isExtension = true;
				726	break;
				727	case 6:
				728	if (Directive == "define")
				729	return HandleDefineDirective(Result);
				730	if (Directive == "ifndef")
				731	return HandleIfdefDirective(Result, true);
				732	if (Directive == "import")
				733	return HandleImportDirective(Result);
				734	if (Directive == "pragma") {
				735	// FIXME: implement #pragma
				736	++NumPragma;
				737	#if 1
				738	// Read the rest of the PP line.
				739	do {
				740	if (Lex(Result)) return true;
				741	} while (Result.getKind() != tok::eom);
				742
				743	return false;
				744	#endif
				745	} else if (Directive == "assert") {
				746	isExtension = true;
				747	}
				748	break;
				749	case 7:
				750	if (Directive == "include") // Handle #include.
				751	return HandleIncludeDirective(Result);
				752	if (Directive == "warning")
				753	return Diag(Result, diag::ext_pp_warning_directive) \|\|
				754	HandleUserDiagnosticDirective(Result, true);
				755	break;
				756	case 8:
				757	if (Directive == "unassert") {
				758	isExtension = true;
				759	}
				760	break;
				761	case 12:
				762	if (Directive == "include_next") // Handle #include_next.
				763	return HandleIncludeNextDirective(Result);
				764	break;
				765	}
				766	break;
				767	}
				768
				769	// If we reached here, the preprocessing token is not valid!
				770	if (Diag(Result, diag::err_pp_invalid_directive))
				771	return true;
				772
				773	// Read the rest of the PP line.
				774	do {
				775	if (Lex(Result)) return true;
				776	} while (Result.getKind() != tok::eom);
				777
				778	// Okay, we're done parsing the directive.
				779	return false;
				780	}
				781
				782	bool Preprocessor::HandleUserDiagnosticDirective(LexerToken &Result,
				783	bool isWarning) {
				784	// Read the rest of the line raw. We do this because we don't want macros
				785	// to be expanded and we don't require that the tokens be valid preprocessing
				786	// tokens. For example, this is allowed: "#warning ` 'foo". GCC does
				787	// collapse multiple consequtive white space between tokens, but this isn't
				788	// specified by the standard.
				789	std::string Message = CurLexer->ReadToEndOfLine();
				790
				791	unsigned DiagID = isWarning ? diag::pp_hash_warning : diag::err_pp_hash_error;
				792	return Diag(Result, DiagID, Message);
				793	}
				794
				795	/// HandleIncludeDirective - The "#include" tokens have just been read, read the
				796	/// file to be included from the lexer, then include it! This is a common
				797	/// routine with functionality shared between #include, #include_next and
				798	/// #import.
				799	bool Preprocessor::HandleIncludeDirective(LexerToken &IncludeTok,
				800	const DirectoryLookup *LookupFrom,
				801	bool isImport) {
				802	++NumIncluded;
				803	LexerToken FilenameTok;
				804	if (CurLexer->LexIncludeFilename(FilenameTok))
				805	return true;
				806
				807	// If the token kind is EOM, the error has already been diagnosed.
				808	if (FilenameTok.getKind() == tok::eom)
				809	return false;
				810
				811	// Check that we don't have infinite #include recursion.
				812	if (IncludeStack.size() == MaxAllowedIncludeStackDepth-1)
				813	return Diag(FilenameTok, diag::err_pp_include_too_deep);
				814
				815	// Get the text form of the filename.
				816	std::string Filename = CurLexer->getSpelling(FilenameTok);
				817	assert(!Filename.empty() && "Can't have tokens with empty spellings!");
				818
				819	// Make sure the filename is <x> or "x".
				820	bool isAngled;
				821	if (Filename[0] == '<') {
				822	isAngled = true;
				823	if (Filename[Filename.size()-1] != '>')
				824	return Diag(FilenameTok, diag::err_pp_expects_filename);
				825	} else if (Filename[0] == '"') {
				826	isAngled = false;
				827	if (Filename[Filename.size()-1] != '"')
				828	return Diag(FilenameTok, diag::err_pp_expects_filename);
				829	} else {
				830	return Diag(FilenameTok, diag::err_pp_expects_filename);
				831	}
				832
				833	// Remove the quotes.
				834	Filename = std::string(Filename.begin()+1, Filename.end()-1);
				835
				836	// Diagnose #include "" as invalid.
				837	if (Filename.empty())
				838	return Diag(FilenameTok, diag::err_pp_empty_filename);
				839
				840	// Search include directories.
				841	const DirectoryLookup *NextDir;
				842	const FileEntry *File = LookupFile(Filename, isAngled, LookupFrom, NextDir);
				843	if (File == 0)
				844	return Diag(FilenameTok, diag::err_pp_file_not_found);
				845
				846	// Get information about this file.
				847	PerFileInfo &FileInfo = getFileInfo(File);
				848
				849	// If this is a #import directive, check that we have not already imported
				850	// this header.
				851	if (isImport) {
				852	// If this has already been imported, don't import it again.
				853	FileInfo.isImport = true;
				854
				855	// Has this already been #import'ed or #include'd?
				856	if (FileInfo.NumIncludes) return false;
				857	} else {
				858	// Otherwise, if this is a #include of a file that was previously #import'd
				859	// or if this is the second #include of a #pragma once file, ignore it.
				860	if (FileInfo.isImport)
				861	return false;
				862	}
				863
				864	// Look up the file, create a File ID for it.
				865	unsigned FileID =
				866	SourceMgr.createFileID(File, FilenameTok.getSourceLocation());
				867	if (FileID == 0)
				868	return Diag(FilenameTok, diag::err_pp_file_not_found);
				869
				870	// Finally, if all is good, enter the new file!
				871	EnterSourceFile(FileID, NextDir);
				872
				873	// Increment the number of times this file has been included.
				874	++FileInfo.NumIncludes;
				875
				876	return false;
				877	}
				878
				879	/// HandleIncludeNextDirective - Implements #include_next.
				880	///
				881	bool Preprocessor::HandleIncludeNextDirective(LexerToken &IncludeNextTok) {
				882	if (Diag(IncludeNextTok, diag::ext_pp_include_next_directive))
				883	return true;
				884
				885	// #include_next is like #include, except that we start searching after
				886	// the current found directory. If we can't do this, issue a
				887	// diagnostic.
				888	const DirectoryLookup *Lookup = CurNextDirLookup;
				889	if (IncludeStack.empty()) {
				890	Lookup = 0;
				891	if (Diag(IncludeNextTok, diag::pp_include_next_in_primary))
				892	return true;
				893	} else if (Lookup == 0) {
				894	if (Diag(IncludeNextTok, diag::pp_include_next_absolute_path))
				895	return true;
				896	}
				897
				898	return HandleIncludeDirective(IncludeNextTok, Lookup);
				899	}
				900
				901	/// HandleImportDirective - Implements #import.
				902	///
				903	bool Preprocessor::HandleImportDirective(LexerToken &ImportTok) {
				904	if (Diag(ImportTok, diag::ext_pp_import_directive)) return true;
				905
				906	return HandleIncludeDirective(ImportTok, 0, true);
				907	}
				908
				909	/// HandleDefineDirective - Implements #define. This consumes the entire macro
				910	/// line then lets the caller lex the next real token.
				911	///
				912	bool Preprocessor::HandleDefineDirective(LexerToken &DefineTok) {
				913	++NumDefined;
				914	LexerToken MacroNameTok;
				915	if (ReadMacroName(MacroNameTok))
				916	return true;
				917
				918	// Error reading macro name? If so, diagnostic already issued.
				919	if (MacroNameTok.getKind() == tok::eom)
				920	return false;
				921
				922	MacroInfo *MI = new MacroInfo(MacroNameTok.getSourceLocation());
				923
				924	LexerToken Tok;
				925	if (LexUnexpandedToken(Tok)) return true;
				926
				927	if (Tok.getKind() == tok::eom) {
				928	// If there is no body to this macro, we have no special handling here.
				929	} else if (Tok.getKind() == tok::l_paren && !Tok.hasLeadingSpace()) {
				930	// This is a function-like macro definition.
				931	//assert(0 && "Function-like macros not implemented!");
				932	#warning function like macros
				933	return DiscardUntilEndOfDirective();
				934
				935	} else if (!Tok.hasLeadingSpace()) {
				936	// C99 requires whitespace between the macro definition and the body. Emit
				937	// a diagnostic for something like "#define X+".
				938	if (Features.C99) {
				939	if (Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name))
				940	return true;
				941	} else {
				942	// FIXME: C90/C++ do not get this diagnostic, but it does get a similar
				943	// one in some cases!
				944	}
				945	} else {
				946	// This is a normal token with leading space. Clear the leading space
				947	// marker on the first token to get proper expansion.
				948	Tok.ClearFlag(LexerToken::LeadingSpace);
				949	}
				950
				951	// Read the rest of the macro body.
				952	while (Tok.getKind() != tok::eom) {
				953	MI->AddTokenToBody(Tok);
				954
				955	// FIXME: See create_iso_definition.
				956
				957	// Get the next token of the macro.
				958	if (LexUnexpandedToken(Tok)) return true;
				959	}
				960
				961	// Finally, if this identifier already had a macro defined for it, verify that
				962	// the macro bodies are identical and free the old definition.
				963	if (MacroInfo *OtherMI = MacroNameTok.getIdentifierInfo()->getMacroInfo()) {
				964	// FIXME: Verify the definition is the same.
				965	// Macros must be identical. This means all tokes and whitespace separation
				966	// must be the same.
				967	delete OtherMI;
				968	}
				969
				970	MacroNameTok.getIdentifierInfo()->setMacroInfo(MI);
				971	return false;
				972	}
				973
				974
				975	/// HandleUndefDirective - Implements #undef.
				976	///
				977	bool Preprocessor::HandleUndefDirective(LexerToken &UndefTok) {
				978	++NumUndefined;
				979	LexerToken MacroNameTok;
				980	if (ReadMacroName(MacroNameTok))
				981	return true;
				982
				983	// Error reading macro name? If so, diagnostic already issued.
				984	if (MacroNameTok.getKind() == tok::eom)
				985	return false;
				986
				987	// Check to see if this is the last token on the #undef line.
				988	if (CheckEndOfDirective("#undef")) return true;
				989
				990	// Okay, we finally have a valid identifier to undef.
				991	MacroInfo *MI = MacroNameTok.getIdentifierInfo()->getMacroInfo();
				992
				993	// If the macro is not defined, this is a noop undef, just return.
				994	if (MI == 0) return false;
				995
				996	#if 0 // FIXME: implement warn_unused_macros.
				997	if (CPP_OPTION (pfile, warn_unused_macros))
				998	_cpp_warn_if_unused_macro (pfile, node, NULL);
				999	#endif
				1000
				1001	// Free macro definition.
				1002	delete MI;
				1003	MacroNameTok.getIdentifierInfo()->setMacroInfo(0);
				1004	return false;
				1005	}
				1006
				1007
				1008	/// HandleIfdefDirective - Implements the #ifdef/#ifndef directive. isIfndef is
				1009	/// true when this is a #ifndef directive.
				1010	///
				1011	bool Preprocessor::HandleIfdefDirective(LexerToken &Result, bool isIfndef) {
				1012	++NumIf;
				1013	LexerToken DirectiveTok = Result;
				1014
				1015	LexerToken MacroNameTok;
				1016	if (ReadMacroName(MacroNameTok))
				1017	return true;
				1018
				1019	// Error reading macro name? If so, diagnostic already issued.
				1020	if (MacroNameTok.getKind() == tok::eom)
				1021	return false;
				1022
				1023	// Check to see if this is the last token on the #if[n]def line.
				1024	if (CheckEndOfDirective("#ifdef")) return true;
				1025
				1026	// Should we include the stuff contained by this directive?
				1027	if (!MacroNameTok.getIdentifierInfo()->getMacroInfo() == isIfndef) {
				1028	// Yes, remember that we are inside a conditional, then lex the next token.
				1029	CurLexer->pushConditionalLevel(DirectiveTok.getStart(), /wasskip/false,
				1030	/foundnonskip/true, /foundelse/false);
				1031	return false;
				1032	} else {
				1033	// No, skip the contents of this block and return the first token after it.
				1034	return SkipExcludedConditionalBlock(DirectiveTok.getStart(),
				1035	/Foundnonskip/false,
				1036	/FoundElse/false);
				1037	}
				1038	}
				1039
				1040	/// HandleIfDirective - Implements the #if directive.
				1041	///
				1042	bool Preprocessor::HandleIfDirective(LexerToken &IfToken) {
				1043	++NumIf;
				1044	const char *Start = CurLexer->BufferPtr;
				1045
				1046	bool ConditionalTrue = false;
				1047	if (EvaluateDirectiveExpression(ConditionalTrue))
				1048	return true;
				1049
				1050	// Should we include the stuff contained by this directive?
				1051	if (ConditionalTrue) {
				1052	// Yes, remember that we are inside a conditional, then lex the next token.
				1053	CurLexer->pushConditionalLevel(IfToken.getStart(), /wasskip/false,
				1054	/foundnonskip/true, /foundelse/false);
				1055	return false;
				1056	} else {
				1057	// No, skip the contents of this block and return the first token after it.
				1058	return SkipExcludedConditionalBlock(IfToken.getStart(),
				1059	/Foundnonskip/false,
				1060	/FoundElse/false);
				1061	}
				1062	}
				1063
				1064	/// HandleEndifDirective - Implements the #endif directive.
				1065	///
				1066	bool Preprocessor::HandleEndifDirective(LexerToken &EndifToken) {
				1067	++NumEndif;
				1068	// Check that this is the whole directive.
				1069	if (CheckEndOfDirective("#endif")) return true;
				1070
				1071	PPConditionalInfo CondInfo;
				1072	if (CurLexer->popConditionalLevel(CondInfo)) {
				1073	// No conditionals on the stack: this is an #endif without an #if.
				1074	return Diag(EndifToken, diag::err_pp_endif_without_if);
				1075	}
				1076
				1077	assert(!CondInfo.WasSkipping && !isSkipping() &&
				1078	"This code should only be reachable in the non-skipping case!");
				1079	return false;
				1080	}
				1081
				1082
				1083	bool Preprocessor::HandleElseDirective(LexerToken &Result) {
				1084	++NumElse;
				1085	// #else directive in a non-skipping conditional... start skipping.
				1086	if (CheckEndOfDirective("#else")) return true;
				1087
				1088	PPConditionalInfo CI;
				1089	if (CurLexer->popConditionalLevel(CI))
				1090	return Diag(Result, diag::pp_err_else_without_if);
				1091
				1092	// If this is a #else with a #else before it, report the error.
				1093	if (CI.FoundElse && Diag(Result, diag::pp_err_else_after_else))
				1094	return true;
				1095
				1096	// Finally, skip the rest of the contents of this block and return the first
				1097	// token after it.
				1098	return SkipExcludedConditionalBlock(CI.IfLoc, /Foundnonskip/true,
				1099	/FoundElse/true);
				1100	}
				1101
				1102	bool Preprocessor::HandleElifDirective(LexerToken &ElifToken) {
				1103	++NumElse;
				1104	// #elif directive in a non-skipping conditional... start skipping.
				1105	// We don't care what the condition is, because we will always skip it (since
				1106	// the block immediately before it was included).
				1107	if (DiscardUntilEndOfDirective()) return true;
				1108
				1109	PPConditionalInfo CI;
				1110	if (CurLexer->popConditionalLevel(CI))
				1111	return Diag(ElifToken, diag::pp_err_elif_without_if);
				1112
				1113	// If this is a #elif with a #else before it, report the error.
				1114	if (CI.FoundElse && Diag(ElifToken, diag::pp_err_elif_after_else))
				1115	return true;
				1116
				1117	// Finally, skip the rest of the contents of this block and return the first
				1118	// token after it.
				1119	return SkipExcludedConditionalBlock(CI.IfLoc, /Foundnonskip/true,
				1120	/FoundElse/CI.FoundElse);
				1121	}