Blame - lib/Lex/PPLexerChange.cpp - fp2-dev/platform/external/clang

blob: c15675114d314cf0d207b31d8461e824c3c6dfe0 [file] [log] [blame]

Chris Lattner	8c32b1a	2008-03-09 04:10:46 +0000	[diff] [blame]	1	//===--- PPLexerChange.cpp - Handle changing lexers in the preprocessor ---===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file implements pieces of the Preprocessor interface that manage the
				11	// current lexer stack.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "clang/Lex/Preprocessor.h"
				16	#include "clang/Lex/HeaderSearch.h"
				17	#include "clang/Lex/MacroInfo.h"
				18	#include "clang/Lex/PPCallbacks.h"
				19	#include "clang/Basic/Diagnostic.h"
				20	#include "clang/Basic/SourceManager.h"
				21	using namespace clang;
				22
				23	PPCallbacks::~PPCallbacks() {
				24	}
				25
				26
				27	//===----------------------------------------------------------------------===//
Chris Lattner	6b88450	2008-03-10 06:06:04 +0000	[diff] [blame]	28	// Miscellaneous Methods.
Chris Lattner	8c32b1a	2008-03-09 04:10:46 +0000	[diff] [blame]	29	//===----------------------------------------------------------------------===//
				30
Chris Lattner	8c32b1a	2008-03-09 04:10:46 +0000	[diff] [blame]	31	/// isInPrimaryFile - Return true if we're in the top-level file, not in a
Chris Lattner	7d39d74	2008-03-09 04:49:35 +0000	[diff] [blame]	32	/// #include. This looks through macro expansions and active _Pragma lexers.
Chris Lattner	8c32b1a	2008-03-09 04:10:46 +0000	[diff] [blame]	33	bool Preprocessor::isInPrimaryFile() const {
				34	if (CurLexer && !CurLexer->Is_PragmaLexer)
				35	return IncludeMacroStack.empty();
				36
				37	// If there are any stacked lexers, we're in a #include.
				38	assert(IncludeMacroStack[0].TheLexer &&
				39	!IncludeMacroStack[0].TheLexer->Is_PragmaLexer &&
				40	"Top level include stack isn't our primary lexer?");
				41	for (unsigned i = 1, e = IncludeMacroStack.size(); i != e; ++i)
				42	if (IncludeMacroStack[i].TheLexer &&
				43	!IncludeMacroStack[i].TheLexer->Is_PragmaLexer)
				44	return false;
				45	return true;
				46	}
				47
				48	/// getCurrentLexer - Return the current file lexer being lexed from. Note
				49	/// that this ignores any potentially active macro expansions and _Pragma
				50	/// expansions going on at the time.
				51	Lexer *Preprocessor::getCurrentFileLexer() const {
				52	if (CurLexer && !CurLexer->Is_PragmaLexer) return CurLexer;
				53
				54	// Look for a stacked lexer.
				55	for (unsigned i = IncludeMacroStack.size(); i != 0; --i) {
				56	Lexer *L = IncludeMacroStack[i-1].TheLexer;
				57	if (L && !L->Is_PragmaLexer) // Ignore macro & _Pragma expansions.
				58	return L;
				59	}
				60	return 0;
				61	}
				62
Chris Lattner	6b88450	2008-03-10 06:06:04 +0000	[diff] [blame]	63	/// LookAhead - This peeks ahead N tokens and returns that token without
				64	/// consuming any tokens. LookAhead(0) returns 'Tok', LookAhead(1) returns
				65	/// the token after Tok, etc.
				66	///
				67	/// NOTE: is a relatively expensive method, so it should not be used in common
				68	/// code paths if possible!
				69	///
				70	Token Preprocessor::LookAhead(unsigned N) {
Chris Lattner	5f9e24c	2008-03-10 06:20:22 +0000	[diff] [blame]	71	// FIXME: Optimize the case where multiple lookahead calls are used back to
				72	// back. Consider if the the parser contained (dynamically):
				73	// Lookahead(1); Lookahead(1); Lookahead(1)
				74	// This would return the same token 3 times, but would end up making lots of
				75	// token stream lexers to do it. To handle this common case, see if the top
				76	// of the lexer stack is a TokenStreamLexer with macro expansion disabled. If
				77	// so, see if it has 'N' tokens available in it. If so, just return the
				78	// token.
				79
				80	// FIXME: Optimize the case when the parser does multiple nearby lookahead
				81	// calls. For example, consider:
				82	// Lookahead(0); Lookahead(1); Lookahead(2);
				83	// The previous optimization won't apply, and there won't be any space left in
				84	// the array that was previously new'd. To handle this, always round up the
				85	// size we new to a multiple of 16 tokens. If the previous buffer has space
				86	// left, we can just grow it. This means we only have to do the new 1/16th as
				87	// often.
				88
Chris Lattner	ea301e1	2008-03-24 21:14:55 +0000	[diff] [blame]	89	Token *LookaheadTokens = new Token[N+1];
Chris Lattner	6b88450	2008-03-10 06:06:04 +0000	[diff] [blame]	90
				91	// Read N+1 tokens into LookaheadTokens. After this loop, Tok is the token
				92	// to return.
				93	Token Tok;
				94	unsigned NumTokens = 0;
				95	for (; N != ~0U; --N, ++NumTokens) {
				96	Lex(Tok);
				97	LookaheadTokens[NumTokens] = Tok;
				98
				99	// If we got to EOF, don't lex past it. This will cause LookAhead to return
				100	// the EOF token.
				101	if (Tok.is(tok::eof))
				102	break;
				103	}
				104
				105	// Okay, at this point, we have the token we want to return in Tok. However,
				106	// we read it and a bunch of other stuff (in LookaheadTokens) that we must
				107	// allow subsequent calls to 'Lex' to return. To do this, we push a new token
				108	// lexer onto the lexer stack with the tokens we read here. This passes
				109	// ownership of LookaheadTokens to EnterTokenStream.
				110	//
				111	// Note that we disable macro expansion of the tokens from this buffer, since
				112	// any macros have already been expanded, and the internal preprocessor state
				113	// may already read past new macros. Consider something like LookAhead(1) on
				114	// X
				115	// #define X 14
				116	// Y
				117	// The lookahead call should return 'Y', and the next Lex call should return
				118	// 'X' even though X -> 14 has already been entered as a macro.
				119	//
				120	EnterTokenStream(LookaheadTokens, NumTokens, true /DisableExpansion/,
				121	true /OwnsTokens/);
				122	return Tok;
				123	}
				124
				125
				126	//===----------------------------------------------------------------------===//
				127	// Methods for Entering and Callbacks for leaving various contexts
				128	//===----------------------------------------------------------------------===//
Chris Lattner	8c32b1a	2008-03-09 04:10:46 +0000	[diff] [blame]	129
				130	/// EnterSourceFile - Add a source file to the top of the include stack and
				131	/// start lexing tokens from it instead of the current buffer. Return true
				132	/// on failure.
				133	void Preprocessor::EnterSourceFile(unsigned FileID,
				134	const DirectoryLookup *CurDir) {
				135	assert(CurTokenLexer == 0 && "Cannot #include a file inside a macro!");
				136	++NumEnteredSourceFiles;
				137
				138	if (MaxIncludeStackDepth < IncludeMacroStack.size())
				139	MaxIncludeStackDepth = IncludeMacroStack.size();
				140
				141	Lexer TheLexer = new Lexer(SourceLocation::getFileLoc(FileID, 0), this);
				142	EnterSourceFileWithLexer(TheLexer, CurDir);
				143	}
				144
				145	/// EnterSourceFile - Add a source file to the top of the include stack and
				146	/// start lexing tokens from it instead of the current buffer.
				147	void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer,
				148	const DirectoryLookup *CurDir) {
				149
				150	// Add the current lexer to the include stack.
				151	if (CurLexer \|\| CurTokenLexer)
				152	IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
				153	CurTokenLexer));
				154
				155	CurLexer = TheLexer;
				156	CurDirLookup = CurDir;
				157	CurTokenLexer = 0;
				158
				159	// Notify the client, if desired, that we are in a new source file.
				160	if (Callbacks && !CurLexer->Is_PragmaLexer) {
				161	DirectoryLookup::DirType FileType = DirectoryLookup::NormalHeaderDir;
				162
				163	// Get the file entry for the current file.
				164	if (const FileEntry *FE =
				165	SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc()))
				166	FileType = HeaderInfo.getFileDirFlavor(FE);
				167
				168	Callbacks->FileChanged(CurLexer->getFileLoc(),
				169	PPCallbacks::EnterFile, FileType);
				170	}
				171	}
				172
				173
				174
				175	/// EnterMacro - Add a Macro to the top of the include stack and start lexing
				176	/// tokens from it instead of the current buffer.
				177	void Preprocessor::EnterMacro(Token &Tok, MacroArgs *Args) {
				178	IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
				179	CurTokenLexer));
				180	CurLexer = 0;
				181	CurDirLookup = 0;
				182
				183	if (NumCachedTokenLexers == 0) {
				184	CurTokenLexer = new TokenLexer(Tok, Args, *this);
				185	} else {
				186	CurTokenLexer = TokenLexerCache[--NumCachedTokenLexers];
				187	CurTokenLexer->Init(Tok, Args);
				188	}
				189	}
				190
				191	/// EnterTokenStream - Add a "macro" context to the top of the include stack,
Chris Lattner	6b88450	2008-03-10 06:06:04 +0000	[diff] [blame]	192	/// which will cause the lexer to start returning the specified tokens.
				193	///
				194	/// If DisableMacroExpansion is true, tokens lexed from the token stream will
				195	/// not be subject to further macro expansion. Otherwise, these tokens will
				196	/// be re-macro-expanded when/if expansion is enabled.
				197	///
				198	/// If OwnsTokens is false, this method assumes that the specified stream of
				199	/// tokens has a permanent owner somewhere, so they do not need to be copied.
				200	/// If it is true, it assumes the array of tokens is allocated with new[] and
				201	/// must be freed.
				202	///
				203	void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks,
				204	bool DisableMacroExpansion,
				205	bool OwnsTokens) {
Chris Lattner	8c32b1a	2008-03-09 04:10:46 +0000	[diff] [blame]	206	// Save our current state.
				207	IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup,
				208	CurTokenLexer));
				209	CurLexer = 0;
				210	CurDirLookup = 0;
				211
				212	// Create a macro expander to expand from the specified token stream.
				213	if (NumCachedTokenLexers == 0) {
Chris Lattner	6b88450	2008-03-10 06:06:04 +0000	[diff] [blame]	214	CurTokenLexer = new TokenLexer(Toks, NumToks, DisableMacroExpansion,
				215	OwnsTokens, *this);
Chris Lattner	8c32b1a	2008-03-09 04:10:46 +0000	[diff] [blame]	216	} else {
				217	CurTokenLexer = TokenLexerCache[--NumCachedTokenLexers];
Chris Lattner	6b88450	2008-03-10 06:06:04 +0000	[diff] [blame]	218	CurTokenLexer->Init(Toks, NumToks, DisableMacroExpansion, OwnsTokens);
Chris Lattner	8c32b1a	2008-03-09 04:10:46 +0000	[diff] [blame]	219	}
				220	}
				221
				222	/// HandleEndOfFile - This callback is invoked when the lexer hits the end of
				223	/// the current file. This either returns the EOF token or pops a level off
				224	/// the include stack and keeps going.
				225	bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
				226	assert(!CurTokenLexer &&
				227	"Ending a file when currently in a macro!");
				228
				229	// See if this file had a controlling macro.
				230	if (CurLexer) { // Not ending a macro, ignore it.
				231	if (const IdentifierInfo *ControllingMacro =
				232	CurLexer->MIOpt.GetControllingMacroAtEndOfFile()) {
				233	// Okay, this has a controlling macro, remember in PerFileInfo.
				234	if (const FileEntry *FE =
				235	SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc()))
				236	HeaderInfo.SetFileControllingMacro(FE, ControllingMacro);
				237	}
				238	}
				239
				240	// If this is a #include'd file, pop it off the include stack and continue
				241	// lexing the #includer file.
				242	if (!IncludeMacroStack.empty()) {
				243	// We're done with the #included file.
				244	RemoveTopOfLexerStack();
				245
				246	// Notify the client, if desired, that we are in a new source file.
				247	if (Callbacks && !isEndOfMacro && CurLexer) {
				248	DirectoryLookup::DirType FileType = DirectoryLookup::NormalHeaderDir;
				249
				250	// Get the file entry for the current file.
				251	if (const FileEntry *FE =
				252	SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc()))
				253	FileType = HeaderInfo.getFileDirFlavor(FE);
				254
				255	Callbacks->FileChanged(CurLexer->getSourceLocation(CurLexer->BufferPtr),
				256	PPCallbacks::ExitFile, FileType);
				257	}
				258
				259	// Client should lex another token.
				260	return false;
				261	}
				262
				263	// If the file ends with a newline, form the EOF token on the newline itself,
				264	// rather than "on the line following it", which doesn't exist. This makes
				265	// diagnostics relating to the end of file include the last file that the user
				266	// actually typed, which is goodness.
				267	const char *EndPos = CurLexer->BufferEnd;
				268	if (EndPos != CurLexer->BufferStart &&
				269	(EndPos[-1] == '\n' \|\| EndPos[-1] == '\r')) {
				270	--EndPos;
				271
				272	// Handle \n\r and \r\n:
				273	if (EndPos != CurLexer->BufferStart &&
				274	(EndPos[-1] == '\n' \|\| EndPos[-1] == '\r') &&
				275	EndPos[-1] != EndPos[0])
				276	--EndPos;
				277	}
				278
				279	Result.startToken();
				280	CurLexer->BufferPtr = EndPos;
				281	CurLexer->FormTokenWithChars(Result, EndPos);
				282	Result.setKind(tok::eof);
				283
				284	// We're done with the #included file.
				285	delete CurLexer;
				286	CurLexer = 0;
				287
				288	// This is the end of the top-level file. If the diag::pp_macro_not_used
				289	// diagnostic is enabled, look for macros that have not been used.
				290	if (Diags.getDiagnosticLevel(diag::pp_macro_not_used) != Diagnostic::Ignored){
				291	for (llvm::DenseMap<IdentifierInfo, MacroInfo>::iterator I =
				292	Macros.begin(), E = Macros.end(); I != E; ++I) {
				293	if (!I->second->isUsed())
				294	Diag(I->second->getDefinitionLoc(), diag::pp_macro_not_used);
				295	}
				296	}
				297	return true;
				298	}
				299
				300	/// HandleEndOfTokenLexer - This callback is invoked when the current TokenLexer
				301	/// hits the end of its token stream.
				302	bool Preprocessor::HandleEndOfTokenLexer(Token &Result) {
				303	assert(CurTokenLexer && !CurLexer &&
				304	"Ending a macro when currently in a #include file!");
				305
				306	// Delete or cache the now-dead macro expander.
				307	if (NumCachedTokenLexers == TokenLexerCacheSize)
				308	delete CurTokenLexer;
				309	else
				310	TokenLexerCache[NumCachedTokenLexers++] = CurTokenLexer;
				311
				312	// Handle this like a #include file being popped off the stack.
				313	CurTokenLexer = 0;
				314	return HandleEndOfFile(Result, true);
				315	}
				316
				317	/// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the
				318	/// lexer stack. This should only be used in situations where the current
				319	/// state of the top-of-stack lexer is unknown.
				320	void Preprocessor::RemoveTopOfLexerStack() {
				321	assert(!IncludeMacroStack.empty() && "Ran out of stack entries to load");
				322
				323	if (CurTokenLexer) {
				324	// Delete or cache the now-dead macro expander.
				325	if (NumCachedTokenLexers == TokenLexerCacheSize)
				326	delete CurTokenLexer;
				327	else
				328	TokenLexerCache[NumCachedTokenLexers++] = CurTokenLexer;
				329	} else {
				330	delete CurLexer;
				331	}
				332	CurLexer = IncludeMacroStack.back().TheLexer;
				333	CurDirLookup = IncludeMacroStack.back().TheDirLookup;
				334	CurTokenLexer = IncludeMacroStack.back().TheTokenLexer;
				335	IncludeMacroStack.pop_back();
				336	}
				337
				338	/// HandleMicrosoftCommentPaste - When the macro expander pastes together a
				339	/// comment (/##/) in microsoft mode, this method handles updating the current
				340	/// state, returning the token on the next source line.
				341	void Preprocessor::HandleMicrosoftCommentPaste(Token &Tok) {
				342	assert(CurTokenLexer && !CurLexer &&
				343	"Pasted comment can only be formed from macro");
				344
				345	// We handle this by scanning for the closest real lexer, switching it to
				346	// raw mode and preprocessor mode. This will cause it to return \n as an
				347	// explicit EOM token.
				348	Lexer *FoundLexer = 0;
				349	bool LexerWasInPPMode = false;
				350	for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) {
				351	IncludeStackInfo &ISI = *(IncludeMacroStack.end()-i-1);
				352	if (ISI.TheLexer == 0) continue; // Scan for a real lexer.
				353
				354	// Once we find a real lexer, mark it as raw mode (disabling macro
				355	// expansions) and preprocessor mode (return EOM). We know that the lexer
				356	// was not in raw mode before, because the macro that the comment came
				357	// from was expanded. However, it could have already been in preprocessor
				358	// mode (#if COMMENT) in which case we have to return it to that mode and
				359	// return EOM.
				360	FoundLexer = ISI.TheLexer;
				361	FoundLexer->LexingRawMode = true;
				362	LexerWasInPPMode = FoundLexer->ParsingPreprocessorDirective;
				363	FoundLexer->ParsingPreprocessorDirective = true;
				364	break;
				365	}
				366
				367	// Okay, we either found and switched over the lexer, or we didn't find a
				368	// lexer. In either case, finish off the macro the comment came from, getting
				369	// the next token.
				370	if (!HandleEndOfTokenLexer(Tok)) Lex(Tok);
				371
				372	// Discarding comments as long as we don't have EOF or EOM. This 'comments
				373	// out' the rest of the line, including any tokens that came from other macros
				374	// that were active, as in:
				375	// #define submacro a COMMENT b
				376	// submacro c
				377	// which should lex to 'a' only: 'b' and 'c' should be removed.
				378	while (Tok.isNot(tok::eom) && Tok.isNot(tok::eof))
				379	Lex(Tok);
				380
				381	// If we got an eom token, then we successfully found the end of the line.
				382	if (Tok.is(tok::eom)) {
				383	assert(FoundLexer && "Can't get end of line without an active lexer");
				384	// Restore the lexer back to normal mode instead of raw mode.
				385	FoundLexer->LexingRawMode = false;
				386
				387	// If the lexer was already in preprocessor mode, just return the EOM token
				388	// to finish the preprocessor line.
				389	if (LexerWasInPPMode) return;
				390
				391	// Otherwise, switch out of PP mode and return the next lexed token.
				392	FoundLexer->ParsingPreprocessorDirective = false;
				393	return Lex(Tok);
				394	}
				395
				396	// If we got an EOF token, then we reached the end of the token stream but
				397	// didn't find an explicit \n. This can only happen if there was no lexer
				398	// active (an active lexer would return EOM at EOF if there was no \n in
				399	// preprocessor directive mode), so just return EOF as our token.
				400	assert(!FoundLexer && "Lexer should return EOM before EOF in PP mode");
				401	}