Blame - lib/Lex/MacroArgs.cpp - platform/external/clang

blob: 1846d1c05e3084f3df7752d270abb95475e8d8a8 [file] [log] [blame]

Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	1	//===--- MacroArgs.cpp - Formal argument info for Macros ------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// This file implements the MacroArgs interface.
				11	//
				12	//===----------------------------------------------------------------------===//
				13
				14	#include "MacroArgs.h"
				15	#include "clang/Lex/MacroInfo.h"
				16	#include "clang/Lex/Preprocessor.h"
Chris Lattner	500d329	2009-01-29 05:15:15 +0000	[diff] [blame]	17	#include "clang/Lex/LexDiagnostic.h"
David Blaikie	d7bb6a0	2011-09-22 02:03:12 +0000	[diff] [blame]	18
				19	#include <algorithm>
				20
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	21	using namespace clang;
				22
				23	/// MacroArgs ctor function - This destroys the vector passed in.
				24	MacroArgs MacroArgs::create(const MacroInfo MI,
David Blaikie	d7bb6a0	2011-09-22 02:03:12 +0000	[diff] [blame]	25	llvm::ArrayRef<Token> UnexpArgTokens,
				26	bool VarargsElided, Preprocessor &PP) {
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	27	assert(MI->isFunctionLike() &&
				28	"Can't have args for an object-like macro!");
Chris Lattner	4fe739f	2009-12-28 06:36:46 +0000	[diff] [blame]	29	MacroArgs **ResultEnt = 0;
				30	unsigned ClosestMatch = ~0U;
Chris Lattner	4608459	2009-12-15 20:48:12 +0000	[diff] [blame]	31
				32	// See if we have an entry with a big enough argument list to reuse on the
				33	// free list. If so, reuse it.
				34	for (MacroArgs *Entry = &PP.MacroArgCache; Entry;
				35	Entry = &(*Entry)->ArgCache)
David Blaikie	d7bb6a0	2011-09-22 02:03:12 +0000	[diff] [blame]	36	if ((*Entry)->NumUnexpArgTokens >= UnexpArgTokens.size() &&
Chris Lattner	4fe739f	2009-12-28 06:36:46 +0000	[diff] [blame]	37	(*Entry)->NumUnexpArgTokens < ClosestMatch) {
				38	ResultEnt = Entry;
				39
				40	// If we have an exact match, use it.
David Blaikie	d7bb6a0	2011-09-22 02:03:12 +0000	[diff] [blame]	41	if ((*Entry)->NumUnexpArgTokens == UnexpArgTokens.size())
Chris Lattner	4fe739f	2009-12-28 06:36:46 +0000	[diff] [blame]	42	break;
				43	// Otherwise, use the best fit.
				44	ClosestMatch = (*Entry)->NumUnexpArgTokens;
Chris Lattner	4608459	2009-12-15 20:48:12 +0000	[diff] [blame]	45	}
				46
Chris Lattner	4fe739f	2009-12-28 06:36:46 +0000	[diff] [blame]	47	MacroArgs *Result;
				48	if (ResultEnt == 0) {
Chris Lattner	4608459	2009-12-15 20:48:12 +0000	[diff] [blame]	49	// Allocate memory for a MacroArgs object with the lexer tokens at the end.
David Blaikie	d7bb6a0	2011-09-22 02:03:12 +0000	[diff] [blame]	50	Result = (MacroArgs*)malloc(sizeof(MacroArgs) +
				51	UnexpArgTokens.size() * sizeof(Token));
Chris Lattner	4608459	2009-12-15 20:48:12 +0000	[diff] [blame]	52	// Construct the MacroArgs object.
David Blaikie	d7bb6a0	2011-09-22 02:03:12 +0000	[diff] [blame]	53	new (Result) MacroArgs(UnexpArgTokens.size(), VarargsElided);
Chris Lattner	4608459	2009-12-15 20:48:12 +0000	[diff] [blame]	54	} else {
Chris Lattner	4fe739f	2009-12-28 06:36:46 +0000	[diff] [blame]	55	Result = *ResultEnt;
				56	// Unlink this node from the preprocessors singly linked list.
				57	*ResultEnt = Result->ArgCache;
David Blaikie	d7bb6a0	2011-09-22 02:03:12 +0000	[diff] [blame]	58	Result->NumUnexpArgTokens = UnexpArgTokens.size();
Chris Lattner	4608459	2009-12-15 20:48:12 +0000	[diff] [blame]	59	Result->VarargsElided = VarargsElided;
				60	}
Mike Stump	1eb4433	2009-09-09 15:08:12 +0000	[diff] [blame]	61
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	62	// Copy the actual unexpanded tokens to immediately after the result ptr.
David Blaikie	d7bb6a0	2011-09-22 02:03:12 +0000	[diff] [blame]	63	if (!UnexpArgTokens.empty())
				64	std::copy(UnexpArgTokens.begin(), UnexpArgTokens.end(),
				65	const_cast<Token*>(Result->getUnexpArgument(0)));
Mike Stump	1eb4433	2009-09-09 15:08:12 +0000	[diff] [blame]	66
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	67	return Result;
				68	}
				69
				70	/// destroy - Destroy and deallocate the memory for this object.
				71	///
Chris Lattner	561395b	2009-12-14 22:12:52 +0000	[diff] [blame]	72	void MacroArgs::destroy(Preprocessor &PP) {
Chris Lattner	4608459	2009-12-15 20:48:12 +0000	[diff] [blame]	73	StringifiedArgs.clear();
				74
				75	// Don't clear PreExpArgTokens, just clear the entries. Clearing the entries
				76	// would deallocate the element vectors.
				77	for (unsigned i = 0, e = PreExpArgTokens.size(); i != e; ++i)
				78	PreExpArgTokens[i].clear();
				79
				80	// Add this to the preprocessor's free list.
				81	ArgCache = PP.MacroArgCache;
				82	PP.MacroArgCache = this;
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	83	}
				84
Chris Lattner	23f77e5	2009-12-15 01:51:03 +0000	[diff] [blame]	85	/// deallocate - This should only be called by the Preprocessor when managing
				86	/// its freelist.
				87	MacroArgs *MacroArgs::deallocate() {
				88	MacroArgs *Next = ArgCache;
				89
				90	// Run the dtor to deallocate the vectors.
				91	this->~MacroArgs();
				92	// Release the memory for the object.
				93	free(this);
				94
				95	return Next;
				96	}
				97
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	98
				99	/// getArgLength - Given a pointer to an expanded or unexpanded argument,
				100	/// return the number of tokens, not counting the EOF, that make up the
				101	/// argument.
				102	unsigned MacroArgs::getArgLength(const Token *ArgPtr) {
				103	unsigned NumArgTokens = 0;
				104	for (; ArgPtr->isNot(tok::eof); ++ArgPtr)
				105	++NumArgTokens;
				106	return NumArgTokens;
				107	}
				108
				109
				110	/// getUnexpArgument - Return the unexpanded tokens for the specified formal.
				111	///
				112	const Token *MacroArgs::getUnexpArgument(unsigned Arg) const {
				113	// The unexpanded argument tokens start immediately after the MacroArgs object
				114	// in memory.
				115	const Token Start = (const Token )(this+1);
				116	const Token *Result = Start;
				117	// Scan to find Arg.
				118	for (; Arg; ++Result) {
				119	assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
				120	if (Result->is(tok::eof))
				121	--Arg;
				122	}
Chris Lattner	9fc9e77	2009-05-13 00:55:26 +0000	[diff] [blame]	123	assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	124	return Result;
				125	}
				126
				127
				128	/// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
				129	/// by pre-expansion, return false. Otherwise, conservatively return true.
				130	bool MacroArgs::ArgNeedsPreexpansion(const Token *ArgTok,
				131	Preprocessor &PP) const {
				132	// If there are no identifiers in the argument list, or if the identifiers are
				133	// known to not be macros, pre-expansion won't modify it.
				134	for (; ArgTok->isNot(tok::eof); ++ArgTok)
				135	if (IdentifierInfo *II = ArgTok->getIdentifierInfo()) {
				136	if (II->hasMacroDefinition() && PP.getMacroInfo(II)->isEnabled())
				137	// Return true even though the macro could be a function-like macro
				138	// without a following '(' token.
				139	return true;
				140	}
				141	return false;
				142	}
				143
				144	/// getPreExpArgument - Return the pre-expanded form of the specified
				145	/// argument.
				146	const std::vector<Token> &
Chris Lattner	f5809a7	2009-12-28 06:17:16 +0000	[diff] [blame]	147	MacroArgs::getPreExpArgument(unsigned Arg, const MacroInfo *MI,
				148	Preprocessor &PP) {
				149	assert(Arg < MI->getNumArgs() && "Invalid argument number!");
Mike Stump	1eb4433	2009-09-09 15:08:12 +0000	[diff] [blame]	150
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	151	// If we have already computed this, return it.
Chris Lattner	f5809a7	2009-12-28 06:17:16 +0000	[diff] [blame]	152	if (PreExpArgTokens.size() < MI->getNumArgs())
				153	PreExpArgTokens.resize(MI->getNumArgs());
				154
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	155	std::vector<Token> &Result = PreExpArgTokens[Arg];
				156	if (!Result.empty()) return Result;
				157
				158	const Token *AT = getUnexpArgument(Arg);
				159	unsigned NumToks = getArgLength(AT)+1; // Include the EOF.
Mike Stump	1eb4433	2009-09-09 15:08:12 +0000	[diff] [blame]	160
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	161	// Otherwise, we have to pre-expand this argument, populating Result. To do
				162	// this, we set up a fake TokenLexer to lex from the unexpanded argument
				163	// list. With this installed, we lex expanded tokens until we hit the EOF
				164	// token at the end of the unexp list.
Mike Stump	1eb4433	2009-09-09 15:08:12 +0000	[diff] [blame]	165	PP.EnterTokenStream(AT, NumToks, false /disable expand/,
Chris Lattner	6b88450	2008-03-10 06:06:04 +0000	[diff] [blame]	166	false /owns tokens/);
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	167
				168	// Lex all of the macro-expanded tokens into Result.
				169	do {
				170	Result.push_back(Token());
Chris Lattner	7c35122	2009-01-26 04:33:10 +0000	[diff] [blame]	171	Token &Tok = Result.back();
				172	PP.Lex(Tok);
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	173	} while (Result.back().isNot(tok::eof));
Mike Stump	1eb4433	2009-09-09 15:08:12 +0000	[diff] [blame]	174
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	175	// Pop the token stream off the top of the stack. We know that the internal
				176	// pointer inside of it is to the "end" of the token stream, but the stack
				177	// will not otherwise be popped until the next token is lexed. The problem is
				178	// that the token may be lexed sometime after the vector of tokens itself is
				179	// destroyed, which would be badness.
				180	PP.RemoveTopOfLexerStack();
				181	return Result;
				182	}
				183
				184
				185	/// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of
				186	/// tokens into the literal string token that should be produced by the C #
				187	/// preprocessor operator. If Charify is true, then it should be turned into
				188	/// a character literal for the Microsoft charize (#@) extension.
				189	///
				190	Token MacroArgs::StringifyArgument(const Token *ArgToks,
Argyrios Kyrtzidis	b73377e	2011-07-07 03:40:34 +0000	[diff] [blame]	191	Preprocessor &PP, bool Charify,
Abramo Bagnara	a08529c	2011-10-03 18:39:03 +0000	[diff] [blame]	192	SourceLocation ExpansionLocStart,
				193	SourceLocation ExpansionLocEnd) {
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	194	Token Tok;
				195	Tok.startToken();
Chris Lattner	6633522	2009-12-23 19:15:27 +0000	[diff] [blame]	196	Tok.setKind(Charify ? tok::char_constant : tok::string_literal);
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	197
				198	const Token *ArgTokStart = ArgToks;
Mike Stump	1eb4433	2009-09-09 15:08:12 +0000	[diff] [blame]	199
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	200	// Stringify all the tokens.
Chris Lattner	c19e8a2	2009-01-05 23:04:18 +0000	[diff] [blame]	201	llvm::SmallString<128> Result;
				202	Result += "\"";
Mike Stump	1eb4433	2009-09-09 15:08:12 +0000	[diff] [blame]	203
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	204	bool isFirst = true;
				205	for (; ArgToks->isNot(tok::eof); ++ArgToks) {
				206	const Token &Tok = *ArgToks;
				207	if (!isFirst && (Tok.hasLeadingSpace() \|\| Tok.isAtStartOfLine()))
				208	Result += ' ';
				209	isFirst = false;
Mike Stump	1eb4433	2009-09-09 15:08:12 +0000	[diff] [blame]	210
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	211	// If this is a string or character constant, escape the token as specified
				212	// by 6.10.3.2p2.
				213	if (Tok.is(tok::string_literal) \|\| // "foo"
				214	Tok.is(tok::wide_string_literal) \|\| // L"foo"
Douglas Gregor	5cee119	2011-07-27 05:40:30 +0000	[diff] [blame]	215	Tok.is(tok::utf8_string_literal) \|\| // u8"foo"
				216	Tok.is(tok::utf16_string_literal) \|\| // u"foo"
				217	Tok.is(tok::utf32_string_literal) \|\| // U"foo"
				218	Tok.is(tok::char_constant) \|\| // 'x'
				219	Tok.is(tok::wide_char_constant) \|\| // L'x'.
				220	Tok.is(tok::utf16_char_constant) \|\| // u'x'.
				221	Tok.is(tok::utf32_char_constant)) { // U'x'.
Douglas Gregor	453091c	2010-03-16 22:30:13 +0000	[diff] [blame]	222	bool Invalid = false;
				223	std::string TokStr = PP.getSpelling(Tok, &Invalid);
				224	if (!Invalid) {
				225	std::string Str = Lexer::Stringify(TokStr);
				226	Result.append(Str.begin(), Str.end());
				227	}
Argyrios Kyrtzidis	8e85e85	2011-09-04 03:32:19 +0000	[diff] [blame]	228	} else if (Tok.is(tok::code_completion)) {
				229	PP.CodeCompleteNaturalLanguage();
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	230	} else {
Chris Lattner	c19e8a2	2009-01-05 23:04:18 +0000	[diff] [blame]	231	// Otherwise, just append the token. Do some gymnastics to get the token
				232	// in place and avoid copies where possible.
				233	unsigned CurStrLen = Result.size();
				234	Result.resize(CurStrLen+Tok.getLength());
				235	const char *BufPtr = &Result[CurStrLen];
Douglas Gregor	453091c	2010-03-16 22:30:13 +0000	[diff] [blame]	236	bool Invalid = false;
				237	unsigned ActualTokLen = PP.getSpelling(Tok, BufPtr, &Invalid);
Mike Stump	1eb4433	2009-09-09 15:08:12 +0000	[diff] [blame]	238
Douglas Gregor	453091c	2010-03-16 22:30:13 +0000	[diff] [blame]	239	if (!Invalid) {
				240	// If getSpelling returned a pointer to an already uniqued version of
				241	// the string instead of filling in BufPtr, memcpy it onto our string.
				242	if (BufPtr != &Result[CurStrLen])
				243	memcpy(&Result[CurStrLen], BufPtr, ActualTokLen);
Mike Stump	1eb4433	2009-09-09 15:08:12 +0000	[diff] [blame]	244
Douglas Gregor	453091c	2010-03-16 22:30:13 +0000	[diff] [blame]	245	// If the token was dirty, the spelling may be shorter than the token.
				246	if (ActualTokLen != Tok.getLength())
				247	Result.resize(CurStrLen+ActualTokLen);
				248	}
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	249	}
				250	}
Mike Stump	1eb4433	2009-09-09 15:08:12 +0000	[diff] [blame]	251
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	252	// If the last character of the string is a \, and if it isn't escaped, this
				253	// is an invalid string literal, diagnose it as specified in C99.
Chris Lattner	c19e8a2	2009-01-05 23:04:18 +0000	[diff] [blame]	254	if (Result.back() == '\\') {
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	255	// Count the number of consequtive \ characters. If even, then they are
				256	// just escaped backslashes, otherwise it's an error.
				257	unsigned FirstNonSlash = Result.size()-2;
				258	// Guaranteed to find the starting " if nothing else.
				259	while (Result[FirstNonSlash] == '\\')
				260	--FirstNonSlash;
				261	if ((Result.size()-1-FirstNonSlash) & 1) {
				262	// Diagnose errors for things like: #define F(X) #X / F(\)
				263	PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal);
Chris Lattner	c19e8a2	2009-01-05 23:04:18 +0000	[diff] [blame]	264	Result.pop_back(); // remove one of the \'s.
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	265	}
				266	}
				267	Result += '"';
Mike Stump	1eb4433	2009-09-09 15:08:12 +0000	[diff] [blame]	268
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	269	// If this is the charify operation and the result is not a legal character
				270	// constant, diagnose it.
				271	if (Charify) {
				272	// First step, turn double quotes into single quotes:
				273	Result[0] = '\'';
				274	Result[Result.size()-1] = '\'';
Mike Stump	1eb4433	2009-09-09 15:08:12 +0000	[diff] [blame]	275
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	276	// Check for bogus character.
				277	bool isBad = false;
Chris Lattner	c19e8a2	2009-01-05 23:04:18 +0000	[diff] [blame]	278	if (Result.size() == 3)
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	279	isBad = Result[1] == '\''; // ''' is not legal. '\' already fixed above.
Chris Lattner	c19e8a2	2009-01-05 23:04:18 +0000	[diff] [blame]	280	else
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	281	isBad = (Result.size() != 4 \|\| Result[1] != '\\'); // Not '\x'
Mike Stump	1eb4433	2009-09-09 15:08:12 +0000	[diff] [blame]	282
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	283	if (isBad) {
				284	PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify);
				285	Result = "' '"; // Use something arbitrary, but legal.
				286	}
				287	}
Mike Stump	1eb4433	2009-09-09 15:08:12 +0000	[diff] [blame]	288
Abramo Bagnara	a08529c	2011-10-03 18:39:03 +0000	[diff] [blame]	289	PP.CreateString(&Result[0], Result.size(), Tok,
				290	ExpansionLocStart, ExpansionLocEnd);
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	291	return Tok;
				292	}
				293
				294	/// getStringifiedArgument - Compute, cache, and return the specified argument
				295	/// that has been 'stringified' as required by the # operator.
				296	const Token &MacroArgs::getStringifiedArgument(unsigned ArgNo,
Argyrios Kyrtzidis	b73377e	2011-07-07 03:40:34 +0000	[diff] [blame]	297	Preprocessor &PP,
Abramo Bagnara	a08529c	2011-10-03 18:39:03 +0000	[diff] [blame]	298	SourceLocation ExpansionLocStart,
				299	SourceLocation ExpansionLocEnd) {
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	300	assert(ArgNo < NumUnexpArgTokens && "Invalid argument number!");
				301	if (StringifiedArgs.empty()) {
				302	StringifiedArgs.resize(getNumArguments());
Chandler Carruth	75c4064	2011-04-28 08:19:45 +0000	[diff] [blame]	303	memset((void*)&StringifiedArgs[0], 0,
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	304	sizeof(StringifiedArgs[0])*getNumArguments());
				305	}
				306	if (StringifiedArgs[ArgNo].isNot(tok::string_literal))
Argyrios Kyrtzidis	b73377e	2011-07-07 03:40:34 +0000	[diff] [blame]	307	StringifiedArgs[ArgNo] = StringifyArgument(getUnexpArgument(ArgNo), PP,
Abramo Bagnara	a08529c	2011-10-03 18:39:03 +0000	[diff] [blame]	308	/Charify=/false,
				309	ExpansionLocStart,
				310	ExpansionLocEnd);
Chris Lattner	e5c8ffe	2008-03-09 02:55:12 +0000	[diff] [blame]	311	return StringifiedArgs[ArgNo];
				312	}