Blame - lib/ASTMatchers/Dynamic/Parser.cpp - fp2-dev/platform/external/clang

blob: fc09a30ddd9928ae3b4bb7b9bc6f33ab3b1bae2f [file] [log] [blame]

Manuel Klimek	f7f295f	2013-05-14 09:13:00 +0000	[diff] [blame]	1	//===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	///
				10	/// \file
				11	/// \brief Recursive parser implementation for the matcher expression grammar.
				12	///
				13	//===----------------------------------------------------------------------===//
				14
				15	#include <string>
				16	#include <vector>
				17
				18	#include "clang/ASTMatchers/Dynamic/Parser.h"
				19	#include "clang/ASTMatchers/Dynamic/Registry.h"
				20	#include "clang/Basic/CharInfo.h"
				21	#include "llvm/ADT/Twine.h"
				22
				23	namespace clang {
				24	namespace ast_matchers {
				25	namespace dynamic {
				26
				27	/// \brief Simple structure to hold information for one token from the parser.
				28	struct Parser::TokenInfo {
				29	/// \brief Different possible tokens.
				30	enum TokenKind {
				31	TK_Eof = 0,
				32	TK_OpenParen = 1,
				33	TK_CloseParen = 2,
				34	TK_Comma = 3,
Samuel Benzaquen	4f37d92	2013-06-03 19:31:08 +0000	[diff] [blame]	35	TK_Period = 4,
				36	TK_Literal = 5,
				37	TK_Ident = 6,
				38	TK_InvalidChar = 7,
				39	TK_Error = 8
Manuel Klimek	f7f295f	2013-05-14 09:13:00 +0000	[diff] [blame]	40	};
				41
Samuel Benzaquen	4f37d92	2013-06-03 19:31:08 +0000	[diff] [blame]	42	/// \brief Some known identifiers.
				43	static const char* const ID_Bind;
				44
Manuel Klimek	f7f295f	2013-05-14 09:13:00 +0000	[diff] [blame]	45	TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {}
				46
				47	StringRef Text;
				48	TokenKind Kind;
				49	SourceRange Range;
				50	VariantValue Value;
				51	};
				52
Samuel Benzaquen	4f37d92	2013-06-03 19:31:08 +0000	[diff] [blame]	53	const char* const Parser::TokenInfo::ID_Bind = "bind";
				54
Manuel Klimek	f7f295f	2013-05-14 09:13:00 +0000	[diff] [blame]	55	/// \brief Simple tokenizer for the parser.
				56	class Parser::CodeTokenizer {
				57	public:
				58	explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
				59	: Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error) {
				60	NextToken = getNextToken();
				61	}
				62
				63	/// \brief Returns but doesn't consume the next token.
				64	const TokenInfo &peekNextToken() const { return NextToken; }
				65
				66	/// \brief Consumes and returns the next token.
				67	TokenInfo consumeNextToken() {
				68	TokenInfo ThisToken = NextToken;
				69	NextToken = getNextToken();
				70	return ThisToken;
				71	}
				72
				73	TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
				74
				75	private:
				76	TokenInfo getNextToken() {
				77	consumeWhitespace();
				78	TokenInfo Result;
				79	Result.Range.Start = currentLocation();
				80
				81	if (Code.empty()) {
				82	Result.Kind = TokenInfo::TK_Eof;
				83	Result.Text = "";
				84	return Result;
				85	}
				86
				87	switch (Code[0]) {
				88	case ',':
				89	Result.Kind = TokenInfo::TK_Comma;
				90	Result.Text = Code.substr(0, 1);
				91	Code = Code.drop_front();
				92	break;
Samuel Benzaquen	4f37d92	2013-06-03 19:31:08 +0000	[diff] [blame]	93	case '.':
				94	Result.Kind = TokenInfo::TK_Period;
				95	Result.Text = Code.substr(0, 1);
				96	Code = Code.drop_front();
				97	break;
Manuel Klimek	f7f295f	2013-05-14 09:13:00 +0000	[diff] [blame]	98	case '(':
				99	Result.Kind = TokenInfo::TK_OpenParen;
				100	Result.Text = Code.substr(0, 1);
				101	Code = Code.drop_front();
				102	break;
				103	case ')':
				104	Result.Kind = TokenInfo::TK_CloseParen;
				105	Result.Text = Code.substr(0, 1);
				106	Code = Code.drop_front();
				107	break;
				108
				109	case '"':
				110	case '\'':
				111	// Parse a string literal.
				112	consumeStringLiteral(&Result);
				113	break;
				114
Samuel Benzaquen	7a337af	2013-06-04 15:46:22 +0000	[diff] [blame]	115	case '0': case '1': case '2': case '3': case '4':
				116	case '5': case '6': case '7': case '8': case '9':
				117	// Parse an unsigned literal.
				118	consumeUnsignedLiteral(&Result);
				119	break;
				120
Manuel Klimek	f7f295f	2013-05-14 09:13:00 +0000	[diff] [blame]	121	default:
				122	if (isAlphanumeric(Code[0])) {
				123	// Parse an identifier
				124	size_t TokenLength = 1;
				125	while (TokenLength < Code.size() && isAlphanumeric(Code[TokenLength]))
				126	++TokenLength;
				127	Result.Kind = TokenInfo::TK_Ident;
				128	Result.Text = Code.substr(0, TokenLength);
				129	Code = Code.drop_front(TokenLength);
				130	} else {
				131	Result.Kind = TokenInfo::TK_InvalidChar;
				132	Result.Text = Code.substr(0, 1);
				133	Code = Code.drop_front(1);
				134	}
				135	break;
				136	}
				137
				138	Result.Range.End = currentLocation();
				139	return Result;
				140	}
				141
Samuel Benzaquen	7a337af	2013-06-04 15:46:22 +0000	[diff] [blame]	142	/// \brief Consume an unsigned literal.
				143	void consumeUnsignedLiteral(TokenInfo *Result) {
				144	unsigned Length = 1;
				145	if (Code.size() > 1) {
				146	// Consume the 'x' or 'b' radix modifier, if present.
				147	switch (toLowercase(Code[1])) {
				148	case 'x': case 'b': Length = 2;
				149	}
				150	}
				151	while (Length < Code.size() && isHexDigit(Code[Length]))
				152	++Length;
				153
				154	Result->Text = Code.substr(0, Length);
				155	Code = Code.drop_front(Length);
				156
				157	unsigned Value;
				158	if (!Result->Text.getAsInteger(0, Value)) {
				159	Result->Kind = TokenInfo::TK_Literal;
				160	Result->Value = Value;
				161	} else {
				162	SourceRange Range;
				163	Range.Start = Result->Range.Start;
				164	Range.End = currentLocation();
				165	Error->pushErrorFrame(Range, Error->ET_ParserUnsignedError)
				166	<< Result->Text;
				167	Result->Kind = TokenInfo::TK_Error;
				168	}
				169	}
				170
Manuel Klimek	f7f295f	2013-05-14 09:13:00 +0000	[diff] [blame]	171	/// \brief Consume a string literal.
				172	///
				173	/// \c Code must be positioned at the start of the literal (the opening
				174	/// quote). Consumed until it finds the same closing quote character.
				175	void consumeStringLiteral(TokenInfo *Result) {
				176	bool InEscape = false;
				177	const char Marker = Code[0];
				178	for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
				179	if (InEscape) {
				180	InEscape = false;
				181	continue;
				182	}
				183	if (Code[Length] == '\\') {
				184	InEscape = true;
				185	continue;
				186	}
				187	if (Code[Length] == Marker) {
				188	Result->Kind = TokenInfo::TK_Literal;
				189	Result->Text = Code.substr(0, Length + 1);
				190	Result->Value = Code.substr(1, Length - 1).str();
				191	Code = Code.drop_front(Length + 1);
				192	return;
				193	}
				194	}
				195
				196	StringRef ErrorText = Code;
				197	Code = Code.drop_front(Code.size());
				198	SourceRange Range;
				199	Range.Start = Result->Range.Start;
				200	Range.End = currentLocation();
				201	Error->pushErrorFrame(Range, Error->ET_ParserStringError)
				202	<< ErrorText;
				203	Result->Kind = TokenInfo::TK_Error;
				204	}
				205
				206	/// \brief Consume all leading whitespace from \c Code.
				207	void consumeWhitespace() {
				208	while (!Code.empty() && isWhitespace(Code[0])) {
				209	if (Code[0] == '\n') {
				210	++Line;
				211	StartOfLine = Code.drop_front();
				212	}
				213	Code = Code.drop_front();
				214	}
				215	}
				216
				217	SourceLocation currentLocation() {
				218	SourceLocation Location;
				219	Location.Line = Line;
				220	Location.Column = Code.data() - StartOfLine.data() + 1;
				221	return Location;
				222	}
				223
				224	StringRef Code;
				225	StringRef StartOfLine;
				226	unsigned Line;
				227	Diagnostics *Error;
				228	TokenInfo NextToken;
				229	};
				230
				231	Parser::Sema::~Sema() {}
				232
				233	/// \brief Parse and validate a matcher expression.
				234	/// \return \c true on success, in which case \c Value has the matcher parsed.
				235	/// If the input is malformed, or some argument has an error, it
				236	/// returns \c false.
				237	bool Parser::parseMatcherExpressionImpl(VariantValue *Value) {
				238	const TokenInfo NameToken = Tokenizer->consumeNextToken();
				239	assert(NameToken.Kind == TokenInfo::TK_Ident);
				240	const TokenInfo OpenToken = Tokenizer->consumeNextToken();
				241	if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
				242	Error->pushErrorFrame(OpenToken.Range, Error->ET_ParserNoOpenParen)
				243	<< OpenToken.Text;
				244	return false;
				245	}
				246
				247	std::vector<ParserValue> Args;
				248	TokenInfo EndToken;
				249	while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
				250	if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
				251	// End of args.
				252	EndToken = Tokenizer->consumeNextToken();
				253	break;
				254	}
				255	if (Args.size() > 0) {
				256	// We must find a , token to continue.
				257	const TokenInfo CommaToken = Tokenizer->consumeNextToken();
				258	if (CommaToken.Kind != TokenInfo::TK_Comma) {
				259	Error->pushErrorFrame(CommaToken.Range, Error->ET_ParserNoComma)
				260	<< CommaToken.Text;
				261	return false;
				262	}
				263	}
				264
				265	ParserValue ArgValue;
				266	ArgValue.Text = Tokenizer->peekNextToken().Text;
				267	ArgValue.Range = Tokenizer->peekNextToken().Range;
				268	if (!parseExpressionImpl(&ArgValue.Value)) {
				269	Error->pushErrorFrame(NameToken.Range,
				270	Error->ET_ParserMatcherArgFailure)
				271	<< (Args.size() + 1) << NameToken.Text;
				272	return false;
				273	}
				274
				275	Args.push_back(ArgValue);
				276	}
				277
				278	if (EndToken.Kind == TokenInfo::TK_Eof) {
				279	Error->pushErrorFrame(OpenToken.Range, Error->ET_ParserNoCloseParen);
				280	return false;
				281	}
				282
Samuel Benzaquen	4f37d92	2013-06-03 19:31:08 +0000	[diff] [blame]	283	std::string BindID;
				284	if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
				285	// Parse .bind("foo")
				286	Tokenizer->consumeNextToken(); // consume the period.
				287	const TokenInfo BindToken = Tokenizer->consumeNextToken();
				288	const TokenInfo OpenToken = Tokenizer->consumeNextToken();
				289	const TokenInfo IDToken = Tokenizer->consumeNextToken();
				290	const TokenInfo CloseToken = Tokenizer->consumeNextToken();
				291
				292	// TODO: We could use different error codes for each/some to be more
				293	// explicit about the syntax error.
				294	if (BindToken.Kind != TokenInfo::TK_Ident \|\|
				295	BindToken.Text != TokenInfo::ID_Bind) {
				296	Error->pushErrorFrame(BindToken.Range, Error->ET_ParserMalformedBindExpr);
				297	return false;
				298	}
				299	if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
				300	Error->pushErrorFrame(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
				301	return false;
				302	}
				303	if (IDToken.Kind != TokenInfo::TK_Literal \|\| !IDToken.Value.isString()) {
				304	Error->pushErrorFrame(IDToken.Range, Error->ET_ParserMalformedBindExpr);
				305	return false;
				306	}
				307	if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
				308	Error->pushErrorFrame(CloseToken.Range,
				309	Error->ET_ParserMalformedBindExpr);
				310	return false;
				311	}
				312	BindID = IDToken.Value.getString();
				313	}
				314
Manuel Klimek	f7f295f	2013-05-14 09:13:00 +0000	[diff] [blame]	315	// Merge the start and end infos.
				316	SourceRange MatcherRange = NameToken.Range;
				317	MatcherRange.End = EndToken.Range.End;
Samuel Benzaquen	ef7eb02	2013-06-21 15:51:31 +0000	[diff] [blame^]	318	MatcherList Result = S->actOnMatcherExpression(
Samuel Benzaquen	4f37d92	2013-06-03 19:31:08 +0000	[diff] [blame]	319	NameToken.Text, MatcherRange, BindID, Args, Error);
Samuel Benzaquen	ef7eb02	2013-06-21 15:51:31 +0000	[diff] [blame^]	320	if (Result.empty()) {
Manuel Klimek	f7f295f	2013-05-14 09:13:00 +0000	[diff] [blame]	321	Error->pushErrorFrame(NameToken.Range, Error->ET_ParserMatcherFailure)
				322	<< NameToken.Text;
				323	return false;
				324	}
				325
Samuel Benzaquen	ef7eb02	2013-06-21 15:51:31 +0000	[diff] [blame^]	326	*Value = Result;
Manuel Klimek	f7f295f	2013-05-14 09:13:00 +0000	[diff] [blame]	327	return true;
				328	}
				329
				330	/// \brief Parse an <Expresssion>
				331	bool Parser::parseExpressionImpl(VariantValue *Value) {
				332	switch (Tokenizer->nextTokenKind()) {
				333	case TokenInfo::TK_Literal:
				334	*Value = Tokenizer->consumeNextToken().Value;
				335	return true;
				336
				337	case TokenInfo::TK_Ident:
				338	return parseMatcherExpressionImpl(Value);
				339
				340	case TokenInfo::TK_Eof:
				341	Error->pushErrorFrame(Tokenizer->consumeNextToken().Range,
				342	Error->ET_ParserNoCode);
				343	return false;
				344
				345	case TokenInfo::TK_Error:
				346	// This error was already reported by the tokenizer.
				347	return false;
				348
				349	case TokenInfo::TK_OpenParen:
				350	case TokenInfo::TK_CloseParen:
				351	case TokenInfo::TK_Comma:
Samuel Benzaquen	4f37d92	2013-06-03 19:31:08 +0000	[diff] [blame]	352	case TokenInfo::TK_Period:
Manuel Klimek	f7f295f	2013-05-14 09:13:00 +0000	[diff] [blame]	353	case TokenInfo::TK_InvalidChar:
				354	const TokenInfo Token = Tokenizer->consumeNextToken();
				355	Error->pushErrorFrame(Token.Range, Error->ET_ParserInvalidToken)
				356	<< Token.Text;
				357	return false;
				358	}
				359
				360	llvm_unreachable("Unknown token kind.");
				361	}
				362
				363	Parser::Parser(CodeTokenizer Tokenizer, Sema S,
				364	Diagnostics *Error)
				365	: Tokenizer(Tokenizer), S(S), Error(Error) {}
				366
				367	class RegistrySema : public Parser::Sema {
				368	public:
				369	virtual ~RegistrySema() {}
Samuel Benzaquen	ef7eb02	2013-06-21 15:51:31 +0000	[diff] [blame^]	370	MatcherList actOnMatcherExpression(StringRef MatcherName,
				371	const SourceRange &NameRange,
				372	StringRef BindID,
				373	ArrayRef<ParserValue> Args,
				374	Diagnostics *Error) {
Samuel Benzaquen	4f37d92	2013-06-03 19:31:08 +0000	[diff] [blame]	375	if (BindID.empty()) {
				376	return Registry::constructMatcher(MatcherName, NameRange, Args, Error);
				377	} else {
				378	return Registry::constructBoundMatcher(MatcherName, NameRange, BindID,
				379	Args, Error);
				380	}
Manuel Klimek	f7f295f	2013-05-14 09:13:00 +0000	[diff] [blame]	381	}
				382	};
				383
				384	bool Parser::parseExpression(StringRef Code, VariantValue *Value,
				385	Diagnostics *Error) {
				386	RegistrySema S;
				387	return parseExpression(Code, &S, Value, Error);
				388	}
				389
				390	bool Parser::parseExpression(StringRef Code, Sema *S,
				391	VariantValue Value, Diagnostics Error) {
				392	CodeTokenizer Tokenizer(Code, Error);
Samuel Benzaquen	4f37d92	2013-06-03 19:31:08 +0000	[diff] [blame]	393	if (!Parser(&Tokenizer, S, Error).parseExpressionImpl(Value)) return false;
				394	if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) {
				395	Error->pushErrorFrame(Tokenizer.peekNextToken().Range,
				396	Error->ET_ParserTrailingCode);
				397	return false;
				398	}
				399	return true;
Manuel Klimek	f7f295f	2013-05-14 09:13:00 +0000	[diff] [blame]	400	}
				401
				402	DynTypedMatcher *Parser::parseMatcherExpression(StringRef Code,
				403	Diagnostics *Error) {
				404	RegistrySema S;
				405	return parseMatcherExpression(Code, &S, Error);
				406	}
				407
				408	DynTypedMatcher *Parser::parseMatcherExpression(StringRef Code,
				409	Parser::Sema *S,
				410	Diagnostics *Error) {
				411	VariantValue Value;
				412	if (!parseExpression(Code, S, &Value, Error))
				413	return NULL;
Samuel Benzaquen	ef7eb02	2013-06-21 15:51:31 +0000	[diff] [blame^]	414	if (!Value.isMatchers()) {
Manuel Klimek	f7f295f	2013-05-14 09:13:00 +0000	[diff] [blame]	415	Error->pushErrorFrame(SourceRange(), Error->ET_ParserNotAMatcher);
				416	return NULL;
				417	}
Samuel Benzaquen	ef7eb02	2013-06-21 15:51:31 +0000	[diff] [blame^]	418	if (Value.getMatchers().matchers().size() != 1) {
				419	Error->pushErrorFrame(SourceRange(), Error->ET_ParserOverloadedType)
				420	<< Value.getTypeAsString();
				421	return NULL;
				422	}
				423	return Value.getMatchers().matchers()[0]->clone();
Manuel Klimek	f7f295f	2013-05-14 09:13:00 +0000	[diff] [blame]	424	}
				425
				426	} // namespace dynamic
				427	} // namespace ast_matchers
				428	} // namespace clang