Blame - lib/ASTMatchers/Dynamic/Parser.cpp - platform/external/clang

blob: 1678820da0156d5ec375137d3cf348af18650ed7 [file] [log] [blame]

Manuel Klimek	f7f295f	2013-05-14 09:13:00 +0000	[diff] [blame]	1	//===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	///
				10	/// \file
				11	/// \brief Recursive parser implementation for the matcher expression grammar.
				12	///
				13	//===----------------------------------------------------------------------===//
				14
				15	#include <string>
				16	#include <vector>
				17
				18	#include "clang/ASTMatchers/Dynamic/Parser.h"
				19	#include "clang/ASTMatchers/Dynamic/Registry.h"
				20	#include "clang/Basic/CharInfo.h"
				21	#include "llvm/ADT/Twine.h"
				22
				23	namespace clang {
				24	namespace ast_matchers {
				25	namespace dynamic {
				26
				27	/// \brief Simple structure to hold information for one token from the parser.
				28	struct Parser::TokenInfo {
				29	/// \brief Different possible tokens.
				30	enum TokenKind {
				31	TK_Eof = 0,
				32	TK_OpenParen = 1,
				33	TK_CloseParen = 2,
				34	TK_Comma = 3,
				35	TK_Literal = 4,
				36	TK_Ident = 5,
				37	TK_InvalidChar = 6,
				38	TK_Error = 7
				39	};
				40
				41	TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {}
				42
				43	StringRef Text;
				44	TokenKind Kind;
				45	SourceRange Range;
				46	VariantValue Value;
				47	};
				48
				49	/// \brief Simple tokenizer for the parser.
				50	class Parser::CodeTokenizer {
				51	public:
				52	explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
				53	: Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error) {
				54	NextToken = getNextToken();
				55	}
				56
				57	/// \brief Returns but doesn't consume the next token.
				58	const TokenInfo &peekNextToken() const { return NextToken; }
				59
				60	/// \brief Consumes and returns the next token.
				61	TokenInfo consumeNextToken() {
				62	TokenInfo ThisToken = NextToken;
				63	NextToken = getNextToken();
				64	return ThisToken;
				65	}
				66
				67	TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
				68
				69	private:
				70	TokenInfo getNextToken() {
				71	consumeWhitespace();
				72	TokenInfo Result;
				73	Result.Range.Start = currentLocation();
				74
				75	if (Code.empty()) {
				76	Result.Kind = TokenInfo::TK_Eof;
				77	Result.Text = "";
				78	return Result;
				79	}
				80
				81	switch (Code[0]) {
				82	case ',':
				83	Result.Kind = TokenInfo::TK_Comma;
				84	Result.Text = Code.substr(0, 1);
				85	Code = Code.drop_front();
				86	break;
				87	case '(':
				88	Result.Kind = TokenInfo::TK_OpenParen;
				89	Result.Text = Code.substr(0, 1);
				90	Code = Code.drop_front();
				91	break;
				92	case ')':
				93	Result.Kind = TokenInfo::TK_CloseParen;
				94	Result.Text = Code.substr(0, 1);
				95	Code = Code.drop_front();
				96	break;
				97
				98	case '"':
				99	case '\'':
				100	// Parse a string literal.
				101	consumeStringLiteral(&Result);
				102	break;
				103
				104	default:
				105	if (isAlphanumeric(Code[0])) {
				106	// Parse an identifier
				107	size_t TokenLength = 1;
				108	while (TokenLength < Code.size() && isAlphanumeric(Code[TokenLength]))
				109	++TokenLength;
				110	Result.Kind = TokenInfo::TK_Ident;
				111	Result.Text = Code.substr(0, TokenLength);
				112	Code = Code.drop_front(TokenLength);
				113	} else {
				114	Result.Kind = TokenInfo::TK_InvalidChar;
				115	Result.Text = Code.substr(0, 1);
				116	Code = Code.drop_front(1);
				117	}
				118	break;
				119	}
				120
				121	Result.Range.End = currentLocation();
				122	return Result;
				123	}
				124
				125	/// \brief Consume a string literal.
				126	///
				127	/// \c Code must be positioned at the start of the literal (the opening
				128	/// quote). Consumed until it finds the same closing quote character.
				129	void consumeStringLiteral(TokenInfo *Result) {
				130	bool InEscape = false;
				131	const char Marker = Code[0];
				132	for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
				133	if (InEscape) {
				134	InEscape = false;
				135	continue;
				136	}
				137	if (Code[Length] == '\\') {
				138	InEscape = true;
				139	continue;
				140	}
				141	if (Code[Length] == Marker) {
				142	Result->Kind = TokenInfo::TK_Literal;
				143	Result->Text = Code.substr(0, Length + 1);
				144	Result->Value = Code.substr(1, Length - 1).str();
				145	Code = Code.drop_front(Length + 1);
				146	return;
				147	}
				148	}
				149
				150	StringRef ErrorText = Code;
				151	Code = Code.drop_front(Code.size());
				152	SourceRange Range;
				153	Range.Start = Result->Range.Start;
				154	Range.End = currentLocation();
				155	Error->pushErrorFrame(Range, Error->ET_ParserStringError)
				156	<< ErrorText;
				157	Result->Kind = TokenInfo::TK_Error;
				158	}
				159
				160	/// \brief Consume all leading whitespace from \c Code.
				161	void consumeWhitespace() {
				162	while (!Code.empty() && isWhitespace(Code[0])) {
				163	if (Code[0] == '\n') {
				164	++Line;
				165	StartOfLine = Code.drop_front();
				166	}
				167	Code = Code.drop_front();
				168	}
				169	}
				170
				171	SourceLocation currentLocation() {
				172	SourceLocation Location;
				173	Location.Line = Line;
				174	Location.Column = Code.data() - StartOfLine.data() + 1;
				175	return Location;
				176	}
				177
				178	StringRef Code;
				179	StringRef StartOfLine;
				180	unsigned Line;
				181	Diagnostics *Error;
				182	TokenInfo NextToken;
				183	};
				184
				185	Parser::Sema::~Sema() {}
				186
				187	/// \brief Parse and validate a matcher expression.
				188	/// \return \c true on success, in which case \c Value has the matcher parsed.
				189	/// If the input is malformed, or some argument has an error, it
				190	/// returns \c false.
				191	bool Parser::parseMatcherExpressionImpl(VariantValue *Value) {
				192	const TokenInfo NameToken = Tokenizer->consumeNextToken();
				193	assert(NameToken.Kind == TokenInfo::TK_Ident);
				194	const TokenInfo OpenToken = Tokenizer->consumeNextToken();
				195	if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
				196	Error->pushErrorFrame(OpenToken.Range, Error->ET_ParserNoOpenParen)
				197	<< OpenToken.Text;
				198	return false;
				199	}
				200
				201	std::vector<ParserValue> Args;
				202	TokenInfo EndToken;
				203	while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
				204	if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
				205	// End of args.
				206	EndToken = Tokenizer->consumeNextToken();
				207	break;
				208	}
				209	if (Args.size() > 0) {
				210	// We must find a , token to continue.
				211	const TokenInfo CommaToken = Tokenizer->consumeNextToken();
				212	if (CommaToken.Kind != TokenInfo::TK_Comma) {
				213	Error->pushErrorFrame(CommaToken.Range, Error->ET_ParserNoComma)
				214	<< CommaToken.Text;
				215	return false;
				216	}
				217	}
				218
				219	ParserValue ArgValue;
				220	ArgValue.Text = Tokenizer->peekNextToken().Text;
				221	ArgValue.Range = Tokenizer->peekNextToken().Range;
				222	if (!parseExpressionImpl(&ArgValue.Value)) {
				223	Error->pushErrorFrame(NameToken.Range,
				224	Error->ET_ParserMatcherArgFailure)
				225	<< (Args.size() + 1) << NameToken.Text;
				226	return false;
				227	}
				228
				229	Args.push_back(ArgValue);
				230	}
				231
				232	if (EndToken.Kind == TokenInfo::TK_Eof) {
				233	Error->pushErrorFrame(OpenToken.Range, Error->ET_ParserNoCloseParen);
				234	return false;
				235	}
				236
				237	// Merge the start and end infos.
				238	SourceRange MatcherRange = NameToken.Range;
				239	MatcherRange.End = EndToken.Range.End;
				240	DynTypedMatcher *Result =
				241	S->actOnMatcherExpression(NameToken.Text, MatcherRange, Args, Error);
				242	if (Result == NULL) {
				243	Error->pushErrorFrame(NameToken.Range, Error->ET_ParserMatcherFailure)
				244	<< NameToken.Text;
				245	return false;
				246	}
				247
				248	Value->takeMatcher(Result);
				249	return true;
				250	}
				251
				252	/// \brief Parse an <Expresssion>
				253	bool Parser::parseExpressionImpl(VariantValue *Value) {
				254	switch (Tokenizer->nextTokenKind()) {
				255	case TokenInfo::TK_Literal:
				256	*Value = Tokenizer->consumeNextToken().Value;
				257	return true;
				258
				259	case TokenInfo::TK_Ident:
				260	return parseMatcherExpressionImpl(Value);
				261
				262	case TokenInfo::TK_Eof:
				263	Error->pushErrorFrame(Tokenizer->consumeNextToken().Range,
				264	Error->ET_ParserNoCode);
				265	return false;
				266
				267	case TokenInfo::TK_Error:
				268	// This error was already reported by the tokenizer.
				269	return false;
				270
				271	case TokenInfo::TK_OpenParen:
				272	case TokenInfo::TK_CloseParen:
				273	case TokenInfo::TK_Comma:
				274	case TokenInfo::TK_InvalidChar:
				275	const TokenInfo Token = Tokenizer->consumeNextToken();
				276	Error->pushErrorFrame(Token.Range, Error->ET_ParserInvalidToken)
				277	<< Token.Text;
				278	return false;
				279	}
				280
				281	llvm_unreachable("Unknown token kind.");
				282	}
				283
				284	Parser::Parser(CodeTokenizer Tokenizer, Sema S,
				285	Diagnostics *Error)
				286	: Tokenizer(Tokenizer), S(S), Error(Error) {}
				287
				288	class RegistrySema : public Parser::Sema {
				289	public:
				290	virtual ~RegistrySema() {}
				291	DynTypedMatcher *actOnMatcherExpression(StringRef MatcherName,
				292	const SourceRange &NameRange,
				293	ArrayRef<ParserValue> Args,
				294	Diagnostics *Error) {
				295	return Registry::constructMatcher(MatcherName, NameRange, Args, Error);
				296	}
				297	};
				298
				299	bool Parser::parseExpression(StringRef Code, VariantValue *Value,
				300	Diagnostics *Error) {
				301	RegistrySema S;
				302	return parseExpression(Code, &S, Value, Error);
				303	}
				304
				305	bool Parser::parseExpression(StringRef Code, Sema *S,
				306	VariantValue Value, Diagnostics Error) {
				307	CodeTokenizer Tokenizer(Code, Error);
				308	return Parser(&Tokenizer, S, Error).parseExpressionImpl(Value);
				309	}
				310
				311	DynTypedMatcher *Parser::parseMatcherExpression(StringRef Code,
				312	Diagnostics *Error) {
				313	RegistrySema S;
				314	return parseMatcherExpression(Code, &S, Error);
				315	}
				316
				317	DynTypedMatcher *Parser::parseMatcherExpression(StringRef Code,
				318	Parser::Sema *S,
				319	Diagnostics *Error) {
				320	VariantValue Value;
				321	if (!parseExpression(Code, S, &Value, Error))
				322	return NULL;
				323	if (!Value.isMatcher()) {
				324	Error->pushErrorFrame(SourceRange(), Error->ET_ParserNotAMatcher);
				325	return NULL;
				326	}
				327	return Value.getMatcher().clone();
				328	}
				329
				330	} // namespace dynamic
				331	} // namespace ast_matchers
				332	} // namespace clang