blob: 59a75479ef482aa2df0d7727bd6c4a0d4b358e5c [file] [log] [blame]
//===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Recursive parser implementation for the matcher expression grammar.
///
//===----------------------------------------------------------------------===//
14
15#include <string>
16#include <vector>
17
18#include "clang/ASTMatchers/Dynamic/Parser.h"
19#include "clang/ASTMatchers/Dynamic/Registry.h"
20#include "clang/Basic/CharInfo.h"
Peter Collingbourne00cba4f2013-11-23 01:13:16 +000021#include "llvm/ADT/Optional.h"
Manuel Klimek24db0f02013-05-14 09:13:00 +000022#include "llvm/ADT/Twine.h"
23
24namespace clang {
25namespace ast_matchers {
26namespace dynamic {
27
28/// \brief Simple structure to hold information for one token from the parser.
29struct Parser::TokenInfo {
30 /// \brief Different possible tokens.
31 enum TokenKind {
32 TK_Eof = 0,
33 TK_OpenParen = 1,
34 TK_CloseParen = 2,
35 TK_Comma = 3,
Samuel Benzaquen31edb512013-06-03 19:31:08 +000036 TK_Period = 4,
37 TK_Literal = 5,
38 TK_Ident = 6,
39 TK_InvalidChar = 7,
40 TK_Error = 8
Manuel Klimek24db0f02013-05-14 09:13:00 +000041 };
42
Samuel Benzaquen31edb512013-06-03 19:31:08 +000043 /// \brief Some known identifiers.
44 static const char* const ID_Bind;
45
Manuel Klimek24db0f02013-05-14 09:13:00 +000046 TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {}
47
48 StringRef Text;
49 TokenKind Kind;
50 SourceRange Range;
51 VariantValue Value;
52};
53
Samuel Benzaquen31edb512013-06-03 19:31:08 +000054const char* const Parser::TokenInfo::ID_Bind = "bind";
55
Manuel Klimek24db0f02013-05-14 09:13:00 +000056/// \brief Simple tokenizer for the parser.
57class Parser::CodeTokenizer {
58public:
59 explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
60 : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error) {
61 NextToken = getNextToken();
62 }
63
64 /// \brief Returns but doesn't consume the next token.
65 const TokenInfo &peekNextToken() const { return NextToken; }
66
67 /// \brief Consumes and returns the next token.
68 TokenInfo consumeNextToken() {
69 TokenInfo ThisToken = NextToken;
70 NextToken = getNextToken();
71 return ThisToken;
72 }
73
74 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
75
76private:
77 TokenInfo getNextToken() {
78 consumeWhitespace();
79 TokenInfo Result;
80 Result.Range.Start = currentLocation();
81
82 if (Code.empty()) {
83 Result.Kind = TokenInfo::TK_Eof;
84 Result.Text = "";
85 return Result;
86 }
87
88 switch (Code[0]) {
89 case ',':
90 Result.Kind = TokenInfo::TK_Comma;
91 Result.Text = Code.substr(0, 1);
92 Code = Code.drop_front();
93 break;
Samuel Benzaquen31edb512013-06-03 19:31:08 +000094 case '.':
95 Result.Kind = TokenInfo::TK_Period;
96 Result.Text = Code.substr(0, 1);
97 Code = Code.drop_front();
98 break;
Manuel Klimek24db0f02013-05-14 09:13:00 +000099 case '(':
100 Result.Kind = TokenInfo::TK_OpenParen;
101 Result.Text = Code.substr(0, 1);
102 Code = Code.drop_front();
103 break;
104 case ')':
105 Result.Kind = TokenInfo::TK_CloseParen;
106 Result.Text = Code.substr(0, 1);
107 Code = Code.drop_front();
108 break;
109
110 case '"':
111 case '\'':
112 // Parse a string literal.
113 consumeStringLiteral(&Result);
114 break;
115
Samuel Benzaquenc31b3522013-06-04 15:46:22 +0000116 case '0': case '1': case '2': case '3': case '4':
117 case '5': case '6': case '7': case '8': case '9':
118 // Parse an unsigned literal.
119 consumeUnsignedLiteral(&Result);
120 break;
121
Manuel Klimek24db0f02013-05-14 09:13:00 +0000122 default:
123 if (isAlphanumeric(Code[0])) {
124 // Parse an identifier
125 size_t TokenLength = 1;
126 while (TokenLength < Code.size() && isAlphanumeric(Code[TokenLength]))
127 ++TokenLength;
128 Result.Kind = TokenInfo::TK_Ident;
129 Result.Text = Code.substr(0, TokenLength);
130 Code = Code.drop_front(TokenLength);
131 } else {
132 Result.Kind = TokenInfo::TK_InvalidChar;
133 Result.Text = Code.substr(0, 1);
134 Code = Code.drop_front(1);
135 }
136 break;
137 }
138
139 Result.Range.End = currentLocation();
140 return Result;
141 }
142
Samuel Benzaquenc31b3522013-06-04 15:46:22 +0000143 /// \brief Consume an unsigned literal.
144 void consumeUnsignedLiteral(TokenInfo *Result) {
145 unsigned Length = 1;
146 if (Code.size() > 1) {
147 // Consume the 'x' or 'b' radix modifier, if present.
148 switch (toLowercase(Code[1])) {
149 case 'x': case 'b': Length = 2;
150 }
151 }
152 while (Length < Code.size() && isHexDigit(Code[Length]))
153 ++Length;
154
155 Result->Text = Code.substr(0, Length);
156 Code = Code.drop_front(Length);
157
158 unsigned Value;
159 if (!Result->Text.getAsInteger(0, Value)) {
160 Result->Kind = TokenInfo::TK_Literal;
161 Result->Value = Value;
162 } else {
163 SourceRange Range;
164 Range.Start = Result->Range.Start;
165 Range.End = currentLocation();
Samuel Benzaquena37bb8c2013-07-18 19:47:59 +0000166 Error->addError(Range, Error->ET_ParserUnsignedError) << Result->Text;
Samuel Benzaquenc31b3522013-06-04 15:46:22 +0000167 Result->Kind = TokenInfo::TK_Error;
168 }
169 }
170
Manuel Klimek24db0f02013-05-14 09:13:00 +0000171 /// \brief Consume a string literal.
172 ///
173 /// \c Code must be positioned at the start of the literal (the opening
174 /// quote). Consumed until it finds the same closing quote character.
175 void consumeStringLiteral(TokenInfo *Result) {
176 bool InEscape = false;
177 const char Marker = Code[0];
178 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
179 if (InEscape) {
180 InEscape = false;
181 continue;
182 }
183 if (Code[Length] == '\\') {
184 InEscape = true;
185 continue;
186 }
187 if (Code[Length] == Marker) {
188 Result->Kind = TokenInfo::TK_Literal;
189 Result->Text = Code.substr(0, Length + 1);
190 Result->Value = Code.substr(1, Length - 1).str();
191 Code = Code.drop_front(Length + 1);
192 return;
193 }
194 }
195
196 StringRef ErrorText = Code;
197 Code = Code.drop_front(Code.size());
198 SourceRange Range;
199 Range.Start = Result->Range.Start;
200 Range.End = currentLocation();
Samuel Benzaquena37bb8c2013-07-18 19:47:59 +0000201 Error->addError(Range, Error->ET_ParserStringError) << ErrorText;
Manuel Klimek24db0f02013-05-14 09:13:00 +0000202 Result->Kind = TokenInfo::TK_Error;
203 }
204
205 /// \brief Consume all leading whitespace from \c Code.
206 void consumeWhitespace() {
207 while (!Code.empty() && isWhitespace(Code[0])) {
208 if (Code[0] == '\n') {
209 ++Line;
210 StartOfLine = Code.drop_front();
211 }
212 Code = Code.drop_front();
213 }
214 }
215
216 SourceLocation currentLocation() {
217 SourceLocation Location;
218 Location.Line = Line;
219 Location.Column = Code.data() - StartOfLine.data() + 1;
220 return Location;
221 }
222
223 StringRef Code;
224 StringRef StartOfLine;
225 unsigned Line;
226 Diagnostics *Error;
227 TokenInfo NextToken;
228};
229
230Parser::Sema::~Sema() {}
231
232/// \brief Parse and validate a matcher expression.
233/// \return \c true on success, in which case \c Value has the matcher parsed.
234/// If the input is malformed, or some argument has an error, it
235/// returns \c false.
236bool Parser::parseMatcherExpressionImpl(VariantValue *Value) {
237 const TokenInfo NameToken = Tokenizer->consumeNextToken();
238 assert(NameToken.Kind == TokenInfo::TK_Ident);
239 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
240 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
Samuel Benzaquena37bb8c2013-07-18 19:47:59 +0000241 Error->addError(OpenToken.Range, Error->ET_ParserNoOpenParen)
Manuel Klimek24db0f02013-05-14 09:13:00 +0000242 << OpenToken.Text;
243 return false;
244 }
245
Peter Collingbourne00cba4f2013-11-23 01:13:16 +0000246 llvm::Optional<MatcherCtor> Ctor =
247 S->lookupMatcherCtor(NameToken.Text, NameToken.Range, Error);
248
Manuel Klimek24db0f02013-05-14 09:13:00 +0000249 std::vector<ParserValue> Args;
250 TokenInfo EndToken;
251 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
252 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
253 // End of args.
254 EndToken = Tokenizer->consumeNextToken();
255 break;
256 }
257 if (Args.size() > 0) {
258 // We must find a , token to continue.
259 const TokenInfo CommaToken = Tokenizer->consumeNextToken();
260 if (CommaToken.Kind != TokenInfo::TK_Comma) {
Samuel Benzaquena37bb8c2013-07-18 19:47:59 +0000261 Error->addError(CommaToken.Range, Error->ET_ParserNoComma)
Manuel Klimek24db0f02013-05-14 09:13:00 +0000262 << CommaToken.Text;
263 return false;
264 }
265 }
266
Samuel Benzaquena37bb8c2013-07-18 19:47:59 +0000267 Diagnostics::Context Ctx(Diagnostics::Context::MatcherArg, Error,
268 NameToken.Text, NameToken.Range, Args.size() + 1);
Manuel Klimek24db0f02013-05-14 09:13:00 +0000269 ParserValue ArgValue;
270 ArgValue.Text = Tokenizer->peekNextToken().Text;
271 ArgValue.Range = Tokenizer->peekNextToken().Range;
Samuel Benzaquena37bb8c2013-07-18 19:47:59 +0000272 if (!parseExpressionImpl(&ArgValue.Value)) return false;
Manuel Klimek24db0f02013-05-14 09:13:00 +0000273
274 Args.push_back(ArgValue);
275 }
276
277 if (EndToken.Kind == TokenInfo::TK_Eof) {
Samuel Benzaquena37bb8c2013-07-18 19:47:59 +0000278 Error->addError(OpenToken.Range, Error->ET_ParserNoCloseParen);
Manuel Klimek24db0f02013-05-14 09:13:00 +0000279 return false;
280 }
281
Samuel Benzaquen31edb512013-06-03 19:31:08 +0000282 std::string BindID;
283 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
284 // Parse .bind("foo")
285 Tokenizer->consumeNextToken(); // consume the period.
286 const TokenInfo BindToken = Tokenizer->consumeNextToken();
287 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
288 const TokenInfo IDToken = Tokenizer->consumeNextToken();
289 const TokenInfo CloseToken = Tokenizer->consumeNextToken();
290
291 // TODO: We could use different error codes for each/some to be more
292 // explicit about the syntax error.
293 if (BindToken.Kind != TokenInfo::TK_Ident ||
294 BindToken.Text != TokenInfo::ID_Bind) {
Samuel Benzaquena37bb8c2013-07-18 19:47:59 +0000295 Error->addError(BindToken.Range, Error->ET_ParserMalformedBindExpr);
Samuel Benzaquen31edb512013-06-03 19:31:08 +0000296 return false;
297 }
298 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
Samuel Benzaquena37bb8c2013-07-18 19:47:59 +0000299 Error->addError(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
Samuel Benzaquen31edb512013-06-03 19:31:08 +0000300 return false;
301 }
302 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
Samuel Benzaquena37bb8c2013-07-18 19:47:59 +0000303 Error->addError(IDToken.Range, Error->ET_ParserMalformedBindExpr);
Samuel Benzaquen31edb512013-06-03 19:31:08 +0000304 return false;
305 }
306 if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
Samuel Benzaquena37bb8c2013-07-18 19:47:59 +0000307 Error->addError(CloseToken.Range, Error->ET_ParserMalformedBindExpr);
Samuel Benzaquen31edb512013-06-03 19:31:08 +0000308 return false;
309 }
310 BindID = IDToken.Value.getString();
311 }
312
Peter Collingbourne00cba4f2013-11-23 01:13:16 +0000313 if (!Ctor)
314 return false;
315
Manuel Klimek24db0f02013-05-14 09:13:00 +0000316 // Merge the start and end infos.
Samuel Benzaquena37bb8c2013-07-18 19:47:59 +0000317 Diagnostics::Context Ctx(Diagnostics::Context::ConstructMatcher, Error,
318 NameToken.Text, NameToken.Range);
Manuel Klimek24db0f02013-05-14 09:13:00 +0000319 SourceRange MatcherRange = NameToken.Range;
320 MatcherRange.End = EndToken.Range.End;
Samuel Benzaquen0239b692013-08-13 14:54:51 +0000321 VariantMatcher Result = S->actOnMatcherExpression(
Peter Collingbourne00cba4f2013-11-23 01:13:16 +0000322 *Ctor, MatcherRange, BindID, Args, Error);
Samuel Benzaquen0239b692013-08-13 14:54:51 +0000323 if (Result.isNull()) return false;
Manuel Klimek24db0f02013-05-14 09:13:00 +0000324
Samuel Benzaquenc6f2c9b2013-06-21 15:51:31 +0000325 *Value = Result;
Manuel Klimek24db0f02013-05-14 09:13:00 +0000326 return true;
327}
328
329/// \brief Parse an <Expresssion>
330bool Parser::parseExpressionImpl(VariantValue *Value) {
331 switch (Tokenizer->nextTokenKind()) {
332 case TokenInfo::TK_Literal:
333 *Value = Tokenizer->consumeNextToken().Value;
334 return true;
335
336 case TokenInfo::TK_Ident:
337 return parseMatcherExpressionImpl(Value);
338
339 case TokenInfo::TK_Eof:
Samuel Benzaquena37bb8c2013-07-18 19:47:59 +0000340 Error->addError(Tokenizer->consumeNextToken().Range,
341 Error->ET_ParserNoCode);
Manuel Klimek24db0f02013-05-14 09:13:00 +0000342 return false;
343
344 case TokenInfo::TK_Error:
345 // This error was already reported by the tokenizer.
346 return false;
347
348 case TokenInfo::TK_OpenParen:
349 case TokenInfo::TK_CloseParen:
350 case TokenInfo::TK_Comma:
Samuel Benzaquen31edb512013-06-03 19:31:08 +0000351 case TokenInfo::TK_Period:
Manuel Klimek24db0f02013-05-14 09:13:00 +0000352 case TokenInfo::TK_InvalidChar:
353 const TokenInfo Token = Tokenizer->consumeNextToken();
Samuel Benzaquena37bb8c2013-07-18 19:47:59 +0000354 Error->addError(Token.Range, Error->ET_ParserInvalidToken) << Token.Text;
Manuel Klimek24db0f02013-05-14 09:13:00 +0000355 return false;
356 }
357
358 llvm_unreachable("Unknown token kind.");
359}
360
361Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
362 Diagnostics *Error)
363 : Tokenizer(Tokenizer), S(S), Error(Error) {}
364
365class RegistrySema : public Parser::Sema {
366public:
367 virtual ~RegistrySema() {}
Peter Collingbourne00cba4f2013-11-23 01:13:16 +0000368 llvm::Optional<MatcherCtor> lookupMatcherCtor(StringRef MatcherName,
369 const SourceRange &NameRange,
370 Diagnostics *Error) {
371 return Registry::lookupMatcherCtor(MatcherName, NameRange, Error);
372 }
373 VariantMatcher actOnMatcherExpression(MatcherCtor Ctor,
Samuel Benzaquen0239b692013-08-13 14:54:51 +0000374 const SourceRange &NameRange,
375 StringRef BindID,
376 ArrayRef<ParserValue> Args,
377 Diagnostics *Error) {
Samuel Benzaquen31edb512013-06-03 19:31:08 +0000378 if (BindID.empty()) {
Peter Collingbourne00cba4f2013-11-23 01:13:16 +0000379 return Registry::constructMatcher(Ctor, NameRange, Args, Error);
Samuel Benzaquen31edb512013-06-03 19:31:08 +0000380 } else {
Peter Collingbourne00cba4f2013-11-23 01:13:16 +0000381 return Registry::constructBoundMatcher(Ctor, NameRange, BindID, Args,
382 Error);
Samuel Benzaquen31edb512013-06-03 19:31:08 +0000383 }
Manuel Klimek24db0f02013-05-14 09:13:00 +0000384 }
385};
386
387bool Parser::parseExpression(StringRef Code, VariantValue *Value,
388 Diagnostics *Error) {
389 RegistrySema S;
390 return parseExpression(Code, &S, Value, Error);
391}
392
393bool Parser::parseExpression(StringRef Code, Sema *S,
394 VariantValue *Value, Diagnostics *Error) {
395 CodeTokenizer Tokenizer(Code, Error);
Samuel Benzaquen31edb512013-06-03 19:31:08 +0000396 if (!Parser(&Tokenizer, S, Error).parseExpressionImpl(Value)) return false;
397 if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) {
Samuel Benzaquena37bb8c2013-07-18 19:47:59 +0000398 Error->addError(Tokenizer.peekNextToken().Range,
399 Error->ET_ParserTrailingCode);
Samuel Benzaquen31edb512013-06-03 19:31:08 +0000400 return false;
401 }
402 return true;
Manuel Klimek24db0f02013-05-14 09:13:00 +0000403}
404
Samuel Benzaquenf34ac3e2013-10-29 14:37:15 +0000405llvm::Optional<DynTypedMatcher>
406Parser::parseMatcherExpression(StringRef Code, Diagnostics *Error) {
Manuel Klimek24db0f02013-05-14 09:13:00 +0000407 RegistrySema S;
408 return parseMatcherExpression(Code, &S, Error);
409}
410
Samuel Benzaquenf34ac3e2013-10-29 14:37:15 +0000411llvm::Optional<DynTypedMatcher>
412Parser::parseMatcherExpression(StringRef Code, Parser::Sema *S,
413 Diagnostics *Error) {
Manuel Klimek24db0f02013-05-14 09:13:00 +0000414 VariantValue Value;
415 if (!parseExpression(Code, S, &Value, Error))
Samuel Benzaquenf34ac3e2013-10-29 14:37:15 +0000416 return llvm::Optional<DynTypedMatcher>();
Samuel Benzaquen0239b692013-08-13 14:54:51 +0000417 if (!Value.isMatcher()) {
Samuel Benzaquena37bb8c2013-07-18 19:47:59 +0000418 Error->addError(SourceRange(), Error->ET_ParserNotAMatcher);
Samuel Benzaquenf34ac3e2013-10-29 14:37:15 +0000419 return llvm::Optional<DynTypedMatcher>();
Manuel Klimek24db0f02013-05-14 09:13:00 +0000420 }
Samuel Benzaquenf34ac3e2013-10-29 14:37:15 +0000421 llvm::Optional<DynTypedMatcher> Result =
422 Value.getMatcher().getSingleMatcher();
423 if (!Result.hasValue()) {
Samuel Benzaquena37bb8c2013-07-18 19:47:59 +0000424 Error->addError(SourceRange(), Error->ET_ParserOverloadedType)
Samuel Benzaquenc6f2c9b2013-06-21 15:51:31 +0000425 << Value.getTypeAsString();
Samuel Benzaquenc6f2c9b2013-06-21 15:51:31 +0000426 }
Samuel Benzaquenf34ac3e2013-10-29 14:37:15 +0000427 return Result;
Manuel Klimek24db0f02013-05-14 09:13:00 +0000428}
429
430} // namespace dynamic
431} // namespace ast_matchers
432} // namespace clang