//===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Recursive parser implementation for the matcher expression grammar.
///
//===----------------------------------------------------------------------===//
14
15#include <string>
16#include <vector>
17
18#include "clang/ASTMatchers/Dynamic/Parser.h"
19#include "clang/ASTMatchers/Dynamic/Registry.h"
20#include "clang/Basic/CharInfo.h"
21#include "llvm/ADT/Twine.h"
22
23namespace clang {
24namespace ast_matchers {
25namespace dynamic {
26
27/// \brief Simple structure to hold information for one token from the parser.
28struct Parser::TokenInfo {
29 /// \brief Different possible tokens.
30 enum TokenKind {
31 TK_Eof = 0,
32 TK_OpenParen = 1,
33 TK_CloseParen = 2,
34 TK_Comma = 3,
Samuel Benzaquen4f37d922013-06-03 19:31:08 +000035 TK_Period = 4,
36 TK_Literal = 5,
37 TK_Ident = 6,
38 TK_InvalidChar = 7,
39 TK_Error = 8
Manuel Klimekf7f295f2013-05-14 09:13:00 +000040 };
41
Samuel Benzaquen4f37d922013-06-03 19:31:08 +000042 /// \brief Some known identifiers.
43 static const char* const ID_Bind;
44
Manuel Klimekf7f295f2013-05-14 09:13:00 +000045 TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {}
46
47 StringRef Text;
48 TokenKind Kind;
49 SourceRange Range;
50 VariantValue Value;
51};
52
Samuel Benzaquen4f37d922013-06-03 19:31:08 +000053const char* const Parser::TokenInfo::ID_Bind = "bind";
54
Manuel Klimekf7f295f2013-05-14 09:13:00 +000055/// \brief Simple tokenizer for the parser.
56class Parser::CodeTokenizer {
57public:
58 explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
59 : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error) {
60 NextToken = getNextToken();
61 }
62
63 /// \brief Returns but doesn't consume the next token.
64 const TokenInfo &peekNextToken() const { return NextToken; }
65
66 /// \brief Consumes and returns the next token.
67 TokenInfo consumeNextToken() {
68 TokenInfo ThisToken = NextToken;
69 NextToken = getNextToken();
70 return ThisToken;
71 }
72
73 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
74
75private:
76 TokenInfo getNextToken() {
77 consumeWhitespace();
78 TokenInfo Result;
79 Result.Range.Start = currentLocation();
80
81 if (Code.empty()) {
82 Result.Kind = TokenInfo::TK_Eof;
83 Result.Text = "";
84 return Result;
85 }
86
87 switch (Code[0]) {
88 case ',':
89 Result.Kind = TokenInfo::TK_Comma;
90 Result.Text = Code.substr(0, 1);
91 Code = Code.drop_front();
92 break;
Samuel Benzaquen4f37d922013-06-03 19:31:08 +000093 case '.':
94 Result.Kind = TokenInfo::TK_Period;
95 Result.Text = Code.substr(0, 1);
96 Code = Code.drop_front();
97 break;
Manuel Klimekf7f295f2013-05-14 09:13:00 +000098 case '(':
99 Result.Kind = TokenInfo::TK_OpenParen;
100 Result.Text = Code.substr(0, 1);
101 Code = Code.drop_front();
102 break;
103 case ')':
104 Result.Kind = TokenInfo::TK_CloseParen;
105 Result.Text = Code.substr(0, 1);
106 Code = Code.drop_front();
107 break;
108
109 case '"':
110 case '\'':
111 // Parse a string literal.
112 consumeStringLiteral(&Result);
113 break;
114
115 default:
116 if (isAlphanumeric(Code[0])) {
117 // Parse an identifier
118 size_t TokenLength = 1;
119 while (TokenLength < Code.size() && isAlphanumeric(Code[TokenLength]))
120 ++TokenLength;
121 Result.Kind = TokenInfo::TK_Ident;
122 Result.Text = Code.substr(0, TokenLength);
123 Code = Code.drop_front(TokenLength);
124 } else {
125 Result.Kind = TokenInfo::TK_InvalidChar;
126 Result.Text = Code.substr(0, 1);
127 Code = Code.drop_front(1);
128 }
129 break;
130 }
131
132 Result.Range.End = currentLocation();
133 return Result;
134 }
135
136 /// \brief Consume a string literal.
137 ///
138 /// \c Code must be positioned at the start of the literal (the opening
139 /// quote). Consumed until it finds the same closing quote character.
140 void consumeStringLiteral(TokenInfo *Result) {
141 bool InEscape = false;
142 const char Marker = Code[0];
143 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
144 if (InEscape) {
145 InEscape = false;
146 continue;
147 }
148 if (Code[Length] == '\\') {
149 InEscape = true;
150 continue;
151 }
152 if (Code[Length] == Marker) {
153 Result->Kind = TokenInfo::TK_Literal;
154 Result->Text = Code.substr(0, Length + 1);
155 Result->Value = Code.substr(1, Length - 1).str();
156 Code = Code.drop_front(Length + 1);
157 return;
158 }
159 }
160
161 StringRef ErrorText = Code;
162 Code = Code.drop_front(Code.size());
163 SourceRange Range;
164 Range.Start = Result->Range.Start;
165 Range.End = currentLocation();
166 Error->pushErrorFrame(Range, Error->ET_ParserStringError)
167 << ErrorText;
168 Result->Kind = TokenInfo::TK_Error;
169 }
170
171 /// \brief Consume all leading whitespace from \c Code.
172 void consumeWhitespace() {
173 while (!Code.empty() && isWhitespace(Code[0])) {
174 if (Code[0] == '\n') {
175 ++Line;
176 StartOfLine = Code.drop_front();
177 }
178 Code = Code.drop_front();
179 }
180 }
181
182 SourceLocation currentLocation() {
183 SourceLocation Location;
184 Location.Line = Line;
185 Location.Column = Code.data() - StartOfLine.data() + 1;
186 return Location;
187 }
188
189 StringRef Code;
190 StringRef StartOfLine;
191 unsigned Line;
192 Diagnostics *Error;
193 TokenInfo NextToken;
194};
195
196Parser::Sema::~Sema() {}
197
198/// \brief Parse and validate a matcher expression.
199/// \return \c true on success, in which case \c Value has the matcher parsed.
200/// If the input is malformed, or some argument has an error, it
201/// returns \c false.
202bool Parser::parseMatcherExpressionImpl(VariantValue *Value) {
203 const TokenInfo NameToken = Tokenizer->consumeNextToken();
204 assert(NameToken.Kind == TokenInfo::TK_Ident);
205 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
206 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
207 Error->pushErrorFrame(OpenToken.Range, Error->ET_ParserNoOpenParen)
208 << OpenToken.Text;
209 return false;
210 }
211
212 std::vector<ParserValue> Args;
213 TokenInfo EndToken;
214 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
215 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
216 // End of args.
217 EndToken = Tokenizer->consumeNextToken();
218 break;
219 }
220 if (Args.size() > 0) {
221 // We must find a , token to continue.
222 const TokenInfo CommaToken = Tokenizer->consumeNextToken();
223 if (CommaToken.Kind != TokenInfo::TK_Comma) {
224 Error->pushErrorFrame(CommaToken.Range, Error->ET_ParserNoComma)
225 << CommaToken.Text;
226 return false;
227 }
228 }
229
230 ParserValue ArgValue;
231 ArgValue.Text = Tokenizer->peekNextToken().Text;
232 ArgValue.Range = Tokenizer->peekNextToken().Range;
233 if (!parseExpressionImpl(&ArgValue.Value)) {
234 Error->pushErrorFrame(NameToken.Range,
235 Error->ET_ParserMatcherArgFailure)
236 << (Args.size() + 1) << NameToken.Text;
237 return false;
238 }
239
240 Args.push_back(ArgValue);
241 }
242
243 if (EndToken.Kind == TokenInfo::TK_Eof) {
244 Error->pushErrorFrame(OpenToken.Range, Error->ET_ParserNoCloseParen);
245 return false;
246 }
247
Samuel Benzaquen4f37d922013-06-03 19:31:08 +0000248 std::string BindID;
249 if (Tokenizer->peekNextToken().Kind == TokenInfo::TK_Period) {
250 // Parse .bind("foo")
251 Tokenizer->consumeNextToken(); // consume the period.
252 const TokenInfo BindToken = Tokenizer->consumeNextToken();
253 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
254 const TokenInfo IDToken = Tokenizer->consumeNextToken();
255 const TokenInfo CloseToken = Tokenizer->consumeNextToken();
256
257 // TODO: We could use different error codes for each/some to be more
258 // explicit about the syntax error.
259 if (BindToken.Kind != TokenInfo::TK_Ident ||
260 BindToken.Text != TokenInfo::ID_Bind) {
261 Error->pushErrorFrame(BindToken.Range, Error->ET_ParserMalformedBindExpr);
262 return false;
263 }
264 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
265 Error->pushErrorFrame(OpenToken.Range, Error->ET_ParserMalformedBindExpr);
266 return false;
267 }
268 if (IDToken.Kind != TokenInfo::TK_Literal || !IDToken.Value.isString()) {
269 Error->pushErrorFrame(IDToken.Range, Error->ET_ParserMalformedBindExpr);
270 return false;
271 }
272 if (CloseToken.Kind != TokenInfo::TK_CloseParen) {
273 Error->pushErrorFrame(CloseToken.Range,
274 Error->ET_ParserMalformedBindExpr);
275 return false;
276 }
277 BindID = IDToken.Value.getString();
278 }
279
Manuel Klimekf7f295f2013-05-14 09:13:00 +0000280 // Merge the start and end infos.
281 SourceRange MatcherRange = NameToken.Range;
282 MatcherRange.End = EndToken.Range.End;
Samuel Benzaquen4f37d922013-06-03 19:31:08 +0000283 DynTypedMatcher *Result = S->actOnMatcherExpression(
284 NameToken.Text, MatcherRange, BindID, Args, Error);
Manuel Klimekf7f295f2013-05-14 09:13:00 +0000285 if (Result == NULL) {
286 Error->pushErrorFrame(NameToken.Range, Error->ET_ParserMatcherFailure)
287 << NameToken.Text;
288 return false;
289 }
290
291 Value->takeMatcher(Result);
292 return true;
293}
294
295/// \brief Parse an <Expresssion>
296bool Parser::parseExpressionImpl(VariantValue *Value) {
297 switch (Tokenizer->nextTokenKind()) {
298 case TokenInfo::TK_Literal:
299 *Value = Tokenizer->consumeNextToken().Value;
300 return true;
301
302 case TokenInfo::TK_Ident:
303 return parseMatcherExpressionImpl(Value);
304
305 case TokenInfo::TK_Eof:
306 Error->pushErrorFrame(Tokenizer->consumeNextToken().Range,
307 Error->ET_ParserNoCode);
308 return false;
309
310 case TokenInfo::TK_Error:
311 // This error was already reported by the tokenizer.
312 return false;
313
314 case TokenInfo::TK_OpenParen:
315 case TokenInfo::TK_CloseParen:
316 case TokenInfo::TK_Comma:
Samuel Benzaquen4f37d922013-06-03 19:31:08 +0000317 case TokenInfo::TK_Period:
Manuel Klimekf7f295f2013-05-14 09:13:00 +0000318 case TokenInfo::TK_InvalidChar:
319 const TokenInfo Token = Tokenizer->consumeNextToken();
320 Error->pushErrorFrame(Token.Range, Error->ET_ParserInvalidToken)
321 << Token.Text;
322 return false;
323 }
324
325 llvm_unreachable("Unknown token kind.");
326}
327
328Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
329 Diagnostics *Error)
330 : Tokenizer(Tokenizer), S(S), Error(Error) {}
331
332class RegistrySema : public Parser::Sema {
333public:
334 virtual ~RegistrySema() {}
335 DynTypedMatcher *actOnMatcherExpression(StringRef MatcherName,
336 const SourceRange &NameRange,
Samuel Benzaquen4f37d922013-06-03 19:31:08 +0000337 StringRef BindID,
Manuel Klimekf7f295f2013-05-14 09:13:00 +0000338 ArrayRef<ParserValue> Args,
339 Diagnostics *Error) {
Samuel Benzaquen4f37d922013-06-03 19:31:08 +0000340 if (BindID.empty()) {
341 return Registry::constructMatcher(MatcherName, NameRange, Args, Error);
342 } else {
343 return Registry::constructBoundMatcher(MatcherName, NameRange, BindID,
344 Args, Error);
345 }
Manuel Klimekf7f295f2013-05-14 09:13:00 +0000346 }
347};
348
349bool Parser::parseExpression(StringRef Code, VariantValue *Value,
350 Diagnostics *Error) {
351 RegistrySema S;
352 return parseExpression(Code, &S, Value, Error);
353}
354
355bool Parser::parseExpression(StringRef Code, Sema *S,
356 VariantValue *Value, Diagnostics *Error) {
357 CodeTokenizer Tokenizer(Code, Error);
Samuel Benzaquen4f37d922013-06-03 19:31:08 +0000358 if (!Parser(&Tokenizer, S, Error).parseExpressionImpl(Value)) return false;
359 if (Tokenizer.peekNextToken().Kind != TokenInfo::TK_Eof) {
360 Error->pushErrorFrame(Tokenizer.peekNextToken().Range,
361 Error->ET_ParserTrailingCode);
362 return false;
363 }
364 return true;
Manuel Klimekf7f295f2013-05-14 09:13:00 +0000365}
366
367DynTypedMatcher *Parser::parseMatcherExpression(StringRef Code,
368 Diagnostics *Error) {
369 RegistrySema S;
370 return parseMatcherExpression(Code, &S, Error);
371}
372
373DynTypedMatcher *Parser::parseMatcherExpression(StringRef Code,
374 Parser::Sema *S,
375 Diagnostics *Error) {
376 VariantValue Value;
377 if (!parseExpression(Code, S, &Value, Error))
378 return NULL;
379 if (!Value.isMatcher()) {
380 Error->pushErrorFrame(SourceRange(), Error->ET_ParserNotAMatcher);
381 return NULL;
382 }
383 return Value.getMatcher().clone();
384}
385
386} // namespace dynamic
387} // namespace ast_matchers
388} // namespace clang