blob: 1678820da0156d5ec375137d3cf348af18650ed7 [file] [log] [blame]
Manuel Klimekf7f295f2013-05-14 09:13:00 +00001//===--- Parser.cpp - Matcher expression parser -----*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief Recursive parser implementation for the matcher expression grammar.
12///
13//===----------------------------------------------------------------------===//
14
15#include <string>
16#include <vector>
17
18#include "clang/ASTMatchers/Dynamic/Parser.h"
19#include "clang/ASTMatchers/Dynamic/Registry.h"
20#include "clang/Basic/CharInfo.h"
21#include "llvm/ADT/Twine.h"
22
23namespace clang {
24namespace ast_matchers {
25namespace dynamic {
26
27/// \brief Simple structure to hold information for one token from the parser.
28struct Parser::TokenInfo {
29 /// \brief Different possible tokens.
30 enum TokenKind {
31 TK_Eof = 0,
32 TK_OpenParen = 1,
33 TK_CloseParen = 2,
34 TK_Comma = 3,
35 TK_Literal = 4,
36 TK_Ident = 5,
37 TK_InvalidChar = 6,
38 TK_Error = 7
39 };
40
41 TokenInfo() : Text(), Kind(TK_Eof), Range(), Value() {}
42
43 StringRef Text;
44 TokenKind Kind;
45 SourceRange Range;
46 VariantValue Value;
47};
48
49/// \brief Simple tokenizer for the parser.
50class Parser::CodeTokenizer {
51public:
52 explicit CodeTokenizer(StringRef MatcherCode, Diagnostics *Error)
53 : Code(MatcherCode), StartOfLine(MatcherCode), Line(1), Error(Error) {
54 NextToken = getNextToken();
55 }
56
57 /// \brief Returns but doesn't consume the next token.
58 const TokenInfo &peekNextToken() const { return NextToken; }
59
60 /// \brief Consumes and returns the next token.
61 TokenInfo consumeNextToken() {
62 TokenInfo ThisToken = NextToken;
63 NextToken = getNextToken();
64 return ThisToken;
65 }
66
67 TokenInfo::TokenKind nextTokenKind() const { return NextToken.Kind; }
68
69private:
70 TokenInfo getNextToken() {
71 consumeWhitespace();
72 TokenInfo Result;
73 Result.Range.Start = currentLocation();
74
75 if (Code.empty()) {
76 Result.Kind = TokenInfo::TK_Eof;
77 Result.Text = "";
78 return Result;
79 }
80
81 switch (Code[0]) {
82 case ',':
83 Result.Kind = TokenInfo::TK_Comma;
84 Result.Text = Code.substr(0, 1);
85 Code = Code.drop_front();
86 break;
87 case '(':
88 Result.Kind = TokenInfo::TK_OpenParen;
89 Result.Text = Code.substr(0, 1);
90 Code = Code.drop_front();
91 break;
92 case ')':
93 Result.Kind = TokenInfo::TK_CloseParen;
94 Result.Text = Code.substr(0, 1);
95 Code = Code.drop_front();
96 break;
97
98 case '"':
99 case '\'':
100 // Parse a string literal.
101 consumeStringLiteral(&Result);
102 break;
103
104 default:
105 if (isAlphanumeric(Code[0])) {
106 // Parse an identifier
107 size_t TokenLength = 1;
108 while (TokenLength < Code.size() && isAlphanumeric(Code[TokenLength]))
109 ++TokenLength;
110 Result.Kind = TokenInfo::TK_Ident;
111 Result.Text = Code.substr(0, TokenLength);
112 Code = Code.drop_front(TokenLength);
113 } else {
114 Result.Kind = TokenInfo::TK_InvalidChar;
115 Result.Text = Code.substr(0, 1);
116 Code = Code.drop_front(1);
117 }
118 break;
119 }
120
121 Result.Range.End = currentLocation();
122 return Result;
123 }
124
125 /// \brief Consume a string literal.
126 ///
127 /// \c Code must be positioned at the start of the literal (the opening
128 /// quote). Consumed until it finds the same closing quote character.
129 void consumeStringLiteral(TokenInfo *Result) {
130 bool InEscape = false;
131 const char Marker = Code[0];
132 for (size_t Length = 1, Size = Code.size(); Length != Size; ++Length) {
133 if (InEscape) {
134 InEscape = false;
135 continue;
136 }
137 if (Code[Length] == '\\') {
138 InEscape = true;
139 continue;
140 }
141 if (Code[Length] == Marker) {
142 Result->Kind = TokenInfo::TK_Literal;
143 Result->Text = Code.substr(0, Length + 1);
144 Result->Value = Code.substr(1, Length - 1).str();
145 Code = Code.drop_front(Length + 1);
146 return;
147 }
148 }
149
150 StringRef ErrorText = Code;
151 Code = Code.drop_front(Code.size());
152 SourceRange Range;
153 Range.Start = Result->Range.Start;
154 Range.End = currentLocation();
155 Error->pushErrorFrame(Range, Error->ET_ParserStringError)
156 << ErrorText;
157 Result->Kind = TokenInfo::TK_Error;
158 }
159
160 /// \brief Consume all leading whitespace from \c Code.
161 void consumeWhitespace() {
162 while (!Code.empty() && isWhitespace(Code[0])) {
163 if (Code[0] == '\n') {
164 ++Line;
165 StartOfLine = Code.drop_front();
166 }
167 Code = Code.drop_front();
168 }
169 }
170
171 SourceLocation currentLocation() {
172 SourceLocation Location;
173 Location.Line = Line;
174 Location.Column = Code.data() - StartOfLine.data() + 1;
175 return Location;
176 }
177
178 StringRef Code;
179 StringRef StartOfLine;
180 unsigned Line;
181 Diagnostics *Error;
182 TokenInfo NextToken;
183};
184
185Parser::Sema::~Sema() {}
186
187/// \brief Parse and validate a matcher expression.
188/// \return \c true on success, in which case \c Value has the matcher parsed.
189/// If the input is malformed, or some argument has an error, it
190/// returns \c false.
191bool Parser::parseMatcherExpressionImpl(VariantValue *Value) {
192 const TokenInfo NameToken = Tokenizer->consumeNextToken();
193 assert(NameToken.Kind == TokenInfo::TK_Ident);
194 const TokenInfo OpenToken = Tokenizer->consumeNextToken();
195 if (OpenToken.Kind != TokenInfo::TK_OpenParen) {
196 Error->pushErrorFrame(OpenToken.Range, Error->ET_ParserNoOpenParen)
197 << OpenToken.Text;
198 return false;
199 }
200
201 std::vector<ParserValue> Args;
202 TokenInfo EndToken;
203 while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) {
204 if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) {
205 // End of args.
206 EndToken = Tokenizer->consumeNextToken();
207 break;
208 }
209 if (Args.size() > 0) {
210 // We must find a , token to continue.
211 const TokenInfo CommaToken = Tokenizer->consumeNextToken();
212 if (CommaToken.Kind != TokenInfo::TK_Comma) {
213 Error->pushErrorFrame(CommaToken.Range, Error->ET_ParserNoComma)
214 << CommaToken.Text;
215 return false;
216 }
217 }
218
219 ParserValue ArgValue;
220 ArgValue.Text = Tokenizer->peekNextToken().Text;
221 ArgValue.Range = Tokenizer->peekNextToken().Range;
222 if (!parseExpressionImpl(&ArgValue.Value)) {
223 Error->pushErrorFrame(NameToken.Range,
224 Error->ET_ParserMatcherArgFailure)
225 << (Args.size() + 1) << NameToken.Text;
226 return false;
227 }
228
229 Args.push_back(ArgValue);
230 }
231
232 if (EndToken.Kind == TokenInfo::TK_Eof) {
233 Error->pushErrorFrame(OpenToken.Range, Error->ET_ParserNoCloseParen);
234 return false;
235 }
236
237 // Merge the start and end infos.
238 SourceRange MatcherRange = NameToken.Range;
239 MatcherRange.End = EndToken.Range.End;
240 DynTypedMatcher *Result =
241 S->actOnMatcherExpression(NameToken.Text, MatcherRange, Args, Error);
242 if (Result == NULL) {
243 Error->pushErrorFrame(NameToken.Range, Error->ET_ParserMatcherFailure)
244 << NameToken.Text;
245 return false;
246 }
247
248 Value->takeMatcher(Result);
249 return true;
250}
251
252/// \brief Parse an <Expresssion>
253bool Parser::parseExpressionImpl(VariantValue *Value) {
254 switch (Tokenizer->nextTokenKind()) {
255 case TokenInfo::TK_Literal:
256 *Value = Tokenizer->consumeNextToken().Value;
257 return true;
258
259 case TokenInfo::TK_Ident:
260 return parseMatcherExpressionImpl(Value);
261
262 case TokenInfo::TK_Eof:
263 Error->pushErrorFrame(Tokenizer->consumeNextToken().Range,
264 Error->ET_ParserNoCode);
265 return false;
266
267 case TokenInfo::TK_Error:
268 // This error was already reported by the tokenizer.
269 return false;
270
271 case TokenInfo::TK_OpenParen:
272 case TokenInfo::TK_CloseParen:
273 case TokenInfo::TK_Comma:
274 case TokenInfo::TK_InvalidChar:
275 const TokenInfo Token = Tokenizer->consumeNextToken();
276 Error->pushErrorFrame(Token.Range, Error->ET_ParserInvalidToken)
277 << Token.Text;
278 return false;
279 }
280
281 llvm_unreachable("Unknown token kind.");
282}
283
284Parser::Parser(CodeTokenizer *Tokenizer, Sema *S,
285 Diagnostics *Error)
286 : Tokenizer(Tokenizer), S(S), Error(Error) {}
287
288class RegistrySema : public Parser::Sema {
289public:
290 virtual ~RegistrySema() {}
291 DynTypedMatcher *actOnMatcherExpression(StringRef MatcherName,
292 const SourceRange &NameRange,
293 ArrayRef<ParserValue> Args,
294 Diagnostics *Error) {
295 return Registry::constructMatcher(MatcherName, NameRange, Args, Error);
296 }
297};
298
299bool Parser::parseExpression(StringRef Code, VariantValue *Value,
300 Diagnostics *Error) {
301 RegistrySema S;
302 return parseExpression(Code, &S, Value, Error);
303}
304
305bool Parser::parseExpression(StringRef Code, Sema *S,
306 VariantValue *Value, Diagnostics *Error) {
307 CodeTokenizer Tokenizer(Code, Error);
308 return Parser(&Tokenizer, S, Error).parseExpressionImpl(Value);
309}
310
311DynTypedMatcher *Parser::parseMatcherExpression(StringRef Code,
312 Diagnostics *Error) {
313 RegistrySema S;
314 return parseMatcherExpression(Code, &S, Error);
315}
316
317DynTypedMatcher *Parser::parseMatcherExpression(StringRef Code,
318 Parser::Sema *S,
319 Diagnostics *Error) {
320 VariantValue Value;
321 if (!parseExpression(Code, S, &Value, Error))
322 return NULL;
323 if (!Value.isMatcher()) {
324 Error->pushErrorFrame(SourceRange(), Error->ET_ParserNotAMatcher);
325 return NULL;
326 }
327 return Value.getMatcher().clone();
328}
329
330} // namespace dynamic
331} // namespace ast_matchers
332} // namespace clang