/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
| 7 | |
| 8 | #ifndef SKSL_REGEXPARSER |
| 9 | #define SKSL_REGEXPARSER |
| 10 | |
| 11 | #include "RegexNode.h" |
| 12 | |
| 13 | #include <stack> |
| 14 | #include <string> |
| 15 | |
| 16 | /** |
| 17 | * Turns a simple regular expression into a parse tree. The regular expression syntax supports only |
| 18 | * the basic quantifiers ('*', '+', and '?'), alternation ('|'), character sets ('[a-z]'), and |
| 19 | * groups ('()'). |
| 20 | */ |
| 21 | class RegexParser { |
| 22 | public: |
| 23 | RegexNode parse(std::string source); |
| 24 | |
| 25 | private: |
| 26 | static constexpr char END = '\0'; |
| 27 | |
| 28 | char peek(); |
| 29 | |
| 30 | void expect(char c); |
| 31 | |
| 32 | RegexNode pop(); |
| 33 | |
| 34 | /** |
| 35 | * Matches a char literal, parenthesized group, character set, or dot ('.'). |
| 36 | */ |
| 37 | void term(); |
| 38 | |
| 39 | /** |
| 40 | * Matches a term followed by an optional quantifier ('*', '+', or '?'). |
| 41 | */ |
| 42 | void quantifiedTerm(); |
| 43 | |
| 44 | /** |
| 45 | * Matches a sequence of quantifiedTerms. |
| 46 | */ |
| 47 | void sequence(); |
| 48 | |
| 49 | /** |
| 50 | * Returns a node representing the given escape character (e.g. escapeSequence('n') returns a |
| 51 | * node which matches a newline character). |
| 52 | */ |
| 53 | RegexNode escapeSequence(char c); |
| 54 | |
| 55 | /** |
| 56 | * Matches a literal character or escape sequence. |
| 57 | */ |
| 58 | void literal(); |
| 59 | |
| 60 | /** |
| 61 | * Matches a dot ('.'). |
| 62 | */ |
| 63 | void dot(); |
| 64 | |
| 65 | /** |
| 66 | * Matches a parenthesized group. |
| 67 | */ |
| 68 | void group(); |
| 69 | |
| 70 | /** |
| 71 | * Matches a literal character, escape sequence, or character range from a character set. |
| 72 | */ |
| 73 | void setItem(); |
| 74 | |
| 75 | /** |
| 76 | * Matches a character set. |
| 77 | */ |
| 78 | void set(); |
| 79 | |
| 80 | void regex(); |
| 81 | |
| 82 | std::string fSource; |
| 83 | |
| 84 | size_t fIndex; |
| 85 | |
| 86 | std::stack<RegexNode> fStack; |
| 87 | }; |
| 88 | |
| 89 | #endif |