blob: f4fec3f79d6e7393a930b3577e35612122a4f687 [file] [log] [blame]
Ethan Nicholasca82a922017-09-07 09:39:50 -04001/*
2 * Copyright 2017 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "RegexParser.h"
9
10#include "LexUtil.h"
11
12RegexNode RegexParser::parse(std::string source) {
13 fSource = source;
14 fIndex = 0;
Ethan Nicholasd9d33c32018-06-12 11:05:59 -040015 SkASSERT(fStack.size() == 0);
Ethan Nicholasca82a922017-09-07 09:39:50 -040016 this->regex();
Ethan Nicholasd9d33c32018-06-12 11:05:59 -040017 SkASSERT(fStack.size() == 1);
18 SkASSERT(fIndex == source.size());
Ethan Nicholasca82a922017-09-07 09:39:50 -040019 return this->pop();
20}
21
22char RegexParser::peek() {
23 if (fIndex >= fSource.size()) {
24 return END;
25 }
26 return fSource[fIndex];
27}
28
29void RegexParser::expect(char c) {
30 if (this->peek() != c) {
31 printf("expected '%c' at index %d, but found '%c'", c, (int) fIndex, this->peek());
32 exit(1);
33 }
34 ++fIndex;
35}
36
37RegexNode RegexParser::pop() {
38 RegexNode result = fStack.top();
39 fStack.pop();
40 return result;
41}
42
43void RegexParser::term() {
44 switch (this->peek()) {
45 case '(': this->group(); break;
46 case '[': this->set(); break;
47 case '.': this->dot(); break;
48 default: this->literal();
49 }
50}
51
52void RegexParser::quantifiedTerm() {
53 this->term();
54 switch (this->peek()) {
55 case '*': fStack.push(RegexNode(RegexNode::kStar_Kind, this->pop())); ++fIndex; break;
56 case '+': fStack.push(RegexNode(RegexNode::kPlus_Kind, this->pop())); ++fIndex; break;
57 case '?': fStack.push(RegexNode(RegexNode::kQuestion_Kind, this->pop())); ++fIndex; break;
58 default: break;
59 }
60}
61
62void RegexParser::sequence() {
63 this->quantifiedTerm();
64 for (;;) {
65 switch (this->peek()) {
66 case END: // fall through
67 case '|': // fall through
68 case ')': return;
69 default:
70 this->sequence();
71 RegexNode right = this->pop();
72 RegexNode left = this->pop();
73 fStack.emplace(RegexNode::kConcat_Kind, std::move(left), std::move(right));
74 }
75 }
76}
77
78RegexNode RegexParser::escapeSequence(char c) {
79 switch (c) {
80 case 'n': return RegexNode(RegexNode::kChar_Kind, '\n');
81 case 'r': return RegexNode(RegexNode::kChar_Kind, '\r');
82 case 't': return RegexNode(RegexNode::kChar_Kind, '\t');
83 case 's': return RegexNode(RegexNode::kCharset_Kind, " \t\n\r");
84 default: return RegexNode(RegexNode::kChar_Kind, c);
85 }
86}
87
88void RegexParser::literal() {
89 char c = this->peek();
90 if (c == '\\') {
91 ++fIndex;
92 fStack.push(this->escapeSequence(peek()));
93 ++fIndex;
94 }
95 else {
96 fStack.push(RegexNode(RegexNode::kChar_Kind, c));
97 ++fIndex;
98 }
99}
100
101void RegexParser::dot() {
102 this->expect('.');
103 fStack.push(RegexNode(RegexNode::kDot_Kind));
104}
105
106void RegexParser::group() {
107 this->expect('(');
108 this->regex();
109 this->expect(')');
110}
111
112void RegexParser::setItem() {
113 this->literal();
114 if (this->peek() == '-') {
115 ++fIndex;
116 if (peek() == ']') {
117 fStack.push(RegexNode(RegexNode::kChar_Kind, '-'));
118 }
119 else {
120 literal();
121 RegexNode end = this->pop();
Ethan Nicholasd9d33c32018-06-12 11:05:59 -0400122 SkASSERT(end.fKind == RegexNode::kChar_Kind);
Ethan Nicholasca82a922017-09-07 09:39:50 -0400123 RegexNode start = this->pop();
Ethan Nicholasd9d33c32018-06-12 11:05:59 -0400124 SkASSERT(start.fKind == RegexNode::kChar_Kind);
Ethan Nicholasca82a922017-09-07 09:39:50 -0400125 fStack.push(RegexNode(RegexNode::kRange_Kind, std::move(start), std::move(end)));
126 }
127 }
128}
129
130void RegexParser::set() {
131 expect('[');
132 size_t depth = fStack.size();
133 RegexNode set(RegexNode::kCharset_Kind);
134 if (this->peek() == '^') {
135 ++fIndex;
136 set.fPayload.fBool = true;
137 }
138 else {
139 set.fPayload.fBool = false;
140 }
141 for (;;) {
142 switch (this->peek()) {
143 case ']':
144 ++fIndex;
145 while (fStack.size() > depth) {
146 set.fChildren.push_back(this->pop());
147 }
148 fStack.push(std::move(set));
149 return;
150 case END:
151 printf("unterminated character set\n");
152 exit(1);
153 default:
154 this->setItem();
155 }
156 }
157}
158
159void RegexParser::regex() {
160 this->sequence();
161 switch (this->peek()) {
162 case '|': {
163 ++fIndex;
164 this->regex();
165 RegexNode right = this->pop();
166 RegexNode left = this->pop();
167 fStack.push(RegexNode(RegexNode::kOr_Kind, left, right));
168 break;
169 }
170 case END: // fall through
171 case ')':
172 return;
173 default:
Ethan Nicholasd9d33c32018-06-12 11:05:59 -0400174 SkASSERT(false);
Ethan Nicholasca82a922017-09-07 09:39:50 -0400175 }
176}