Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 1 | // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #ifndef V8_REGEXP_REGEXP_AST_H_ |
| 6 | #define V8_REGEXP_REGEXP_AST_H_ |
| 7 | |
Ben Murdoch | 097c5b2 | 2016-05-18 11:27:45 +0100 | [diff] [blame] | 8 | #include "src/objects.h" |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 9 | #include "src/utils.h" |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 10 | #include "src/zone-containers.h" |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 11 | #include "src/zone.h" |
| 12 | |
| 13 | namespace v8 { |
| 14 | namespace internal { |
| 15 | |
| 16 | #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ |
| 17 | VISIT(Disjunction) \ |
| 18 | VISIT(Alternative) \ |
| 19 | VISIT(Assertion) \ |
| 20 | VISIT(CharacterClass) \ |
| 21 | VISIT(Atom) \ |
| 22 | VISIT(Quantifier) \ |
| 23 | VISIT(Capture) \ |
| 24 | VISIT(Lookaround) \ |
| 25 | VISIT(BackReference) \ |
| 26 | VISIT(Empty) \ |
| 27 | VISIT(Text) |
| 28 | |
| 29 | |
| 30 | #define FORWARD_DECLARE(Name) class RegExp##Name; |
| 31 | FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE) |
| 32 | #undef FORWARD_DECLARE |
| 33 | |
| 34 | class RegExpCompiler; |
| 35 | class RegExpNode; |
| 36 | class RegExpTree; |
| 37 | |
| 38 | |
| 39 | class RegExpVisitor BASE_EMBEDDED { |
| 40 | public: |
| 41 | virtual ~RegExpVisitor() {} |
| 42 | #define MAKE_CASE(Name) \ |
| 43 | virtual void* Visit##Name(RegExp##Name*, void* data) = 0; |
| 44 | FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) |
| 45 | #undef MAKE_CASE |
| 46 | }; |
| 47 | |
| 48 | |
| 49 | // A simple closed interval. |
| 50 | class Interval { |
| 51 | public: |
| 52 | Interval() : from_(kNone), to_(kNone) {} |
| 53 | Interval(int from, int to) : from_(from), to_(to) {} |
| 54 | Interval Union(Interval that) { |
| 55 | if (that.from_ == kNone) |
| 56 | return *this; |
| 57 | else if (from_ == kNone) |
| 58 | return that; |
| 59 | else |
| 60 | return Interval(Min(from_, that.from_), Max(to_, that.to_)); |
| 61 | } |
| 62 | bool Contains(int value) { return (from_ <= value) && (value <= to_); } |
| 63 | bool is_empty() { return from_ == kNone; } |
| 64 | int from() const { return from_; } |
| 65 | int to() const { return to_; } |
| 66 | static Interval Empty() { return Interval(); } |
| 67 | static const int kNone = -1; |
| 68 | |
| 69 | private: |
| 70 | int from_; |
| 71 | int to_; |
| 72 | }; |
| 73 | |
| 74 | |
| 75 | // Represents code units in the range from from_ to to_, both ends are |
| 76 | // inclusive. |
| 77 | class CharacterRange { |
| 78 | public: |
| 79 | CharacterRange() : from_(0), to_(0) {} |
| 80 | // For compatibility with the CHECK_OK macro |
| 81 | CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 82 | static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges, |
| 83 | Zone* zone); |
| 84 | static Vector<const int> GetWordBounds(); |
Ben Murdoch | 097c5b2 | 2016-05-18 11:27:45 +0100 | [diff] [blame] | 85 | static inline CharacterRange Singleton(uc32 value) { |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 86 | return CharacterRange(value, value); |
| 87 | } |
Ben Murdoch | 097c5b2 | 2016-05-18 11:27:45 +0100 | [diff] [blame] | 88 | static inline CharacterRange Range(uc32 from, uc32 to) { |
| 89 | DCHECK(0 <= from && to <= String::kMaxCodePoint); |
| 90 | DCHECK(static_cast<uint32_t>(from) <= static_cast<uint32_t>(to)); |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 91 | return CharacterRange(from, to); |
| 92 | } |
| 93 | static inline CharacterRange Everything() { |
Ben Murdoch | 097c5b2 | 2016-05-18 11:27:45 +0100 | [diff] [blame] | 94 | return CharacterRange(0, String::kMaxCodePoint); |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 95 | } |
Ben Murdoch | 097c5b2 | 2016-05-18 11:27:45 +0100 | [diff] [blame] | 96 | static inline ZoneList<CharacterRange>* List(Zone* zone, |
| 97 | CharacterRange range) { |
| 98 | ZoneList<CharacterRange>* list = |
| 99 | new (zone) ZoneList<CharacterRange>(1, zone); |
| 100 | list->Add(range, zone); |
| 101 | return list; |
| 102 | } |
| 103 | bool Contains(uc32 i) { return from_ <= i && i <= to_; } |
| 104 | uc32 from() const { return from_; } |
| 105 | void set_from(uc32 value) { from_ = value; } |
| 106 | uc32 to() const { return to_; } |
| 107 | void set_to(uc32 value) { to_ = value; } |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 108 | bool is_valid() { return from_ <= to_; } |
| 109 | bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; } |
| 110 | bool IsSingleton() { return (from_ == to_); } |
Ben Murdoch | 097c5b2 | 2016-05-18 11:27:45 +0100 | [diff] [blame] | 111 | static void AddCaseEquivalents(Isolate* isolate, Zone* zone, |
| 112 | ZoneList<CharacterRange>* ranges, |
| 113 | bool is_one_byte); |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 114 | // Whether a range list is in canonical form: Ranges ordered by from value, |
| 115 | // and ranges non-overlapping and non-adjacent. |
| 116 | static bool IsCanonical(ZoneList<CharacterRange>* ranges); |
| 117 | // Convert range list to canonical form. The characters covered by the ranges |
| 118 | // will still be the same, but no character is in more than one range, and |
| 119 | // adjacent ranges are merged. The resulting list may be shorter than the |
| 120 | // original, but cannot be longer. |
| 121 | static void Canonicalize(ZoneList<CharacterRange>* ranges); |
| 122 | // Negate the contents of a character range in canonical form. |
| 123 | static void Negate(ZoneList<CharacterRange>* src, |
| 124 | ZoneList<CharacterRange>* dst, Zone* zone); |
| 125 | static const int kStartMarker = (1 << 24); |
| 126 | static const int kPayloadMask = (1 << 24) - 1; |
| 127 | |
| 128 | private: |
Ben Murdoch | 097c5b2 | 2016-05-18 11:27:45 +0100 | [diff] [blame] | 129 | CharacterRange(uc32 from, uc32 to) : from_(from), to_(to) {} |
| 130 | |
| 131 | uc32 from_; |
| 132 | uc32 to_; |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 133 | }; |
| 134 | |
| 135 | |
| 136 | class CharacterSet final BASE_EMBEDDED { |
| 137 | public: |
| 138 | explicit CharacterSet(uc16 standard_set_type) |
| 139 | : ranges_(NULL), standard_set_type_(standard_set_type) {} |
| 140 | explicit CharacterSet(ZoneList<CharacterRange>* ranges) |
| 141 | : ranges_(ranges), standard_set_type_(0) {} |
| 142 | ZoneList<CharacterRange>* ranges(Zone* zone); |
| 143 | uc16 standard_set_type() { return standard_set_type_; } |
| 144 | void set_standard_set_type(uc16 special_set_type) { |
| 145 | standard_set_type_ = special_set_type; |
| 146 | } |
| 147 | bool is_standard() { return standard_set_type_ != 0; } |
| 148 | void Canonicalize(); |
| 149 | |
| 150 | private: |
| 151 | ZoneList<CharacterRange>* ranges_; |
| 152 | // If non-zero, the value represents a standard set (e.g., all whitespace |
| 153 | // characters) without having to expand the ranges. |
| 154 | uc16 standard_set_type_; |
| 155 | }; |
| 156 | |
| 157 | |
| 158 | class TextElement final BASE_EMBEDDED { |
| 159 | public: |
| 160 | enum TextType { ATOM, CHAR_CLASS }; |
| 161 | |
| 162 | static TextElement Atom(RegExpAtom* atom); |
| 163 | static TextElement CharClass(RegExpCharacterClass* char_class); |
| 164 | |
| 165 | int cp_offset() const { return cp_offset_; } |
| 166 | void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; } |
| 167 | int length() const; |
| 168 | |
| 169 | TextType text_type() const { return text_type_; } |
| 170 | |
| 171 | RegExpTree* tree() const { return tree_; } |
| 172 | |
| 173 | RegExpAtom* atom() const { |
| 174 | DCHECK(text_type() == ATOM); |
| 175 | return reinterpret_cast<RegExpAtom*>(tree()); |
| 176 | } |
| 177 | |
| 178 | RegExpCharacterClass* char_class() const { |
| 179 | DCHECK(text_type() == CHAR_CLASS); |
| 180 | return reinterpret_cast<RegExpCharacterClass*>(tree()); |
| 181 | } |
| 182 | |
| 183 | private: |
| 184 | TextElement(TextType text_type, RegExpTree* tree) |
| 185 | : cp_offset_(-1), text_type_(text_type), tree_(tree) {} |
| 186 | |
| 187 | int cp_offset_; |
| 188 | TextType text_type_; |
| 189 | RegExpTree* tree_; |
| 190 | }; |
| 191 | |
| 192 | |
| 193 | class RegExpTree : public ZoneObject { |
| 194 | public: |
| 195 | static const int kInfinity = kMaxInt; |
| 196 | virtual ~RegExpTree() {} |
| 197 | virtual void* Accept(RegExpVisitor* visitor, void* data) = 0; |
| 198 | virtual RegExpNode* ToNode(RegExpCompiler* compiler, |
| 199 | RegExpNode* on_success) = 0; |
| 200 | virtual bool IsTextElement() { return false; } |
| 201 | virtual bool IsAnchoredAtStart() { return false; } |
| 202 | virtual bool IsAnchoredAtEnd() { return false; } |
| 203 | virtual int min_match() = 0; |
| 204 | virtual int max_match() = 0; |
| 205 | // Returns the interval of registers used for captures within this |
| 206 | // expression. |
| 207 | virtual Interval CaptureRegisters() { return Interval::Empty(); } |
| 208 | virtual void AppendToText(RegExpText* text, Zone* zone); |
| 209 | std::ostream& Print(std::ostream& os, Zone* zone); // NOLINT |
| 210 | #define MAKE_ASTYPE(Name) \ |
| 211 | virtual RegExp##Name* As##Name(); \ |
| 212 | virtual bool Is##Name(); |
| 213 | FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE) |
| 214 | #undef MAKE_ASTYPE |
| 215 | }; |
| 216 | |
| 217 | |
| 218 | class RegExpDisjunction final : public RegExpTree { |
| 219 | public: |
| 220 | explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives); |
| 221 | void* Accept(RegExpVisitor* visitor, void* data) override; |
| 222 | RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; |
| 223 | RegExpDisjunction* AsDisjunction() override; |
| 224 | Interval CaptureRegisters() override; |
| 225 | bool IsDisjunction() override; |
| 226 | bool IsAnchoredAtStart() override; |
| 227 | bool IsAnchoredAtEnd() override; |
| 228 | int min_match() override { return min_match_; } |
| 229 | int max_match() override { return max_match_; } |
| 230 | ZoneList<RegExpTree*>* alternatives() { return alternatives_; } |
| 231 | |
| 232 | private: |
| 233 | bool SortConsecutiveAtoms(RegExpCompiler* compiler); |
| 234 | void RationalizeConsecutiveAtoms(RegExpCompiler* compiler); |
| 235 | void FixSingleCharacterDisjunctions(RegExpCompiler* compiler); |
| 236 | ZoneList<RegExpTree*>* alternatives_; |
| 237 | int min_match_; |
| 238 | int max_match_; |
| 239 | }; |
| 240 | |
| 241 | |
| 242 | class RegExpAlternative final : public RegExpTree { |
| 243 | public: |
| 244 | explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes); |
| 245 | void* Accept(RegExpVisitor* visitor, void* data) override; |
| 246 | RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; |
| 247 | RegExpAlternative* AsAlternative() override; |
| 248 | Interval CaptureRegisters() override; |
| 249 | bool IsAlternative() override; |
| 250 | bool IsAnchoredAtStart() override; |
| 251 | bool IsAnchoredAtEnd() override; |
| 252 | int min_match() override { return min_match_; } |
| 253 | int max_match() override { return max_match_; } |
| 254 | ZoneList<RegExpTree*>* nodes() { return nodes_; } |
| 255 | |
| 256 | private: |
| 257 | ZoneList<RegExpTree*>* nodes_; |
| 258 | int min_match_; |
| 259 | int max_match_; |
| 260 | }; |
| 261 | |
| 262 | |
| 263 | class RegExpAssertion final : public RegExpTree { |
| 264 | public: |
| 265 | enum AssertionType { |
| 266 | START_OF_LINE, |
| 267 | START_OF_INPUT, |
| 268 | END_OF_LINE, |
| 269 | END_OF_INPUT, |
| 270 | BOUNDARY, |
| 271 | NON_BOUNDARY |
| 272 | }; |
| 273 | explicit RegExpAssertion(AssertionType type) : assertion_type_(type) {} |
| 274 | void* Accept(RegExpVisitor* visitor, void* data) override; |
| 275 | RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; |
| 276 | RegExpAssertion* AsAssertion() override; |
| 277 | bool IsAssertion() override; |
| 278 | bool IsAnchoredAtStart() override; |
| 279 | bool IsAnchoredAtEnd() override; |
| 280 | int min_match() override { return 0; } |
| 281 | int max_match() override { return 0; } |
| 282 | AssertionType assertion_type() { return assertion_type_; } |
| 283 | |
| 284 | private: |
| 285 | AssertionType assertion_type_; |
| 286 | }; |
| 287 | |
| 288 | |
| 289 | class RegExpCharacterClass final : public RegExpTree { |
| 290 | public: |
| 291 | RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated) |
| 292 | : set_(ranges), is_negated_(is_negated) {} |
| 293 | explicit RegExpCharacterClass(uc16 type) : set_(type), is_negated_(false) {} |
| 294 | void* Accept(RegExpVisitor* visitor, void* data) override; |
| 295 | RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; |
| 296 | RegExpCharacterClass* AsCharacterClass() override; |
| 297 | bool IsCharacterClass() override; |
| 298 | bool IsTextElement() override { return true; } |
| 299 | int min_match() override { return 1; } |
Ben Murdoch | c561043 | 2016-08-08 18:44:38 +0100 | [diff] [blame] | 300 | // The character class may match two code units for unicode regexps. |
| 301 | // TODO(yangguo): we should split this class for usage in TextElement, and |
| 302 | // make max_match() dependent on the character class content. |
| 303 | int max_match() override { return 2; } |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 304 | void AppendToText(RegExpText* text, Zone* zone) override; |
| 305 | CharacterSet character_set() { return set_; } |
| 306 | // TODO(lrn): Remove need for complex version if is_standard that |
| 307 | // recognizes a mangled standard set and just do { return set_.is_special(); } |
| 308 | bool is_standard(Zone* zone); |
| 309 | // Returns a value representing the standard character set if is_standard() |
| 310 | // returns true. |
| 311 | // Currently used values are: |
| 312 | // s : unicode whitespace |
| 313 | // S : unicode non-whitespace |
| 314 | // w : ASCII word character (digit, letter, underscore) |
| 315 | // W : non-ASCII word character |
| 316 | // d : ASCII digit |
| 317 | // D : non-ASCII digit |
Ben Murdoch | 097c5b2 | 2016-05-18 11:27:45 +0100 | [diff] [blame] | 318 | // . : non-newline |
| 319 | // * : All characters, for advancing unanchored regexp |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 320 | uc16 standard_type() { return set_.standard_set_type(); } |
| 321 | ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); } |
| 322 | bool is_negated() { return is_negated_; } |
| 323 | |
| 324 | private: |
| 325 | CharacterSet set_; |
| 326 | bool is_negated_; |
| 327 | }; |
| 328 | |
| 329 | |
| 330 | class RegExpAtom final : public RegExpTree { |
| 331 | public: |
| 332 | explicit RegExpAtom(Vector<const uc16> data) : data_(data) {} |
| 333 | void* Accept(RegExpVisitor* visitor, void* data) override; |
| 334 | RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; |
| 335 | RegExpAtom* AsAtom() override; |
| 336 | bool IsAtom() override; |
| 337 | bool IsTextElement() override { return true; } |
| 338 | int min_match() override { return data_.length(); } |
| 339 | int max_match() override { return data_.length(); } |
| 340 | void AppendToText(RegExpText* text, Zone* zone) override; |
| 341 | Vector<const uc16> data() { return data_; } |
| 342 | int length() { return data_.length(); } |
| 343 | |
| 344 | private: |
| 345 | Vector<const uc16> data_; |
| 346 | }; |
| 347 | |
| 348 | |
| 349 | class RegExpText final : public RegExpTree { |
| 350 | public: |
| 351 | explicit RegExpText(Zone* zone) : elements_(2, zone), length_(0) {} |
| 352 | void* Accept(RegExpVisitor* visitor, void* data) override; |
| 353 | RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; |
| 354 | RegExpText* AsText() override; |
| 355 | bool IsText() override; |
| 356 | bool IsTextElement() override { return true; } |
| 357 | int min_match() override { return length_; } |
| 358 | int max_match() override { return length_; } |
| 359 | void AppendToText(RegExpText* text, Zone* zone) override; |
| 360 | void AddElement(TextElement elm, Zone* zone) { |
| 361 | elements_.Add(elm, zone); |
| 362 | length_ += elm.length(); |
| 363 | } |
| 364 | ZoneList<TextElement>* elements() { return &elements_; } |
| 365 | |
| 366 | private: |
| 367 | ZoneList<TextElement> elements_; |
| 368 | int length_; |
| 369 | }; |
| 370 | |
| 371 | |
| 372 | class RegExpQuantifier final : public RegExpTree { |
| 373 | public: |
| 374 | enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE }; |
| 375 | RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body) |
| 376 | : body_(body), |
| 377 | min_(min), |
| 378 | max_(max), |
| 379 | min_match_(min * body->min_match()), |
| 380 | quantifier_type_(type) { |
| 381 | if (max > 0 && body->max_match() > kInfinity / max) { |
| 382 | max_match_ = kInfinity; |
| 383 | } else { |
| 384 | max_match_ = max * body->max_match(); |
| 385 | } |
| 386 | } |
| 387 | void* Accept(RegExpVisitor* visitor, void* data) override; |
| 388 | RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; |
| 389 | static RegExpNode* ToNode(int min, int max, bool is_greedy, RegExpTree* body, |
| 390 | RegExpCompiler* compiler, RegExpNode* on_success, |
| 391 | bool not_at_start = false); |
| 392 | RegExpQuantifier* AsQuantifier() override; |
| 393 | Interval CaptureRegisters() override; |
| 394 | bool IsQuantifier() override; |
| 395 | int min_match() override { return min_match_; } |
| 396 | int max_match() override { return max_match_; } |
| 397 | int min() { return min_; } |
| 398 | int max() { return max_; } |
| 399 | bool is_possessive() { return quantifier_type_ == POSSESSIVE; } |
| 400 | bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; } |
| 401 | bool is_greedy() { return quantifier_type_ == GREEDY; } |
| 402 | RegExpTree* body() { return body_; } |
| 403 | |
| 404 | private: |
| 405 | RegExpTree* body_; |
| 406 | int min_; |
| 407 | int max_; |
| 408 | int min_match_; |
| 409 | int max_match_; |
| 410 | QuantifierType quantifier_type_; |
| 411 | }; |
| 412 | |
| 413 | |
| 414 | class RegExpCapture final : public RegExpTree { |
| 415 | public: |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 416 | explicit RegExpCapture(int index) |
| 417 | : body_(NULL), index_(index), name_(nullptr) {} |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 418 | void* Accept(RegExpVisitor* visitor, void* data) override; |
| 419 | RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; |
| 420 | static RegExpNode* ToNode(RegExpTree* body, int index, |
| 421 | RegExpCompiler* compiler, RegExpNode* on_success); |
| 422 | RegExpCapture* AsCapture() override; |
| 423 | bool IsAnchoredAtStart() override; |
| 424 | bool IsAnchoredAtEnd() override; |
| 425 | Interval CaptureRegisters() override; |
| 426 | bool IsCapture() override; |
| 427 | int min_match() override { return body_->min_match(); } |
| 428 | int max_match() override { return body_->max_match(); } |
| 429 | RegExpTree* body() { return body_; } |
| 430 | void set_body(RegExpTree* body) { body_ = body; } |
| 431 | int index() { return index_; } |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 432 | const ZoneVector<uc16>* name() const { return name_; } |
| 433 | void set_name(const ZoneVector<uc16>* name) { name_ = name; } |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 434 | static int StartRegister(int index) { return index * 2; } |
| 435 | static int EndRegister(int index) { return index * 2 + 1; } |
| 436 | |
| 437 | private: |
| 438 | RegExpTree* body_; |
| 439 | int index_; |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 440 | const ZoneVector<uc16>* name_; |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 441 | }; |
| 442 | |
| 443 | |
| 444 | class RegExpLookaround final : public RegExpTree { |
| 445 | public: |
| 446 | enum Type { LOOKAHEAD, LOOKBEHIND }; |
| 447 | |
| 448 | RegExpLookaround(RegExpTree* body, bool is_positive, int capture_count, |
| 449 | int capture_from, Type type) |
| 450 | : body_(body), |
| 451 | is_positive_(is_positive), |
| 452 | capture_count_(capture_count), |
| 453 | capture_from_(capture_from), |
| 454 | type_(type) {} |
| 455 | |
| 456 | void* Accept(RegExpVisitor* visitor, void* data) override; |
| 457 | RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; |
| 458 | RegExpLookaround* AsLookaround() override; |
| 459 | Interval CaptureRegisters() override; |
| 460 | bool IsLookaround() override; |
| 461 | bool IsAnchoredAtStart() override; |
| 462 | int min_match() override { return 0; } |
| 463 | int max_match() override { return 0; } |
| 464 | RegExpTree* body() { return body_; } |
| 465 | bool is_positive() { return is_positive_; } |
| 466 | int capture_count() { return capture_count_; } |
| 467 | int capture_from() { return capture_from_; } |
| 468 | Type type() { return type_; } |
| 469 | |
Ben Murdoch | 097c5b2 | 2016-05-18 11:27:45 +0100 | [diff] [blame] | 470 | class Builder { |
| 471 | public: |
| 472 | Builder(bool is_positive, RegExpNode* on_success, |
| 473 | int stack_pointer_register, int position_register, |
| 474 | int capture_register_count = 0, int capture_register_start = 0); |
| 475 | RegExpNode* on_match_success() { return on_match_success_; } |
| 476 | RegExpNode* ForMatch(RegExpNode* match); |
| 477 | |
| 478 | private: |
| 479 | bool is_positive_; |
| 480 | RegExpNode* on_match_success_; |
| 481 | RegExpNode* on_success_; |
| 482 | int stack_pointer_register_; |
| 483 | int position_register_; |
| 484 | }; |
| 485 | |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 486 | private: |
| 487 | RegExpTree* body_; |
| 488 | bool is_positive_; |
| 489 | int capture_count_; |
| 490 | int capture_from_; |
| 491 | Type type_; |
| 492 | }; |
| 493 | |
| 494 | |
| 495 | class RegExpBackReference final : public RegExpTree { |
| 496 | public: |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 497 | RegExpBackReference() : capture_(nullptr), name_(nullptr) {} |
| 498 | explicit RegExpBackReference(RegExpCapture* capture) |
| 499 | : capture_(capture), name_(nullptr) {} |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 500 | void* Accept(RegExpVisitor* visitor, void* data) override; |
| 501 | RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; |
| 502 | RegExpBackReference* AsBackReference() override; |
| 503 | bool IsBackReference() override; |
| 504 | int min_match() override { return 0; } |
| 505 | // The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite |
| 506 | // recursion, we give up. Ignorance is bliss. |
| 507 | int max_match() override { return kInfinity; } |
| 508 | int index() { return capture_->index(); } |
| 509 | RegExpCapture* capture() { return capture_; } |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 510 | void set_capture(RegExpCapture* capture) { capture_ = capture; } |
| 511 | const ZoneVector<uc16>* name() const { return name_; } |
| 512 | void set_name(const ZoneVector<uc16>* name) { name_ = name; } |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 513 | |
| 514 | private: |
| 515 | RegExpCapture* capture_; |
Ben Murdoch | 61f157c | 2016-09-16 13:49:30 +0100 | [diff] [blame] | 516 | const ZoneVector<uc16>* name_; |
Ben Murdoch | 4a90d5f | 2016-03-22 12:00:34 +0000 | [diff] [blame] | 517 | }; |
| 518 | |
| 519 | |
| 520 | class RegExpEmpty final : public RegExpTree { |
| 521 | public: |
| 522 | RegExpEmpty() {} |
| 523 | void* Accept(RegExpVisitor* visitor, void* data) override; |
| 524 | RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; |
| 525 | RegExpEmpty* AsEmpty() override; |
| 526 | bool IsEmpty() override; |
| 527 | int min_match() override { return 0; } |
| 528 | int max_match() override { return 0; } |
| 529 | }; |
| 530 | |
| 531 | } // namespace internal |
| 532 | } // namespace v8 |
| 533 | |
| 534 | #endif // V8_REGEXP_REGEXP_AST_H_ |