blob: 3ab6651632f537b93210610649c2687a3b54379c [file] [log] [blame]
ager@chromium.org9258b6b2008-09-11 09:11:10 +00001// Copyright 2006-2008 the V8 project authors. All rights reserved.
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +00002// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_JSREGEXP_H_
29#define V8_JSREGEXP_H_
30
31namespace v8 { namespace internal {
32
ager@chromium.orga74f0da2008-12-03 16:05:52 +000033
34class RegExpMacroAssembler;
35
36
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000037class RegExpImpl {
38 public:
39 // Creates a regular expression literal in the old space.
40 // This function calls the garbage collector if necessary.
mads.s.ager@gmail.com9a4089a2008-09-01 08:55:01 +000041 static Handle<Object> CreateRegExpLiteral(Handle<JSFunction> constructor,
42 Handle<String> pattern,
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000043 Handle<String> flags,
44 bool* has_pending_exception);
45
46 // Returns a string representation of a regular expression.
47 // Implements RegExp.prototype.toString, see ECMA-262 section 15.10.6.4.
48 // This function calls the garbage collector if necessary.
49 static Handle<String> ToString(Handle<Object> value);
50
kasperl@chromium.org41044eb2008-10-06 08:24:46 +000051 static Handle<Object> Compile(Handle<JSRegExp> re,
52 Handle<String> pattern,
53 Handle<String> flags);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000054
55 // Implements RegExp.prototype.exec(string) function.
56 // See ECMA-262 section 15.10.6.2.
57 // This function calls the garbage collector if necessary.
kasperl@chromium.org41044eb2008-10-06 08:24:46 +000058 static Handle<Object> Exec(Handle<JSRegExp> regexp,
59 Handle<String> subject,
60 Handle<Object> index);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000061
62 // Call RegExp.prototyp.exec(string) in a loop.
63 // Used by String.prototype.match and String.prototype.replace.
64 // This function calls the garbage collector if necessary.
kasperl@chromium.org41044eb2008-10-06 08:24:46 +000065 static Handle<Object> ExecGlobal(Handle<JSRegExp> regexp,
66 Handle<String> subject);
67
ager@chromium.orga74f0da2008-12-03 16:05:52 +000068 // Stores an uncompiled RegExp pattern in the JSRegExp object.
69 // It will be compiled by JSCRE when first executed.
70 static Handle<Object> JscrePrepare(Handle<JSRegExp> re,
71 Handle<String> pattern,
72 JSRegExp::Flags flags);
73
74 // Stores a compiled RegExp pattern in the JSRegExp object.
75 // The pattern is compiled by Irregexp.
76 static Handle<Object> IrregexpPrepare(Handle<JSRegExp> re,
77 Handle<String> pattern,
78 JSRegExp::Flags flags,
79 Handle<FixedArray> irregexp_data);
80
81
82 // Compile the pattern using JSCRE and store the result in the
83 // JSRegExp object.
84 static Handle<Object> JscreCompile(Handle<JSRegExp> re);
85
kasperl@chromium.org41044eb2008-10-06 08:24:46 +000086 static Handle<Object> AtomCompile(Handle<JSRegExp> re,
kasperl@chromium.org9fe21c62008-10-28 08:53:51 +000087 Handle<String> pattern,
ager@chromium.orga74f0da2008-12-03 16:05:52 +000088 JSRegExp::Flags flags,
89 Handle<String> match_pattern);
kasperl@chromium.org41044eb2008-10-06 08:24:46 +000090 static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
91 Handle<String> subject,
92 Handle<Object> index);
93
94 static Handle<Object> AtomExecGlobal(Handle<JSRegExp> regexp,
95 Handle<String> subject);
96
ager@chromium.orga74f0da2008-12-03 16:05:52 +000097 static Handle<Object> JscreCompile(Handle<JSRegExp> re,
98 Handle<String> pattern,
99 JSRegExp::Flags flags);
kasperl@chromium.org41044eb2008-10-06 08:24:46 +0000100
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000101 // Execute a compiled JSCRE pattern.
102 static Handle<Object> JscreExec(Handle<JSRegExp> regexp,
103 Handle<String> subject,
104 Handle<Object> index);
kasperl@chromium.org41044eb2008-10-06 08:24:46 +0000105
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000106 // Execute an Irregexp bytecode pattern.
107 static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp,
108 Handle<String> subject,
109 Handle<Object> index);
110
111 static Handle<Object> JscreExecGlobal(Handle<JSRegExp> regexp,
112 Handle<String> subject);
113
114 static Handle<Object> IrregexpExecGlobal(Handle<JSRegExp> regexp,
115 Handle<String> subject);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000116
117 static void NewSpaceCollectionPrologue();
118 static void OldSpaceCollectionPrologue();
119
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000120 // Converts a source string to a 16 bit flat string. The string
121 // will be either sequential or it will be a SlicedString backed
122 // by a flat string.
123 static Handle<String> StringToTwoByte(Handle<String> pattern);
124 static Handle<String> CachedStringToTwoByte(Handle<String> pattern);
125
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000126 static const int kIrregexpImplementationIndex = 0;
127 static const int kIrregexpNumberOfCapturesIndex = 1;
128 static const int kIrregexpNumberOfRegistersIndex = 2;
129 static const int kIrregexpCodeIndex = 3;
130 static const int kIrregexpDataLength = 4;
131
132 static const int kJscreNumberOfCapturesIndex = 0;
133 static const int kJscreInternalIndex = 1;
134 static const int kJscreDataLength = 2;
135
136 private:
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000137 static String* last_ascii_string_;
138 static String* two_byte_cached_string_;
139
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000140 static int JscreNumberOfCaptures(Handle<JSRegExp> re);
141 static ByteArray* JscreInternal(Handle<JSRegExp> re);
142
143 static int IrregexpNumberOfCaptures(Handle<JSRegExp> re);
144 static int IrregexpNumberOfRegisters(Handle<JSRegExp> re);
145 static Handle<ByteArray> IrregexpCode(Handle<JSRegExp> re);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000146
147 // Call jsRegExpExecute once
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000148 static Handle<Object> JscreExecOnce(Handle<JSRegExp> regexp,
149 int num_captures,
150 Handle<String> subject,
151 int previous_index,
152 const uc16* utf8_subject,
153 int* ovector,
154 int ovector_length);
155
156 static Handle<Object> IrregexpExecOnce(Handle<JSRegExp> regexp,
157 int num_captures,
158 Handle<String> subject16,
159 int previous_index,
160 int* ovector,
161 int ovector_length);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000162
163 // Set the subject cache. The previous string buffer is not deleted, so the
164 // caller should ensure that it doesn't leak.
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000165 static void SetSubjectCache(String* subject,
166 char* utf8_subject,
167 int uft8_length,
168 int character_position,
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000169 int utf8_position);
170
171 // A one element cache of the last utf8_subject string and its length. The
172 // subject JS String object is cached in the heap. We also cache a
173 // translation between position and utf8 position.
174 static char* utf8_subject_cache_;
175 static int utf8_length_cache_;
176 static int utf8_position_;
177 static int character_position_;
178};
179
180
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000181class CharacterRange {
182 public:
183 CharacterRange() : from_(0), to_(0) { }
184 // For compatibility with the CHECK_OK macro
185 CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT
186 CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { }
187 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges);
188 static Vector<const uc16> GetWordBounds();
189 static inline CharacterRange Singleton(uc16 value) {
190 return CharacterRange(value, value);
191 }
192 static inline CharacterRange Range(uc16 from, uc16 to) {
193 ASSERT(from <= to);
194 return CharacterRange(from, to);
195 }
196 static inline CharacterRange Everything() {
197 return CharacterRange(0, 0xFFFF);
198 }
199 bool Contains(uc16 i) { return from_ <= i && i <= to_; }
200 uc16 from() const { return from_; }
201 void set_from(uc16 value) { from_ = value; }
202 uc16 to() const { return to_; }
203 void set_to(uc16 value) { to_ = value; }
204 bool is_valid() { return from_ <= to_; }
205 bool IsSingleton() { return (from_ == to_); }
206 void AddCaseEquivalents(ZoneList<CharacterRange>* ranges);
207 static void Split(ZoneList<CharacterRange>* base,
208 Vector<const uc16> overlay,
209 ZoneList<CharacterRange>** included,
210 ZoneList<CharacterRange>** excluded);
211
212 static const int kRangeCanonicalizeMax = 0x346;
213 static const int kStartMarker = (1 << 24);
214 static const int kPayloadMask = (1 << 24) - 1;
215
216 private:
217 uc16 from_;
218 uc16 to_;
219};
220
221
222template <typename Node, class Callback>
223static void DoForEach(Node* node, Callback* callback);
224
225
226// A zone splay tree. The config type parameter encapsulates the
227// different configurations of a concrete splay tree:
228//
229// typedef Key: the key type
230// typedef Value: the value type
231// static const kNoKey: the dummy key used when no key is set
232// static const kNoValue: the dummy value used to initialize nodes
233// int (Compare)(Key& a, Key& b) -> {-1, 0, 1}: comparison function
234//
235template <typename Config>
236class ZoneSplayTree : public ZoneObject {
237 public:
238 typedef typename Config::Key Key;
239 typedef typename Config::Value Value;
240
241 class Locator;
242
243 ZoneSplayTree() : root_(NULL) { }
244
245 // Inserts the given key in this tree with the given value. Returns
246 // true if a node was inserted, otherwise false. If found the locator
247 // is enabled and provides access to the mapping for the key.
248 bool Insert(const Key& key, Locator* locator);
249
250 // Looks up the key in this tree and returns true if it was found,
251 // otherwise false. If the node is found the locator is enabled and
252 // provides access to the mapping for the key.
253 bool Find(const Key& key, Locator* locator);
254
255 // Finds the mapping with the greatest key less than or equal to the
256 // given key.
257 bool FindGreatestLessThan(const Key& key, Locator* locator);
258
259 // Find the mapping with the greatest key in this tree.
260 bool FindGreatest(Locator* locator);
261
262 // Finds the mapping with the least key greater than or equal to the
263 // given key.
264 bool FindLeastGreaterThan(const Key& key, Locator* locator);
265
266 // Find the mapping with the least key in this tree.
267 bool FindLeast(Locator* locator);
268
269 // Remove the node with the given key from the tree.
270 bool Remove(const Key& key);
271
272 bool is_empty() { return root_ == NULL; }
273
274 // Perform the splay operation for the given key. Moves the node with
275 // the given key to the top of the tree. If no node has the given
276 // key, the last node on the search path is moved to the top of the
277 // tree.
278 void Splay(const Key& key);
279
280 class Node : public ZoneObject {
281 public:
282 Node(const Key& key, const Value& value)
283 : key_(key),
284 value_(value),
285 left_(NULL),
286 right_(NULL) { }
287 Key key() { return key_; }
288 Value value() { return value_; }
289 Node* left() { return left_; }
290 Node* right() { return right_; }
291 private:
292 friend class ZoneSplayTree;
293 friend class Locator;
294 Key key_;
295 Value value_;
296 Node* left_;
297 Node* right_;
298 };
299
300 // A locator provides access to a node in the tree without actually
301 // exposing the node.
302 class Locator {
303 public:
304 explicit Locator(Node* node) : node_(node) { }
305 Locator() : node_(NULL) { }
306 const Key& key() { return node_->key_; }
307 Value& value() { return node_->value_; }
308 void set_value(const Value& value) { node_->value_ = value; }
309 inline void bind(Node* node) { node_ = node; }
310 private:
311 Node* node_;
312 };
313
314 template <class Callback>
315 void ForEach(Callback* c) {
316 DoForEach<typename ZoneSplayTree<Config>::Node, Callback>(root_, c);
317 }
318
319 private:
320 Node* root_;
321};
322
323
324// A set of unsigned integers that behaves especially well on small
325// integers (< 32). May do zone-allocation.
326class OutSet: public ZoneObject {
327 public:
328 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { }
329 OutSet* Extend(unsigned value);
330 bool Get(unsigned value);
331 static const unsigned kFirstLimit = 32;
332
333 private:
334 // Destructively set a value in this set. In most cases you want
335 // to use Extend instead to ensure that only one instance exists
336 // that contains the same values.
337 void Set(unsigned value);
338
339 // The successors are a list of sets that contain the same values
340 // as this set and the one more value that is not present in this
341 // set.
342 ZoneList<OutSet*>* successors() { return successors_; }
343
344 OutSet(uint32_t first, ZoneList<unsigned>* remaining)
345 : first_(first), remaining_(remaining), successors_(NULL) { }
346 uint32_t first_;
347 ZoneList<unsigned>* remaining_;
348 ZoneList<OutSet*>* successors_;
349};
350
351
352// A mapping from integers, specified as ranges, to a set of integers.
353// Used for mapping character ranges to choices.
354class DispatchTable : public ZoneObject {
355 public:
356 class Entry {
357 public:
358 Entry() : from_(0), to_(0), out_set_(NULL) { }
359 Entry(uc16 from, uc16 to, OutSet* out_set)
360 : from_(from), to_(to), out_set_(out_set) { }
361 uc16 from() { return from_; }
362 uc16 to() { return to_; }
363 void set_to(uc16 value) { to_ = value; }
364 void AddValue(int value) { out_set_ = out_set_->Extend(value); }
365 OutSet* out_set() { return out_set_; }
366 private:
367 uc16 from_;
368 uc16 to_;
369 OutSet* out_set_;
370 };
371
372 class Config {
373 public:
374 typedef uc16 Key;
375 typedef Entry Value;
376 static const uc16 kNoKey;
377 static const Entry kNoValue;
378 static inline int Compare(uc16 a, uc16 b) {
379 if (a == b)
380 return 0;
381 else if (a < b)
382 return -1;
383 else
384 return 1;
385 }
386 };
387
388 void AddRange(CharacterRange range, int value);
389 OutSet* Get(uc16 value);
390 void Dump();
391
392 template <typename Callback>
393 void ForEach(Callback* callback) { return tree()->ForEach(callback); }
394 private:
395 // There can't be a static empty set since it allocates its
396 // successors in a zone and caches them.
397 OutSet* empty() { return &empty_; }
398 OutSet empty_;
399 ZoneSplayTree<Config>* tree() { return &tree_; }
400 ZoneSplayTree<Config> tree_;
401};
402
403
404#define FOR_EACH_NODE_TYPE(VISIT) \
405 VISIT(End) \
406 VISIT(Action) \
407 VISIT(Choice) \
408 VISIT(BackReference) \
409 VISIT(Text)
410
411
412#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
413 VISIT(Disjunction) \
414 VISIT(Alternative) \
415 VISIT(Assertion) \
416 VISIT(CharacterClass) \
417 VISIT(Atom) \
418 VISIT(Quantifier) \
419 VISIT(Capture) \
420 VISIT(Lookahead) \
421 VISIT(BackReference) \
422 VISIT(Empty) \
423 VISIT(Text)
424
425
426#define FORWARD_DECLARE(Name) class RegExp##Name;
427FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
428#undef FORWARD_DECLARE
429
430
431class TextElement {
432 public:
433 enum Type {UNINITIALIZED, ATOM, CHAR_CLASS};
434 TextElement() : type(UNINITIALIZED) { }
435 explicit TextElement(Type t) : type(t) { }
436 static TextElement Atom(RegExpAtom* atom);
437 static TextElement CharClass(RegExpCharacterClass* char_class);
438 Type type;
439 union {
440 RegExpAtom* u_atom;
441 RegExpCharacterClass* u_char_class;
442 } data;
443};
444
445
446struct NodeInfo {
447 enum TriBool {
448 UNKNOWN = -1, FALSE = 0, TRUE = 1
449 };
450
451 NodeInfo()
452 : being_analyzed(false),
453 been_analyzed(false),
454 being_expanded(false),
455 been_expanded(false),
456 determine_word(false),
457 determine_newline(false),
458 determine_start(false),
459 does_determine_word(false),
460 does_determine_newline(false),
461 does_determine_start(false),
462 follows_word_interest(false),
463 follows_newline_interest(false),
464 follows_start_interest(false),
465 is_word(UNKNOWN),
466 is_newline(UNKNOWN),
467 at_end(false),
468 follows_word(UNKNOWN),
469 follows_newline(UNKNOWN),
470 follows_start(UNKNOWN),
471 visited(false) { }
472
473 // Returns true if the interests and assumptions of this node
474 // matches the given one.
475 bool Matches(NodeInfo* that) {
476 return (at_end == that->at_end) &&
477 (follows_word_interest == that->follows_word_interest) &&
478 (follows_newline_interest == that->follows_newline_interest) &&
479 (follows_start_interest == that->follows_start_interest) &&
480 (follows_word == that->follows_word) &&
481 (follows_newline == that->follows_newline) &&
482 (follows_start == that->follows_start) &&
483 (does_determine_word == that->does_determine_word) &&
484 (does_determine_newline == that->does_determine_newline) &&
485 (does_determine_start == that->does_determine_start);
486 }
487
488 bool HasAssertions() {
489 return (follows_word != UNKNOWN) ||
490 (follows_newline != UNKNOWN) ||
491 (follows_start != UNKNOWN);
492 }
493
494 // Updates the interests of this node given the interests of the
495 // node preceding it.
496 void AddFromPreceding(NodeInfo* that) {
497 at_end |= that->at_end;
498 follows_word_interest |= that->follows_word_interest;
499 follows_newline_interest |= that->follows_newline_interest;
500 follows_start_interest |= that->follows_start_interest;
501 }
502
503 void AddAssumptions(NodeInfo* that) {
504 if (that->follows_word != UNKNOWN) {
505 ASSERT(follows_word == UNKNOWN || follows_word == that->follows_word);
506 follows_word = that->follows_word;
507 }
508 if (that->follows_newline != UNKNOWN) {
509 ASSERT(follows_newline == UNKNOWN ||
510 follows_newline == that->follows_newline);
511 follows_newline = that->follows_newline;
512 }
513 if (that->follows_start != UNKNOWN) {
514 ASSERT(follows_start == UNKNOWN ||
515 follows_start == that->follows_start);
516 follows_start = that->follows_start;
517 }
518 does_determine_word = that->does_determine_word;
519 does_determine_newline = that->does_determine_newline;
520 does_determine_start = that->does_determine_start;
521 }
522
523 // Sets the interests of this node to include the interests of the
524 // following node.
525 void AddFromFollowing(NodeInfo* that) {
526 follows_word_interest |= that->follows_word_interest;
527 follows_newline_interest |= that->follows_newline_interest;
528 follows_start_interest |= that->follows_start_interest;
529 }
530
531 void ResetCompilationState() {
532 being_analyzed = false;
533 been_analyzed = false;
534 being_expanded = false;
535 been_expanded = false;
536 }
537
538 bool being_analyzed: 1;
539 bool been_analyzed: 1;
540 bool being_expanded: 1;
541 bool been_expanded: 1;
542
543 // These bits are set if this node must propagate forward information
544 // about the last character it consumed (or, in the case of 'start',
545 // if it is at the start of the input).
546 bool determine_word: 1;
547 bool determine_newline: 1;
548 bool determine_start: 1;
549
550 bool does_determine_word: 1;
551 bool does_determine_newline: 1;
552 bool does_determine_start: 1;
553
554 // These bits are set of this node has to know what the preceding
555 // character was.
556 bool follows_word_interest: 1;
557 bool follows_newline_interest: 1;
558 bool follows_start_interest: 1;
559
560 TriBool is_word: 2;
561 TriBool is_newline: 2;
562
563 bool at_end: 1;
564
565 // These bits are set if the node can make assumptions about what
566 // the previous character was.
567 TriBool follows_word: 2;
568 TriBool follows_newline: 2;
569 TriBool follows_start: 2;
570
571 bool visited: 1;
572};
573
574
575class ExpansionGuard {
576 public:
577 explicit inline ExpansionGuard(NodeInfo* info) : info_(info) {
578 ASSERT(!info->being_expanded);
579 info->being_expanded = true;
580 }
581 inline ~ExpansionGuard() {
582 info_->being_expanded = false;
583 }
584 private:
585 NodeInfo* info_;
586};
587
588
589class SiblingList {
590 public:
591 SiblingList() : list_(NULL) { }
592 int length() {
593 return list_ == NULL ? 0 : list_->length();
594 }
595 void Ensure(RegExpNode* parent) {
596 if (list_ == NULL) {
597 list_ = new ZoneList<RegExpNode*>(2);
598 list_->Add(parent);
599 }
600 }
601 void Add(RegExpNode* node) { list_->Add(node); }
602 RegExpNode* Get(int index) { return list_->at(index); }
603 private:
604 ZoneList<RegExpNode*>* list_;
605};
606
607
608class RegExpNode: public ZoneObject {
609 public:
610 virtual ~RegExpNode() { }
611 virtual void Accept(NodeVisitor* visitor) = 0;
612 // Generates a goto to this node or actually generates the code at this point.
613 // Until the implementation is complete we will return true for success and
614 // false for failure.
615 virtual bool GoTo(RegExpCompiler* compiler);
616 Label* label();
617
618 // Until the implementation is complete we will return true for success and
619 // false for failure.
620 virtual bool Emit(RegExpCompiler* compiler) = 0;
621
622 RegExpNode* EnsureExpanded(NodeInfo* info);
623 virtual RegExpNode* ExpandLocal(NodeInfo* info) = 0;
624 virtual void ExpandChildren() = 0;
625
626 // Propagates the given interest information forward. When seeing
627 // \bfoo for instance, the \b is implemented by propagating forward
628 // to the 'foo' string that it should only succeed if its first
629 // character is a letter xor the previous character was a letter.
630 virtual RegExpNode* PropagateForward(NodeInfo* info) = 0;
631
632 NodeInfo* info() { return &info_; }
633 virtual bool IsBacktrack() { return false; }
634
635 void AddSibling(RegExpNode* node) { siblings_.Add(node); }
636
637 // Static version of EnsureSibling that expresses the fact that the
638 // result has the same type as the input.
639 template <class C>
640 static C* EnsureSibling(C* node, NodeInfo* info, bool* cloned) {
641 return static_cast<C*>(node->EnsureSibling(info, cloned));
642 }
643
644 SiblingList* siblings() { return &siblings_; }
645 void set_siblings(SiblingList* other) { siblings_ = *other; }
646
647 protected:
648
649 // Returns a sibling of this node whose interests and assumptions
650 // match the ones in the given node info. If no sibling exists NULL
651 // is returned.
652 RegExpNode* TryGetSibling(NodeInfo* info);
653
654 // Returns a sibling of this node whose interests match the ones in
655 // the given node info. The info must not contain any assertions.
656 // If no node exists a new one will be created by cloning the current
657 // node. The result will always be an instance of the same concrete
658 // class as this node.
659 RegExpNode* EnsureSibling(NodeInfo* info, bool* cloned);
660
661 // Returns a clone of this node initialized using the copy constructor
662 // of its concrete class. Note that the node may have to be pre-
663 // processed before it is on a useable state.
664 virtual RegExpNode* Clone() = 0;
665
666 inline void Bind(RegExpMacroAssembler* macro);
667
668 private:
669 Label label_;
670 NodeInfo info_;
671 SiblingList siblings_;
672};
673
674
675class SeqRegExpNode: public RegExpNode {
676 public:
677 explicit SeqRegExpNode(RegExpNode* on_success)
678 : on_success_(on_success) { }
679 RegExpNode* on_success() { return on_success_; }
680 void set_on_success(RegExpNode* node) { on_success_ = node; }
681 virtual bool Emit(RegExpCompiler* compiler) { return false; }
682 private:
683 RegExpNode* on_success_;
684};
685
686
687class ActionNode: public SeqRegExpNode {
688 public:
689 enum Type {
690 STORE_REGISTER,
691 INCREMENT_REGISTER,
692 STORE_POSITION,
693 RESTORE_POSITION,
694 BEGIN_SUBMATCH,
695 ESCAPE_SUBMATCH
696 };
697 static ActionNode* StoreRegister(int reg, int val, RegExpNode* on_success);
698 static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
699 static ActionNode* StorePosition(int reg, RegExpNode* on_success);
700 static ActionNode* RestorePosition(int reg, RegExpNode* on_success);
701 static ActionNode* BeginSubmatch(int stack_pointer_reg,
702 int position_reg,
703 RegExpNode* on_success);
704 static ActionNode* EscapeSubmatch(int stack_pointer_reg,
705 bool and_restore_position,
706 int restore_reg,
707 RegExpNode* on_success);
708 virtual void Accept(NodeVisitor* visitor);
709 virtual bool Emit(RegExpCompiler* compiler);
710 virtual RegExpNode* ExpandLocal(NodeInfo* info);
711 virtual void ExpandChildren();
712 virtual RegExpNode* PropagateForward(NodeInfo* info);
713 virtual ActionNode* Clone() { return new ActionNode(*this); }
714
715 private:
716 union {
717 struct {
718 int reg;
719 int value;
720 } u_store_register;
721 struct {
722 int reg;
723 } u_increment_register;
724 struct {
725 int reg;
726 } u_position_register;
727 struct {
728 int stack_pointer_register;
729 int current_position_register;
730 } u_submatch;
731 } data_;
732 ActionNode(Type type, RegExpNode* on_success)
733 : SeqRegExpNode(on_success),
734 type_(type) { }
735 Type type_;
736 friend class DotPrinter;
737};
738
739
740class TextNode: public SeqRegExpNode {
741 public:
742 TextNode(ZoneList<TextElement>* elms,
743 RegExpNode* on_success,
744 RegExpNode* on_failure)
745 : SeqRegExpNode(on_success),
746 on_failure_(on_failure),
747 elms_(elms) { }
748 TextNode(RegExpCharacterClass* that,
749 RegExpNode* on_success,
750 RegExpNode* on_failure)
751 : SeqRegExpNode(on_success),
752 on_failure_(on_failure),
753 elms_(new ZoneList<TextElement>(1)) {
754 elms_->Add(TextElement::CharClass(that));
755 }
756 virtual void Accept(NodeVisitor* visitor);
757 virtual RegExpNode* PropagateForward(NodeInfo* info);
758 virtual RegExpNode* ExpandLocal(NodeInfo* info);
759 virtual void ExpandChildren();
760 RegExpNode* on_failure() { return on_failure_; }
761 virtual bool Emit(RegExpCompiler* compiler);
762 ZoneList<TextElement>* elements() { return elms_; }
763 void MakeCaseIndependent();
764 virtual TextNode* Clone() { return new TextNode(*this); }
765
766 private:
767 void ExpandAtomChildren(RegExpAtom* that);
768 void ExpandCharClassChildren(RegExpCharacterClass* that);
769
770 RegExpNode* on_failure_;
771 ZoneList<TextElement>* elms_;
772};
773
774
775class BackReferenceNode: public SeqRegExpNode {
776 public:
777 BackReferenceNode(int start_reg,
778 int end_reg,
779 RegExpNode* on_success,
780 RegExpNode* on_failure)
781 : SeqRegExpNode(on_success),
782 on_failure_(on_failure),
783 start_reg_(start_reg),
784 end_reg_(end_reg) { }
785 virtual void Accept(NodeVisitor* visitor);
786 RegExpNode* on_failure() { return on_failure_; }
787 int start_register() { return start_reg_; }
788 int end_register() { return end_reg_; }
789 virtual bool Emit(RegExpCompiler* compiler);
790 virtual RegExpNode* PropagateForward(NodeInfo* info);
791 virtual RegExpNode* ExpandLocal(NodeInfo* info);
792 virtual void ExpandChildren();
793 virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); }
794
795 private:
796 RegExpNode* on_failure_;
797 int start_reg_;
798 int end_reg_;
799};
800
801
802class EndNode: public RegExpNode {
803 public:
804 enum Action { ACCEPT, BACKTRACK };
805 explicit EndNode(Action action) : action_(action) { }
806 virtual void Accept(NodeVisitor* visitor);
807 virtual bool Emit(RegExpCompiler* compiler);
808 virtual RegExpNode* PropagateForward(NodeInfo* info);
809 virtual RegExpNode* ExpandLocal(NodeInfo* info);
810 virtual void ExpandChildren();
811 virtual bool IsBacktrack() { return action_ == BACKTRACK; }
812 virtual bool GoTo(RegExpCompiler* compiler);
813 virtual EndNode* Clone() { return new EndNode(*this); }
814
815 private:
816 Action action_;
817};
818
819
820class Guard: public ZoneObject {
821 public:
822 enum Relation { LT, GEQ };
823 Guard(int reg, Relation op, int value)
824 : reg_(reg),
825 op_(op),
826 value_(value) { }
827 int reg() { return reg_; }
828 Relation op() { return op_; }
829 int value() { return value_; }
830
831 private:
832 int reg_;
833 Relation op_;
834 int value_;
835};
836
837
838class GuardedAlternative {
839 public:
840 explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { }
841 void AddGuard(Guard* guard);
842 RegExpNode* node() { return node_; }
843 void set_node(RegExpNode* node) { node_ = node; }
844 ZoneList<Guard*>* guards() { return guards_; }
845
846 private:
847 RegExpNode* node_;
848 ZoneList<Guard*>* guards_;
849};
850
851
852class ChoiceNode: public RegExpNode {
853 public:
854 explicit ChoiceNode(int expected_size, RegExpNode* on_failure)
855 : on_failure_(on_failure),
856 alternatives_(new ZoneList<GuardedAlternative>(expected_size)),
857 table_(NULL),
858 being_calculated_(false) { }
859 virtual void Accept(NodeVisitor* visitor);
860 void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); }
861 ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
862 DispatchTable* GetTable(bool ignore_case);
863 RegExpNode* on_failure() { return on_failure_; }
864 virtual bool Emit(RegExpCompiler* compiler);
865 virtual RegExpNode* PropagateForward(NodeInfo* info);
866 virtual RegExpNode* ExpandLocal(NodeInfo* info);
867 virtual void ExpandChildren();
868 virtual ChoiceNode* Clone() { return new ChoiceNode(*this); }
869
870 bool being_calculated() { return being_calculated_; }
871 void set_being_calculated(bool b) { being_calculated_ = b; }
872
873 private:
874 friend class DispatchTableConstructor;
875 friend class Analysis;
876 void GenerateGuard(RegExpMacroAssembler* macro_assembler,
877 Guard *guard,
878 Label* on_failure);
879 RegExpNode* on_failure_;
880 ZoneList<GuardedAlternative>* alternatives_;
881 DispatchTable* table_;
882 bool being_calculated_;
883};
884
885
886class NodeVisitor {
887 public:
888 virtual ~NodeVisitor() { }
889#define DECLARE_VISIT(Type) \
890 virtual void Visit##Type(Type##Node* that) = 0;
891FOR_EACH_NODE_TYPE(DECLARE_VISIT)
892#undef DECLARE_VISIT
893};
894
895
896// Node visitor used to add the start set of the alternatives to the
897// dispatch table of a choice node.
898class DispatchTableConstructor: public NodeVisitor {
899 public:
900 DispatchTableConstructor(DispatchTable* table, bool ignore_case)
901 : table_(table),
902 choice_index_(-1),
903 ignore_case_(ignore_case) { }
904
905 void BuildTable(ChoiceNode* node);
906
907 void AddRange(CharacterRange range) {
908 table()->AddRange(range, choice_index_);
909 }
910
911 void AddInverse(ZoneList<CharacterRange>* ranges);
912
913#define DECLARE_VISIT(Type) \
914 virtual void Visit##Type(Type##Node* that);
915FOR_EACH_NODE_TYPE(DECLARE_VISIT)
916#undef DECLARE_VISIT
917
918 DispatchTable* table() { return table_; }
919 void set_choice_index(int value) { choice_index_ = value; }
920
921 protected:
922 DispatchTable *table_;
923 int choice_index_;
924 bool ignore_case_;
925};
926
927
928class Analysis: public NodeVisitor {
929 public:
930 explicit Analysis(bool ignore_case)
931 : ignore_case_(ignore_case) { }
932 void EnsureAnalyzed(RegExpNode* node);
933
934#define DECLARE_VISIT(Type) \
935 virtual void Visit##Type(Type##Node* that);
936FOR_EACH_NODE_TYPE(DECLARE_VISIT)
937#undef DECLARE_VISIT
938
939 private:
940 bool ignore_case_;
941
942 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
943};
944
945
946struct RegExpParseResult {
947 RegExpTree* tree;
948 bool has_character_escapes;
949 Handle<String> error;
950 int capture_count;
951};
952
953
954class RegExpEngine: public AllStatic {
955 public:
956 static Handle<FixedArray> Compile(RegExpParseResult* input,
957 RegExpNode** node_return,
958 bool ignore_case,
959 bool multiline);
960 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
961};
962
963
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000964} } // namespace v8::internal
965
966#endif // V8_JSREGEXP_H_