blob: 782c5b0b201ec1a6c02e8f23fcd36ebf64f3dcfc [file] [log] [blame]
yangguo@chromium.org659ceec2012-01-26 07:37:54 +00001// Copyright 2012 the V8 project authors. All rights reserved.
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +00002// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_JSREGEXP_H_
29#define V8_JSREGEXP_H_
30
lrn@chromium.org1c092762011-05-09 09:42:16 +000031#include "allocation.h"
erik.corry@gmail.comc3b670f2011-10-05 21:44:48 +000032#include "assembler.h"
ager@chromium.orgce5e87b2010-03-10 10:24:18 +000033#include "zone-inl.h"
christian.plesner.hansen@gmail.com9d58c2b2009-10-16 11:48:38 +000034
kasperl@chromium.org71affb52009-05-26 05:44:31 +000035namespace v8 {
36namespace internal {
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000037
yangguo@chromium.org659ceec2012-01-26 07:37:54 +000038class NodeVisitor;
39class RegExpCompiler;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000040class RegExpMacroAssembler;
yangguo@chromium.org659ceec2012-01-26 07:37:54 +000041class RegExpNode;
42class RegExpTree;
erik.corry@gmail.comed49e962012-04-17 11:57:53 +000043class BoyerMooreLookahead;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000044
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000045class RegExpImpl {
46 public:
kasperl@chromium.org68ac0092009-07-09 06:00:35 +000047 // Whether V8 is compiled with native regexp support or not.
48 static bool UsesNativeRegExp() {
ricow@chromium.orgc9c80822010-04-21 08:22:37 +000049#ifdef V8_INTERPRETED_REGEXP
kasperl@chromium.org68ac0092009-07-09 06:00:35 +000050 return false;
ricow@chromium.orgc9c80822010-04-21 08:22:37 +000051#else
52 return true;
ager@chromium.orgbb29dc92009-03-24 13:25:23 +000053#endif
54 }
kasperl@chromium.org68ac0092009-07-09 06:00:35 +000055
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000056 // Creates a regular expression literal in the old space.
57 // This function calls the garbage collector if necessary.
mads.s.ager@gmail.com9a4089a2008-09-01 08:55:01 +000058 static Handle<Object> CreateRegExpLiteral(Handle<JSFunction> constructor,
59 Handle<String> pattern,
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000060 Handle<String> flags,
61 bool* has_pending_exception);
62
63 // Returns a string representation of a regular expression.
64 // Implements RegExp.prototype.toString, see ECMA-262 section 15.10.6.4.
65 // This function calls the garbage collector if necessary.
66 static Handle<String> ToString(Handle<Object> value);
67
ager@chromium.org8bb60582008-12-11 12:02:20 +000068 // Parses the RegExp pattern and prepares the JSRegExp object with
69 // generic data and choice of implementation - as well as what
70 // the implementation wants to store in the data field.
kasperl@chromium.org7be3c992009-03-12 07:19:55 +000071 // Returns false if compilation fails.
kasperl@chromium.org41044eb2008-10-06 08:24:46 +000072 static Handle<Object> Compile(Handle<JSRegExp> re,
73 Handle<String> pattern,
74 Handle<String> flags);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000075
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000076 // See ECMA-262 section 15.10.6.2.
77 // This function calls the garbage collector if necessary.
kasperl@chromium.org41044eb2008-10-06 08:24:46 +000078 static Handle<Object> Exec(Handle<JSRegExp> regexp,
79 Handle<String> subject,
kasperl@chromium.org7be3c992009-03-12 07:19:55 +000080 int index,
rossberg@chromium.org400388e2012-06-06 09:29:22 +000081 Handle<JSArray> lastMatchInfo,
82 Zone* zone);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +000083
ager@chromium.org8bb60582008-12-11 12:02:20 +000084 // Prepares a JSRegExp object with Irregexp-specific data.
whesse@chromium.orgcec079d2010-03-22 14:44:04 +000085 static void IrregexpInitialize(Handle<JSRegExp> re,
86 Handle<String> pattern,
87 JSRegExp::Flags flags,
88 int capture_register_count);
ager@chromium.orga74f0da2008-12-03 16:05:52 +000089
90
kasperl@chromium.org7be3c992009-03-12 07:19:55 +000091 static void AtomCompile(Handle<JSRegExp> re,
92 Handle<String> pattern,
93 JSRegExp::Flags flags,
94 Handle<String> match_pattern);
95
kasperl@chromium.org41044eb2008-10-06 08:24:46 +000096 static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
97 Handle<String> subject,
kasperl@chromium.org7be3c992009-03-12 07:19:55 +000098 int index,
99 Handle<JSArray> lastMatchInfo);
kasperl@chromium.org41044eb2008-10-06 08:24:46 +0000100
whesse@chromium.orgcec079d2010-03-22 14:44:04 +0000101 enum IrregexpResult { RE_FAILURE = 0, RE_SUCCESS = 1, RE_EXCEPTION = -1 };
102
103 // Prepare a RegExp for being executed one or more times (using
104 // IrregexpExecOnce) on the subject.
105 // This ensures that the regexp is compiled for the subject, and that
106 // the subject is flat.
107 // Returns the number of integer spaces required by IrregexpExecOnce
108 // as its "registers" argument. If the regexp cannot be compiled,
109 // an exception is set as pending, and this function returns negative.
110 static int IrregexpPrepare(Handle<JSRegExp> regexp,
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000111 Handle<String> subject,
112 Zone* zone);
whesse@chromium.orgcec079d2010-03-22 14:44:04 +0000113
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +0000114 // Calculate the size of offsets vector for the case of global regexp
115 // and the number of matches this vector is able to store.
116 static int GlobalOffsetsVectorSize(Handle<JSRegExp> regexp,
117 int registers_per_match,
118 int* max_matches);
119
120 // Execute a regular expression on the subject, starting from index.
121 // If matching succeeds, return the number of matches. This can be larger
122 // than one in the case of global regular expressions.
123 // The captures and subcaptures are stored into the registers vector.
whesse@chromium.orgcec079d2010-03-22 14:44:04 +0000124 // If matching fails, returns RE_FAILURE.
125 // If execution fails, sets a pending exception and returns RE_EXCEPTION.
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +0000126 static int IrregexpExecRaw(Handle<JSRegExp> regexp,
127 Handle<String> subject,
128 int index,
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000129 Vector<int> registers,
130 Zone* zone);
whesse@chromium.orgcec079d2010-03-22 14:44:04 +0000131
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000132 // Execute an Irregexp bytecode pattern.
ager@chromium.org41826e72009-03-30 13:30:57 +0000133 // On a successful match, the result is a JSArray containing
134 // captured positions. On a failure, the result is the null value.
135 // Returns an empty handle in case of an exception.
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000136 static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp,
137 Handle<String> subject,
kasperl@chromium.org7be3c992009-03-12 07:19:55 +0000138 int index,
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000139 Handle<JSArray> lastMatchInfo,
140 Zone* zone);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000141
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000142 // Array index in the lastMatchInfo array.
kasperl@chromium.org7be3c992009-03-12 07:19:55 +0000143 static const int kLastCaptureCount = 0;
144 static const int kLastSubject = 1;
145 static const int kLastInput = 2;
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000146 static const int kFirstCapture = 3;
kasperl@chromium.org7be3c992009-03-12 07:19:55 +0000147 static const int kLastMatchOverhead = 3;
148
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000149 // Direct offset into the lastMatchInfo array.
150 static const int kLastCaptureCountOffset =
151 FixedArray::kHeaderSize + kLastCaptureCount * kPointerSize;
152 static const int kLastSubjectOffset =
153 FixedArray::kHeaderSize + kLastSubject * kPointerSize;
154 static const int kLastInputOffset =
155 FixedArray::kHeaderSize + kLastInput * kPointerSize;
156 static const int kFirstCaptureOffset =
157 FixedArray::kHeaderSize + kFirstCapture * kPointerSize;
158
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000159 // Used to access the lastMatchInfo array.
kasperl@chromium.org7be3c992009-03-12 07:19:55 +0000160 static int GetCapture(FixedArray* array, int index) {
161 return Smi::cast(array->get(index + kFirstCapture))->value();
162 }
163
164 static void SetLastCaptureCount(FixedArray* array, int to) {
165 array->set(kLastCaptureCount, Smi::FromInt(to));
166 }
167
168 static void SetLastSubject(FixedArray* array, String* to) {
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000169 array->set(kLastSubject, to);
kasperl@chromium.org7be3c992009-03-12 07:19:55 +0000170 }
171
172 static void SetLastInput(FixedArray* array, String* to) {
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000173 array->set(kLastInput, to);
kasperl@chromium.org7be3c992009-03-12 07:19:55 +0000174 }
175
176 static void SetCapture(FixedArray* array, int index, int to) {
177 array->set(index + kFirstCapture, Smi::FromInt(to));
178 }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000179
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000180 static int GetLastCaptureCount(FixedArray* array) {
181 return Smi::cast(array->get(kLastCaptureCount))->value();
182 }
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000183
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000184 // For acting on the JSRegExp data FixedArray.
kasperl@chromium.org7be3c992009-03-12 07:19:55 +0000185 static int IrregexpMaxRegisterCount(FixedArray* re);
186 static void SetIrregexpMaxRegisterCount(FixedArray* re, int value);
187 static int IrregexpNumberOfCaptures(FixedArray* re);
188 static int IrregexpNumberOfRegisters(FixedArray* re);
189 static ByteArray* IrregexpByteCode(FixedArray* re, bool is_ascii);
190 static Code* IrregexpNativeCode(FixedArray* re, bool is_ascii);
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000191
karlklose@chromium.org83a47282011-05-11 11:54:09 +0000192 // Limit the space regexps take up on the heap. In order to limit this we
193 // would like to keep track of the amount of regexp code on the heap. This
194 // is not tracked, however. As a conservative approximation we track the
195 // total regexp code compiled including code that has subsequently been freed
196 // and the total executable memory at any point.
197 static const int kRegExpExecutableMemoryLimit = 16 * MB;
198 static const int kRegWxpCompiledLimit = 1 * MB;
199
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000200 private:
201 static String* last_ascii_string_;
202 static String* two_byte_cached_string_;
203
fschneider@chromium.org7d10be52012-04-10 12:30:14 +0000204 static bool CompileIrregexp(
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000205 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii,
206 Zone* zone);
fschneider@chromium.org7d10be52012-04-10 12:30:14 +0000207 static inline bool EnsureCompiledIrregexp(
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000208 Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii,
209 Zone* zone);
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000210
211
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000212 // Set the subject cache. The previous string buffer is not deleted, so the
213 // caller should ensure that it doesn't leak.
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000214 static void SetSubjectCache(String* subject,
215 char* utf8_subject,
216 int uft8_length,
217 int character_position,
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +0000218 int utf8_position);
219
220 // A one element cache of the last utf8_subject string and its length. The
221 // subject JS String object is cached in the heap. We also cache a
222 // translation between position and utf8 position.
223 static char* utf8_subject_cache_;
224 static int utf8_length_cache_;
225 static int utf8_position_;
226 static int character_position_;
227};
228
229
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000230// Represents the location of one element relative to the intersection of
231// two sets. Corresponds to the four areas of a Venn diagram.
232enum ElementInSetsRelation {
233 kInsideNone = 0,
234 kInsideFirst = 1,
235 kInsideSecond = 2,
236 kInsideBoth = 3
237};
238
239
danno@chromium.org1044a4d2012-04-30 12:34:39 +0000240// Represents code units in the range from from_ to to_, both ends are
241// inclusive.
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000242class CharacterRange {
243 public:
244 CharacterRange() : from_(0), to_(0) { }
245 // For compatibility with the CHECK_OK macro
246 CharacterRange(void* null) { ASSERT_EQ(NULL, null); } //NOLINT
247 CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { }
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000248 static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
249 Zone* zone);
erik.corry@gmail.comed49e962012-04-17 11:57:53 +0000250 static Vector<const int> GetWordBounds();
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000251 static inline CharacterRange Singleton(uc16 value) {
252 return CharacterRange(value, value);
253 }
254 static inline CharacterRange Range(uc16 from, uc16 to) {
255 ASSERT(from <= to);
256 return CharacterRange(from, to);
257 }
258 static inline CharacterRange Everything() {
259 return CharacterRange(0, 0xFFFF);
260 }
261 bool Contains(uc16 i) { return from_ <= i && i <= to_; }
262 uc16 from() const { return from_; }
263 void set_from(uc16 value) { from_ = value; }
264 uc16 to() const { return to_; }
265 void set_to(uc16 value) { to_ = value; }
266 bool is_valid() { return from_ <= to_; }
ager@chromium.org8bb60582008-12-11 12:02:20 +0000267 bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000268 bool IsSingleton() { return (from_ == to_); }
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000269 void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii,
270 Zone* zone);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000271 static void Split(ZoneList<CharacterRange>* base,
erik.corry@gmail.comed49e962012-04-17 11:57:53 +0000272 Vector<const int> overlay,
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000273 ZoneList<CharacterRange>** included,
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000274 ZoneList<CharacterRange>** excluded,
275 Zone* zone);
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000276 // Whether a range list is in canonical form: Ranges ordered by from value,
277 // and ranges non-overlapping and non-adjacent.
278 static bool IsCanonical(ZoneList<CharacterRange>* ranges);
279 // Convert range list to canonical form. The characters covered by the ranges
280 // will still be the same, but no character is in more than one range, and
281 // adjacent ranges are merged. The resulting list may be shorter than the
282 // original, but cannot be longer.
283 static void Canonicalize(ZoneList<CharacterRange>* ranges);
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000284 // Negate the contents of a character range in canonical form.
285 static void Negate(ZoneList<CharacterRange>* src,
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000286 ZoneList<CharacterRange>* dst,
287 Zone* zone);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000288 static const int kStartMarker = (1 << 24);
289 static const int kPayloadMask = (1 << 24) - 1;
290
291 private:
292 uc16 from_;
293 uc16 to_;
294};
295
296
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000297// A set of unsigned integers that behaves especially well on small
298// integers (< 32). May do zone-allocation.
299class OutSet: public ZoneObject {
300 public:
301 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { }
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000302 OutSet* Extend(unsigned value, Zone* zone);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000303 bool Get(unsigned value);
304 static const unsigned kFirstLimit = 32;
305
306 private:
307 // Destructively set a value in this set. In most cases you want
308 // to use Extend instead to ensure that only one instance exists
309 // that contains the same values.
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000310 void Set(unsigned value, Zone* zone);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000311
312 // The successors are a list of sets that contain the same values
313 // as this set and the one more value that is not present in this
314 // set.
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000315 ZoneList<OutSet*>* successors(Zone* zone) { return successors_; }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000316
317 OutSet(uint32_t first, ZoneList<unsigned>* remaining)
318 : first_(first), remaining_(remaining), successors_(NULL) { }
319 uint32_t first_;
320 ZoneList<unsigned>* remaining_;
321 ZoneList<OutSet*>* successors_;
ager@chromium.org32912102009-01-16 10:38:43 +0000322 friend class Trace;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000323};
324
325
326// A mapping from integers, specified as ranges, to a set of integers.
327// Used for mapping character ranges to choices.
328class DispatchTable : public ZoneObject {
329 public:
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000330 explicit DispatchTable(Zone* zone) : tree_(zone) { }
331
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000332 class Entry {
333 public:
334 Entry() : from_(0), to_(0), out_set_(NULL) { }
335 Entry(uc16 from, uc16 to, OutSet* out_set)
336 : from_(from), to_(to), out_set_(out_set) { }
337 uc16 from() { return from_; }
338 uc16 to() { return to_; }
339 void set_to(uc16 value) { to_ = value; }
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000340 void AddValue(int value, Zone* zone) {
341 out_set_ = out_set_->Extend(value, zone);
342 }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000343 OutSet* out_set() { return out_set_; }
344 private:
345 uc16 from_;
346 uc16 to_;
347 OutSet* out_set_;
348 };
349
350 class Config {
351 public:
352 typedef uc16 Key;
353 typedef Entry Value;
354 static const uc16 kNoKey;
svenpanne@chromium.orga8bb4d92011-10-10 13:20:40 +0000355 static const Entry NoValue() { return Value(); }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000356 static inline int Compare(uc16 a, uc16 b) {
357 if (a == b)
358 return 0;
359 else if (a < b)
360 return -1;
361 else
362 return 1;
363 }
364 };
365
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000366 void AddRange(CharacterRange range, int value, Zone* zone);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000367 OutSet* Get(uc16 value);
368 void Dump();
369
370 template <typename Callback>
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000371 void ForEach(Callback* callback) {
372 return tree()->ForEach(callback);
373 }
kmillikin@chromium.org83e16822011-09-13 08:21:47 +0000374
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000375 private:
376 // There can't be a static empty set since it allocates its
377 // successors in a zone and caches them.
378 OutSet* empty() { return &empty_; }
379 OutSet empty_;
380 ZoneSplayTree<Config>* tree() { return &tree_; }
381 ZoneSplayTree<Config> tree_;
382};
383
384
385#define FOR_EACH_NODE_TYPE(VISIT) \
386 VISIT(End) \
387 VISIT(Action) \
388 VISIT(Choice) \
389 VISIT(BackReference) \
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000390 VISIT(Assertion) \
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000391 VISIT(Text)
392
393
394#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
395 VISIT(Disjunction) \
396 VISIT(Alternative) \
397 VISIT(Assertion) \
398 VISIT(CharacterClass) \
399 VISIT(Atom) \
400 VISIT(Quantifier) \
401 VISIT(Capture) \
402 VISIT(Lookahead) \
403 VISIT(BackReference) \
404 VISIT(Empty) \
405 VISIT(Text)
406
407
408#define FORWARD_DECLARE(Name) class RegExp##Name;
409FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
410#undef FORWARD_DECLARE
411
412
413class TextElement {
414 public:
415 enum Type {UNINITIALIZED, ATOM, CHAR_CLASS};
416 TextElement() : type(UNINITIALIZED) { }
ager@chromium.org8bb60582008-12-11 12:02:20 +0000417 explicit TextElement(Type t) : type(t), cp_offset(-1) { }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000418 static TextElement Atom(RegExpAtom* atom);
419 static TextElement CharClass(RegExpCharacterClass* char_class);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000420 int length();
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000421 Type type;
422 union {
423 RegExpAtom* u_atom;
424 RegExpCharacterClass* u_char_class;
425 } data;
ager@chromium.org8bb60582008-12-11 12:02:20 +0000426 int cp_offset;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000427};
428
429
ager@chromium.org32912102009-01-16 10:38:43 +0000430class Trace;
ager@chromium.org8bb60582008-12-11 12:02:20 +0000431
432
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000433struct NodeInfo {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000434 NodeInfo()
435 : being_analyzed(false),
436 been_analyzed(false),
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000437 follows_word_interest(false),
438 follows_newline_interest(false),
439 follows_start_interest(false),
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000440 at_end(false),
danno@chromium.org1044a4d2012-04-30 12:34:39 +0000441 visited(false),
442 replacement_calculated(false) { }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000443
444 // Returns true if the interests and assumptions of this node
445 // matches the given one.
446 bool Matches(NodeInfo* that) {
447 return (at_end == that->at_end) &&
448 (follows_word_interest == that->follows_word_interest) &&
449 (follows_newline_interest == that->follows_newline_interest) &&
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000450 (follows_start_interest == that->follows_start_interest);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000451 }
452
453 // Updates the interests of this node given the interests of the
454 // node preceding it.
455 void AddFromPreceding(NodeInfo* that) {
456 at_end |= that->at_end;
457 follows_word_interest |= that->follows_word_interest;
458 follows_newline_interest |= that->follows_newline_interest;
459 follows_start_interest |= that->follows_start_interest;
460 }
461
ager@chromium.org8bb60582008-12-11 12:02:20 +0000462 bool HasLookbehind() {
463 return follows_word_interest ||
464 follows_newline_interest ||
465 follows_start_interest;
466 }
467
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000468 // Sets the interests of this node to include the interests of the
469 // following node.
470 void AddFromFollowing(NodeInfo* that) {
471 follows_word_interest |= that->follows_word_interest;
472 follows_newline_interest |= that->follows_newline_interest;
473 follows_start_interest |= that->follows_start_interest;
474 }
475
476 void ResetCompilationState() {
477 being_analyzed = false;
478 been_analyzed = false;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000479 }
480
481 bool being_analyzed: 1;
482 bool been_analyzed: 1;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000483
484 // These bits are set of this node has to know what the preceding
485 // character was.
486 bool follows_word_interest: 1;
487 bool follows_newline_interest: 1;
488 bool follows_start_interest: 1;
489
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000490 bool at_end: 1;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000491 bool visited: 1;
danno@chromium.org1044a4d2012-04-30 12:34:39 +0000492 bool replacement_calculated: 1;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000493};
494
495
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000496// Details of a quick mask-compare check that can look ahead in the
497// input stream.
498class QuickCheckDetails {
499 public:
500 QuickCheckDetails()
501 : characters_(0),
502 mask_(0),
iposva@chromium.org245aa852009-02-10 00:49:54 +0000503 value_(0),
504 cannot_match_(false) { }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000505 explicit QuickCheckDetails(int characters)
506 : characters_(characters),
507 mask_(0),
iposva@chromium.org245aa852009-02-10 00:49:54 +0000508 value_(0),
509 cannot_match_(false) { }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000510 bool Rationalize(bool ascii);
511 // Merge in the information from another branch of an alternation.
512 void Merge(QuickCheckDetails* other, int from_index);
513 // Advance the current position by some amount.
514 void Advance(int by, bool ascii);
515 void Clear();
iposva@chromium.org245aa852009-02-10 00:49:54 +0000516 bool cannot_match() { return cannot_match_; }
517 void set_cannot_match() { cannot_match_ = true; }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000518 struct Position {
519 Position() : mask(0), value(0), determines_perfectly(false) { }
520 uc16 mask;
521 uc16 value;
522 bool determines_perfectly;
523 };
524 int characters() { return characters_; }
525 void set_characters(int characters) { characters_ = characters; }
526 Position* positions(int index) {
527 ASSERT(index >= 0);
528 ASSERT(index < characters_);
529 return positions_ + index;
530 }
531 uint32_t mask() { return mask_; }
532 uint32_t value() { return value_; }
533
534 private:
535 // How many characters do we have quick check information from. This is
536 // the same for all branches of a choice node.
537 int characters_;
538 Position positions_[4];
539 // These values are the condensate of the above array after Rationalize().
540 uint32_t mask_;
541 uint32_t value_;
iposva@chromium.org245aa852009-02-10 00:49:54 +0000542 // If set to true, there is no way this quick check can match at all.
543 // E.g., if it requires to be at the start of the input, and isn't.
544 bool cannot_match_;
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000545};
546
547
danno@chromium.org1044a4d2012-04-30 12:34:39 +0000548extern int kUninitializedRegExpNodePlaceHolder;
549
550
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000551class RegExpNode: public ZoneObject {
552 public:
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000553 explicit RegExpNode(Zone* zone)
554 : replacement_(NULL), trace_count_(0), zone_(zone) {
erik.corry@gmail.comed49e962012-04-17 11:57:53 +0000555 bm_info_[0] = bm_info_[1] = NULL;
556 }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000557 virtual ~RegExpNode();
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000558 virtual void Accept(NodeVisitor* visitor) = 0;
559 // Generates a goto to this node or actually generates the code at this point.
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000560 virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0;
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000561 // How many characters must this node consume at a minimum in order to
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000562 // succeed. If we have found at least 'still_to_find' characters that
563 // must be consumed there is no need to ask any following nodes whether
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000564 // they are sure to eat any more characters. The not_at_start argument is
565 // used to indicate that we know we are not at the start of the input. In
566 // this case anchored branches will always fail and can be ignored when
567 // determining how many characters are consumed on success.
568 virtual int EatsAtLeast(int still_to_find,
569 int recursion_depth,
570 bool not_at_start) = 0;
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000571 // Emits some quick code that checks whether the preloaded characters match.
572 // Falls through on certain failure, jumps to the label on possible success.
573 // If the node cannot make a quick check it does nothing and returns false.
574 bool EmitQuickCheck(RegExpCompiler* compiler,
ager@chromium.org32912102009-01-16 10:38:43 +0000575 Trace* trace,
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000576 bool preload_has_checked_bounds,
577 Label* on_possible_success,
578 QuickCheckDetails* details_return,
579 bool fall_through_on_failure);
580 // For a given number of characters this returns a mask and a value. The
581 // next n characters are anded with the mask and compared with the value.
582 // A comparison failure indicates the node cannot match the next n characters.
583 // A comparison success indicates the node may match.
584 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
585 RegExpCompiler* compiler,
iposva@chromium.org245aa852009-02-10 00:49:54 +0000586 int characters_filled_in,
587 bool not_at_start) = 0;
ager@chromium.org8bb60582008-12-11 12:02:20 +0000588 static const int kNodeIsTooComplexForGreedyLoops = -1;
589 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
fschneider@chromium.org7d10be52012-04-10 12:30:14 +0000590 // Only returns the successor for a text node of length 1 that matches any
591 // character and that has no guards on it.
592 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
593 RegExpCompiler* compiler) {
594 return NULL;
595 }
596
597 // Collects information on the possible code units (mod 128) that can match if
598 // we look forward. This is used for a Boyer-Moore-like string searching
599 // implementation. TODO(erikcorry): This should share more code with
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000600 // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit
601 // the number of nodes we are willing to look at in order to create this data.
602 static const int kFillInBMBudget = 200;
verwaest@chromium.org37141392012-05-31 13:27:02 +0000603 virtual void FillInBMInfo(int offset,
604 int recursion_depth,
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000605 int budget,
verwaest@chromium.org37141392012-05-31 13:27:02 +0000606 BoyerMooreLookahead* bm,
607 bool not_at_start) {
fschneider@chromium.org7d10be52012-04-10 12:30:14 +0000608 UNREACHABLE();
609 }
danno@chromium.org1044a4d2012-04-30 12:34:39 +0000610
611 // If we know that the input is ASCII then there are some nodes that can
612 // never match. This method returns a node that can be substituted for
613 // itself, or NULL if the node can never match.
614 virtual RegExpNode* FilterASCII(int depth) { return this; }
615 // Helper for FilterASCII.
616 RegExpNode* replacement() {
617 ASSERT(info()->replacement_calculated);
618 return replacement_;
619 }
620 RegExpNode* set_replacement(RegExpNode* replacement) {
621 info()->replacement_calculated = true;
622 replacement_ = replacement;
623 return replacement; // For convenience.
624 }
625
erik.corry@gmail.comed49e962012-04-17 11:57:53 +0000626 // We want to avoid recalculating the lookahead info, so we store it on the
627 // node. Only info that is for this node is stored. We can tell that the
628 // info is for this node when offset == 0, so the information is calculated
629 // relative to this node.
630 void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, int offset) {
631 if (offset == 0) set_bm_info(not_at_start, bm);
632 }
fschneider@chromium.org7d10be52012-04-10 12:30:14 +0000633
ager@chromium.org8bb60582008-12-11 12:02:20 +0000634 Label* label() { return &label_; }
ulan@chromium.org2efb9002012-01-19 15:36:35 +0000635 // If non-generic code is generated for a node (i.e. the node is not at the
ager@chromium.org32912102009-01-16 10:38:43 +0000636 // start of the trace) then it cannot be reused. This variable sets a limit
637 // on how often we allow that to happen before we insist on starting a new
638 // trace and generating generic code for a node that can be reused by flushing
639 // the deferred actions in the current trace and generating a goto.
640 static const int kMaxCopiesCodeGenerated = 10;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000641
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000642 NodeInfo* info() { return &info_; }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000643
erik.corry@gmail.comed49e962012-04-17 11:57:53 +0000644 BoyerMooreLookahead* bm_info(bool not_at_start) {
645 return bm_info_[not_at_start ? 1 : 0];
646 }
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000647
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000648 Zone* zone() const { return zone_; }
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000649
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000650 protected:
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000651 enum LimitResult { DONE, CONTINUE };
danno@chromium.org1044a4d2012-04-30 12:34:39 +0000652 RegExpNode* replacement_;
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000653
ager@chromium.org32912102009-01-16 10:38:43 +0000654 LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000655
erik.corry@gmail.comed49e962012-04-17 11:57:53 +0000656 void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) {
657 bm_info_[not_at_start ? 1 : 0] = bm;
658 }
659
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000660 private:
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000661 static const int kFirstCharBudget = 10;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000662 Label label_;
663 NodeInfo info_;
ager@chromium.org32912102009-01-16 10:38:43 +0000664 // This variable keeps track of how many times code has been generated for
665 // this node (in different traces). We don't keep track of where the
666 // generated code is located unless the code is generated at the start of
667 // a trace, in which case it is generic and can be reused by flushing the
668 // deferred operations in the current trace and generating a goto.
669 int trace_count_;
erik.corry@gmail.comed49e962012-04-17 11:57:53 +0000670 BoyerMooreLookahead* bm_info_[2];
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000671
672 Zone* zone_;
ager@chromium.org32912102009-01-16 10:38:43 +0000673};
674
675
676// A simple closed interval.
677class Interval {
678 public:
679 Interval() : from_(kNone), to_(kNone) { }
680 Interval(int from, int to) : from_(from), to_(to) { }
681 Interval Union(Interval that) {
682 if (that.from_ == kNone)
683 return *this;
684 else if (from_ == kNone)
685 return that;
686 else
687 return Interval(Min(from_, that.from_), Max(to_, that.to_));
688 }
689 bool Contains(int value) {
690 return (from_ <= value) && (value <= to_);
691 }
692 bool is_empty() { return from_ == kNone; }
erik.corry@gmail.comed49e962012-04-17 11:57:53 +0000693 int from() const { return from_; }
694 int to() const { return to_; }
ager@chromium.org32912102009-01-16 10:38:43 +0000695 static Interval Empty() { return Interval(); }
696 static const int kNone = -1;
697 private:
698 int from_;
699 int to_;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000700};
701
702
703class SeqRegExpNode: public RegExpNode {
704 public:
705 explicit SeqRegExpNode(RegExpNode* on_success)
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000706 : RegExpNode(on_success->zone()), on_success_(on_success) { }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000707 RegExpNode* on_success() { return on_success_; }
708 void set_on_success(RegExpNode* node) { on_success_ = node; }
danno@chromium.org1044a4d2012-04-30 12:34:39 +0000709 virtual RegExpNode* FilterASCII(int depth);
verwaest@chromium.org37141392012-05-31 13:27:02 +0000710 virtual void FillInBMInfo(int offset,
711 int recursion_depth,
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000712 int budget,
verwaest@chromium.org37141392012-05-31 13:27:02 +0000713 BoyerMooreLookahead* bm,
714 bool not_at_start) {
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000715 on_success_->FillInBMInfo(
716 offset, recursion_depth + 1, budget - 1, bm, not_at_start);
erik.corry@gmail.comed49e962012-04-17 11:57:53 +0000717 if (offset == 0) set_bm_info(not_at_start, bm);
fschneider@chromium.org7d10be52012-04-10 12:30:14 +0000718 }
danno@chromium.org1044a4d2012-04-30 12:34:39 +0000719
720 protected:
721 RegExpNode* FilterSuccessor(int depth);
722
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000723 private:
724 RegExpNode* on_success_;
725};
726
727
728class ActionNode: public SeqRegExpNode {
729 public:
730 enum Type {
ager@chromium.org8bb60582008-12-11 12:02:20 +0000731 SET_REGISTER,
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000732 INCREMENT_REGISTER,
733 STORE_POSITION,
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000734 BEGIN_SUBMATCH,
ager@chromium.org32912102009-01-16 10:38:43 +0000735 POSITIVE_SUBMATCH_SUCCESS,
736 EMPTY_MATCH_CHECK,
737 CLEAR_CAPTURES
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000738 };
ager@chromium.org8bb60582008-12-11 12:02:20 +0000739 static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000740 static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000741 static ActionNode* StorePosition(int reg,
742 bool is_capture,
743 RegExpNode* on_success);
ager@chromium.org32912102009-01-16 10:38:43 +0000744 static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success);
745 static ActionNode* BeginSubmatch(int stack_pointer_reg,
746 int position_reg,
747 RegExpNode* on_success);
748 static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg,
749 int restore_reg,
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000750 int clear_capture_count,
751 int clear_capture_from,
ager@chromium.org32912102009-01-16 10:38:43 +0000752 RegExpNode* on_success);
753 static ActionNode* EmptyMatchCheck(int start_register,
754 int repetition_register,
755 int repetition_limit,
756 RegExpNode* on_success);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000757 virtual void Accept(NodeVisitor* visitor);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000758 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000759 virtual int EatsAtLeast(int still_to_find,
760 int recursion_depth,
761 bool not_at_start);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000762 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
763 RegExpCompiler* compiler,
iposva@chromium.org245aa852009-02-10 00:49:54 +0000764 int filled_in,
765 bool not_at_start) {
766 return on_success()->GetQuickCheckDetails(
767 details, compiler, filled_in, not_at_start);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000768 }
verwaest@chromium.org37141392012-05-31 13:27:02 +0000769 virtual void FillInBMInfo(int offset,
770 int recursion_depth,
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000771 int budget,
verwaest@chromium.org37141392012-05-31 13:27:02 +0000772 BoyerMooreLookahead* bm,
773 bool not_at_start);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000774 Type type() { return type_; }
775 // TODO(erikcorry): We should allow some action nodes in greedy loops.
776 virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
kmillikin@chromium.org83e16822011-09-13 08:21:47 +0000777
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000778 private:
779 union {
780 struct {
781 int reg;
782 int value;
783 } u_store_register;
784 struct {
785 int reg;
786 } u_increment_register;
787 struct {
788 int reg;
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000789 bool is_capture;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000790 } u_position_register;
791 struct {
792 int stack_pointer_register;
793 int current_position_register;
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000794 int clear_register_count;
795 int clear_register_from;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000796 } u_submatch;
ager@chromium.org32912102009-01-16 10:38:43 +0000797 struct {
798 int start_register;
799 int repetition_register;
800 int repetition_limit;
801 } u_empty_match_check;
802 struct {
803 int range_from;
804 int range_to;
805 } u_clear_captures;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000806 } data_;
807 ActionNode(Type type, RegExpNode* on_success)
808 : SeqRegExpNode(on_success),
809 type_(type) { }
810 Type type_;
811 friend class DotPrinter;
812};
813
814
815class TextNode: public SeqRegExpNode {
816 public:
817 TextNode(ZoneList<TextElement>* elms,
ager@chromium.org8bb60582008-12-11 12:02:20 +0000818 RegExpNode* on_success)
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000819 : SeqRegExpNode(on_success),
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000820 elms_(elms) { }
821 TextNode(RegExpCharacterClass* that,
ager@chromium.org8bb60582008-12-11 12:02:20 +0000822 RegExpNode* on_success)
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000823 : SeqRegExpNode(on_success),
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000824 elms_(new(zone()) ZoneList<TextElement>(1, zone())) {
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000825 elms_->Add(TextElement::CharClass(that), zone());
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000826 }
827 virtual void Accept(NodeVisitor* visitor);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000828 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000829 virtual int EatsAtLeast(int still_to_find,
830 int recursion_depth,
831 bool not_at_start);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000832 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
833 RegExpCompiler* compiler,
iposva@chromium.org245aa852009-02-10 00:49:54 +0000834 int characters_filled_in,
835 bool not_at_start);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000836 ZoneList<TextElement>* elements() { return elms_; }
ager@chromium.org38e4c712009-11-11 09:11:58 +0000837 void MakeCaseIndependent(bool is_ascii);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000838 virtual int GreedyLoopTextLength();
fschneider@chromium.org7d10be52012-04-10 12:30:14 +0000839 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
840 RegExpCompiler* compiler);
verwaest@chromium.org37141392012-05-31 13:27:02 +0000841 virtual void FillInBMInfo(int offset,
842 int recursion_depth,
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000843 int budget,
verwaest@chromium.org37141392012-05-31 13:27:02 +0000844 BoyerMooreLookahead* bm,
845 bool not_at_start);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000846 void CalculateOffsets();
danno@chromium.org1044a4d2012-04-30 12:34:39 +0000847 virtual RegExpNode* FilterASCII(int depth);
kmillikin@chromium.org83e16822011-09-13 08:21:47 +0000848
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000849 private:
850 enum TextEmitPassType {
ager@chromium.org381abbb2009-02-25 13:23:22 +0000851 NON_ASCII_MATCH, // Check for characters that can't match.
852 SIMPLE_CHARACTER_MATCH, // Case-dependent single character check.
853 NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs.
854 CASE_CHARACTER_MATCH, // Case-independent single character check.
855 CHARACTER_CLASS_MATCH // Character class.
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000856 };
ager@chromium.org381abbb2009-02-25 13:23:22 +0000857 static bool SkipPass(int pass, bool ignore_case);
858 static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH;
859 static const int kLastPass = CHARACTER_CLASS_MATCH;
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000860 void TextEmitPass(RegExpCompiler* compiler,
861 TextEmitPassType pass,
862 bool preloaded,
ager@chromium.org32912102009-01-16 10:38:43 +0000863 Trace* trace,
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000864 bool first_element_checked,
865 int* checked_up_to);
866 int Length();
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000867 ZoneList<TextElement>* elms_;
868};
869
870
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000871class AssertionNode: public SeqRegExpNode {
872 public:
873 enum AssertionNodeType {
874 AT_END,
875 AT_START,
876 AT_BOUNDARY,
877 AT_NON_BOUNDARY,
erik.corry@gmail.comed49e962012-04-17 11:57:53 +0000878 AFTER_NEWLINE
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000879 };
880 static AssertionNode* AtEnd(RegExpNode* on_success) {
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000881 return new(on_success->zone()) AssertionNode(AT_END, on_success);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000882 }
883 static AssertionNode* AtStart(RegExpNode* on_success) {
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000884 return new(on_success->zone()) AssertionNode(AT_START, on_success);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000885 }
886 static AssertionNode* AtBoundary(RegExpNode* on_success) {
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000887 return new(on_success->zone()) AssertionNode(AT_BOUNDARY, on_success);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000888 }
889 static AssertionNode* AtNonBoundary(RegExpNode* on_success) {
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000890 return new(on_success->zone()) AssertionNode(AT_NON_BOUNDARY, on_success);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000891 }
892 static AssertionNode* AfterNewline(RegExpNode* on_success) {
mmassi@chromium.org7028c052012-06-13 11:51:58 +0000893 return new(on_success->zone()) AssertionNode(AFTER_NEWLINE, on_success);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000894 }
895 virtual void Accept(NodeVisitor* visitor);
896 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000897 virtual int EatsAtLeast(int still_to_find,
898 int recursion_depth,
899 bool not_at_start);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000900 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
901 RegExpCompiler* compiler,
iposva@chromium.org245aa852009-02-10 00:49:54 +0000902 int filled_in,
903 bool not_at_start);
verwaest@chromium.org37141392012-05-31 13:27:02 +0000904 virtual void FillInBMInfo(int offset,
905 int recursion_depth,
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000906 int budget,
verwaest@chromium.org37141392012-05-31 13:27:02 +0000907 BoyerMooreLookahead* bm,
908 bool not_at_start);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000909 AssertionNodeType type() { return type_; }
fschneider@chromium.org0c20e672010-01-14 15:28:53 +0000910 void set_type(AssertionNodeType type) { type_ = type; }
kmillikin@chromium.org83e16822011-09-13 08:21:47 +0000911
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000912 private:
erik.corry@gmail.comed49e962012-04-17 11:57:53 +0000913 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace);
914 enum IfPrevious { kIsNonWord, kIsWord };
915 void BacktrackIfPrevious(RegExpCompiler* compiler,
916 Trace* trace,
917 IfPrevious backtrack_if_previous);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000918 AssertionNode(AssertionNodeType t, RegExpNode* on_success)
919 : SeqRegExpNode(on_success), type_(t) { }
920 AssertionNodeType type_;
921};
922
923
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000924class BackReferenceNode: public SeqRegExpNode {
925 public:
926 BackReferenceNode(int start_reg,
927 int end_reg,
ager@chromium.org8bb60582008-12-11 12:02:20 +0000928 RegExpNode* on_success)
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000929 : SeqRegExpNode(on_success),
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000930 start_reg_(start_reg),
931 end_reg_(end_reg) { }
932 virtual void Accept(NodeVisitor* visitor);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000933 int start_register() { return start_reg_; }
934 int end_register() { return end_reg_; }
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000935 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000936 virtual int EatsAtLeast(int still_to_find,
937 int recursion_depth,
938 bool not_at_start);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000939 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
940 RegExpCompiler* compiler,
iposva@chromium.org245aa852009-02-10 00:49:54 +0000941 int characters_filled_in,
942 bool not_at_start) {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000943 return;
944 }
verwaest@chromium.org37141392012-05-31 13:27:02 +0000945 virtual void FillInBMInfo(int offset,
946 int recursion_depth,
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000947 int budget,
verwaest@chromium.org37141392012-05-31 13:27:02 +0000948 BoyerMooreLookahead* bm,
949 bool not_at_start);
kmillikin@chromium.org83e16822011-09-13 08:21:47 +0000950
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000951 private:
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000952 int start_reg_;
953 int end_reg_;
954};
955
956
957class EndNode: public RegExpNode {
958 public:
ager@chromium.org8bb60582008-12-11 12:02:20 +0000959 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000960 explicit EndNode(Action action, Zone* zone)
961 : RegExpNode(zone), action_(action) { }
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000962 virtual void Accept(NodeVisitor* visitor);
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000963 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
kasperl@chromium.orga5551262010-12-07 12:49:48 +0000964 virtual int EatsAtLeast(int still_to_find,
965 int recursion_depth,
966 bool not_at_start) { return 0; }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000967 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
968 RegExpCompiler* compiler,
iposva@chromium.org245aa852009-02-10 00:49:54 +0000969 int characters_filled_in,
970 bool not_at_start) {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000971 // Returning 0 from EatsAtLeast should ensure we never get here.
972 UNREACHABLE();
973 }
verwaest@chromium.org37141392012-05-31 13:27:02 +0000974 virtual void FillInBMInfo(int offset,
975 int recursion_depth,
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000976 int budget,
verwaest@chromium.org37141392012-05-31 13:27:02 +0000977 BoyerMooreLookahead* bm,
978 bool not_at_start) {
fschneider@chromium.org7d10be52012-04-10 12:30:14 +0000979 // Returning 0 from EatsAtLeast should ensure we never get here.
980 UNREACHABLE();
981 }
danno@chromium.org1044a4d2012-04-30 12:34:39 +0000982
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000983 private:
984 Action action_;
985};
986
987
ager@chromium.org8bb60582008-12-11 12:02:20 +0000988class NegativeSubmatchSuccess: public EndNode {
989 public:
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000990 NegativeSubmatchSuccess(int stack_pointer_reg,
991 int position_reg,
992 int clear_capture_count,
rossberg@chromium.org400388e2012-06-06 09:29:22 +0000993 int clear_capture_start,
994 Zone* zone)
995 : EndNode(NEGATIVE_SUBMATCH_SUCCESS, zone),
ager@chromium.org8bb60582008-12-11 12:02:20 +0000996 stack_pointer_register_(stack_pointer_reg),
ager@chromium.orgddb913d2009-01-27 10:01:48 +0000997 current_position_register_(position_reg),
998 clear_capture_count_(clear_capture_count),
999 clear_capture_start_(clear_capture_start) { }
1000 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
ager@chromium.org8bb60582008-12-11 12:02:20 +00001001
1002 private:
1003 int stack_pointer_register_;
1004 int current_position_register_;
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001005 int clear_capture_count_;
1006 int clear_capture_start_;
ager@chromium.org8bb60582008-12-11 12:02:20 +00001007};
1008
1009
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001010class Guard: public ZoneObject {
1011 public:
1012 enum Relation { LT, GEQ };
1013 Guard(int reg, Relation op, int value)
1014 : reg_(reg),
1015 op_(op),
1016 value_(value) { }
1017 int reg() { return reg_; }
1018 Relation op() { return op_; }
1019 int value() { return value_; }
1020
1021 private:
1022 int reg_;
1023 Relation op_;
1024 int value_;
1025};
1026
1027
1028class GuardedAlternative {
1029 public:
1030 explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { }
mmassi@chromium.org7028c052012-06-13 11:51:58 +00001031 void AddGuard(Guard* guard, Zone* zone);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001032 RegExpNode* node() { return node_; }
1033 void set_node(RegExpNode* node) { node_ = node; }
1034 ZoneList<Guard*>* guards() { return guards_; }
1035
1036 private:
1037 RegExpNode* node_;
1038 ZoneList<Guard*>* guards_;
1039};
1040
1041
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001042class AlternativeGeneration;
1043
1044
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001045class ChoiceNode: public RegExpNode {
1046 public:
rossberg@chromium.org400388e2012-06-06 09:29:22 +00001047 explicit ChoiceNode(int expected_size, Zone* zone)
1048 : RegExpNode(zone),
mmassi@chromium.org7028c052012-06-13 11:51:58 +00001049 alternatives_(new(zone)
1050 ZoneList<GuardedAlternative>(expected_size, zone)),
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001051 table_(NULL),
iposva@chromium.org245aa852009-02-10 00:49:54 +00001052 not_at_start_(false),
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001053 being_calculated_(false) { }
1054 virtual void Accept(NodeVisitor* visitor);
rossberg@chromium.org400388e2012-06-06 09:29:22 +00001055 void AddAlternative(GuardedAlternative node) {
1056 alternatives()->Add(node, zone());
1057 }
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001058 ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
1059 DispatchTable* GetTable(bool ignore_case);
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001060 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
kasperl@chromium.orga5551262010-12-07 12:49:48 +00001061 virtual int EatsAtLeast(int still_to_find,
1062 int recursion_depth,
1063 bool not_at_start);
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001064 int EatsAtLeastHelper(int still_to_find,
1065 int recursion_depth,
kasperl@chromium.orga5551262010-12-07 12:49:48 +00001066 RegExpNode* ignore_this_node,
1067 bool not_at_start);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001068 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1069 RegExpCompiler* compiler,
iposva@chromium.org245aa852009-02-10 00:49:54 +00001070 int characters_filled_in,
1071 bool not_at_start);
verwaest@chromium.org37141392012-05-31 13:27:02 +00001072 virtual void FillInBMInfo(int offset,
1073 int recursion_depth,
rossberg@chromium.org400388e2012-06-06 09:29:22 +00001074 int budget,
verwaest@chromium.org37141392012-05-31 13:27:02 +00001075 BoyerMooreLookahead* bm,
1076 bool not_at_start);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001077
1078 bool being_calculated() { return being_calculated_; }
iposva@chromium.org245aa852009-02-10 00:49:54 +00001079 bool not_at_start() { return not_at_start_; }
1080 void set_not_at_start() { not_at_start_ = true; }
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001081 void set_being_calculated(bool b) { being_calculated_ = b; }
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001082 virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
danno@chromium.org1044a4d2012-04-30 12:34:39 +00001083 virtual RegExpNode* FilterASCII(int depth);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001084
ager@chromium.org8bb60582008-12-11 12:02:20 +00001085 protected:
jkummerow@chromium.org486075a2011-09-07 12:44:28 +00001086 int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
ager@chromium.org8bb60582008-12-11 12:02:20 +00001087 ZoneList<GuardedAlternative>* alternatives_;
1088
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001089 private:
1090 friend class DispatchTableConstructor;
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001091 friend class Analysis;
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001092 void GenerateGuard(RegExpMacroAssembler* macro_assembler,
ager@chromium.org5ec48922009-05-05 07:25:34 +00001093 Guard* guard,
ager@chromium.org32912102009-01-16 10:38:43 +00001094 Trace* trace);
fschneider@chromium.org7d10be52012-04-10 12:30:14 +00001095 int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least);
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001096 void EmitOutOfLineContinuation(RegExpCompiler* compiler,
ager@chromium.org32912102009-01-16 10:38:43 +00001097 Trace* trace,
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001098 GuardedAlternative alternative,
1099 AlternativeGeneration* alt_gen,
1100 int preload_characters,
1101 bool next_expects_preload);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001102 DispatchTable* table_;
iposva@chromium.org245aa852009-02-10 00:49:54 +00001103 // If true, this node is never checked at the start of the input.
1104 // Allows a new trace to start with at_start() set to false.
1105 bool not_at_start_;
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001106 bool being_calculated_;
1107};
1108
1109
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001110class NegativeLookaheadChoiceNode: public ChoiceNode {
1111 public:
1112 explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail,
rossberg@chromium.org400388e2012-06-06 09:29:22 +00001113 GuardedAlternative then_do_this,
1114 Zone* zone)
1115 : ChoiceNode(2, zone) {
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001116 AddAlternative(this_must_fail);
1117 AddAlternative(then_do_this);
1118 }
kasperl@chromium.orga5551262010-12-07 12:49:48 +00001119 virtual int EatsAtLeast(int still_to_find,
1120 int recursion_depth,
1121 bool not_at_start);
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001122 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1123 RegExpCompiler* compiler,
iposva@chromium.org245aa852009-02-10 00:49:54 +00001124 int characters_filled_in,
1125 bool not_at_start);
verwaest@chromium.org37141392012-05-31 13:27:02 +00001126 virtual void FillInBMInfo(int offset,
1127 int recursion_depth,
rossberg@chromium.org400388e2012-06-06 09:29:22 +00001128 int budget,
verwaest@chromium.org37141392012-05-31 13:27:02 +00001129 BoyerMooreLookahead* bm,
1130 bool not_at_start) {
1131 alternatives_->at(1).node()->FillInBMInfo(
rossberg@chromium.org400388e2012-06-06 09:29:22 +00001132 offset, recursion_depth + 1, budget - 1, bm, not_at_start);
erik.corry@gmail.comed49e962012-04-17 11:57:53 +00001133 if (offset == 0) set_bm_info(not_at_start, bm);
fschneider@chromium.org7d10be52012-04-10 12:30:14 +00001134 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001135 // For a negative lookahead we don't emit the quick check for the
1136 // alternative that is expected to fail. This is because quick check code
1137 // starts by loading enough characters for the alternative that takes fewest
1138 // characters, but on a negative lookahead the negative branch did not take
1139 // part in that calculation (EatsAtLeast) so the assumptions don't hold.
1140 virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; }
danno@chromium.org1044a4d2012-04-30 12:34:39 +00001141 virtual RegExpNode* FilterASCII(int depth);
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001142};
1143
1144
ager@chromium.org8bb60582008-12-11 12:02:20 +00001145class LoopChoiceNode: public ChoiceNode {
1146 public:
rossberg@chromium.org400388e2012-06-06 09:29:22 +00001147 explicit LoopChoiceNode(bool body_can_be_zero_length, Zone* zone)
1148 : ChoiceNode(2, zone),
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001149 loop_node_(NULL),
1150 continue_node_(NULL),
1151 body_can_be_zero_length_(body_can_be_zero_length) { }
1152 void AddLoopAlternative(GuardedAlternative alt);
1153 void AddContinueAlternative(GuardedAlternative alt);
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001154 virtual void Emit(RegExpCompiler* compiler, Trace* trace);
kasperl@chromium.orga5551262010-12-07 12:49:48 +00001155 virtual int EatsAtLeast(int still_to_find,
1156 int recursion_depth,
1157 bool not_at_start);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001158 virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1159 RegExpCompiler* compiler,
iposva@chromium.org245aa852009-02-10 00:49:54 +00001160 int characters_filled_in,
1161 bool not_at_start);
verwaest@chromium.org37141392012-05-31 13:27:02 +00001162 virtual void FillInBMInfo(int offset,
1163 int recursion_depth,
rossberg@chromium.org400388e2012-06-06 09:29:22 +00001164 int budget,
verwaest@chromium.org37141392012-05-31 13:27:02 +00001165 BoyerMooreLookahead* bm,
1166 bool not_at_start);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001167 RegExpNode* loop_node() { return loop_node_; }
1168 RegExpNode* continue_node() { return continue_node_; }
1169 bool body_can_be_zero_length() { return body_can_be_zero_length_; }
1170 virtual void Accept(NodeVisitor* visitor);
danno@chromium.org1044a4d2012-04-30 12:34:39 +00001171 virtual RegExpNode* FilterASCII(int depth);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001172
1173 private:
1174 // AddAlternative is made private for loop nodes because alternatives
1175 // should not be added freely, we need to keep track of which node
1176 // goes back to the node itself.
1177 void AddAlternative(GuardedAlternative node) {
1178 ChoiceNode::AddAlternative(node);
1179 }
1180
1181 RegExpNode* loop_node_;
1182 RegExpNode* continue_node_;
1183 bool body_can_be_zero_length_;
ager@chromium.org8bb60582008-12-11 12:02:20 +00001184};
1185
1186
erik.corry@gmail.comed49e962012-04-17 11:57:53 +00001187// Improve the speed that we scan for an initial point where a non-anchored
1188// regexp can match by using a Boyer-Moore-like table. This is done by
1189// identifying non-greedy non-capturing loops in the nodes that eat any
1190// character one at a time. For example in the middle of the regexp
1191// /foo[\s\S]*?bar/ we find such a loop. There is also such a loop implicitly
1192// inserted at the start of any non-anchored regexp.
1193//
1194// When we have found such a loop we look ahead in the nodes to find the set of
1195// characters that can come at given distances. For example for the regexp
1196// /.?foo/ we know that there are at least 3 characters ahead of us, and the
1197// sets of characters that can occur are [any, [f, o], [o]]. We find a range in
1198// the lookahead info where the set of characters is reasonably constrained. In
1199// our example this is from index 1 to 2 (0 is not constrained). We can now
1200// look 3 characters ahead and if we don't find one of [f, o] (the union of
1201// [f, o] and [o]) then we can skip forwards by the range size (in this case 2).
1202//
1203// For Unicode input strings we do the same, but modulo 128.
1204//
1205// We also look at the first string fed to the regexp and use that to get a hint
1206// of the character frequencies in the inputs. This affects the assessment of
1207// whether the set of characters is 'reasonably constrained'.
1208//
1209// We also have another lookahead mechanism (called quick check in the code),
1210// which uses a wide load of multiple characters followed by a mask and compare
1211// to determine whether a match is possible at this point.
1212enum ContainedInLattice {
1213 kNotYet = 0,
1214 kLatticeIn = 1,
1215 kLatticeOut = 2,
1216 kLatticeUnknown = 3 // Can also mean both in and out.
1217};
1218
1219
1220inline ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b) {
1221 return static_cast<ContainedInLattice>(a | b);
1222}
1223
1224
1225ContainedInLattice AddRange(ContainedInLattice a,
1226 const int* ranges,
1227 int ranges_size,
1228 Interval new_range);
1229
1230
1231class BoyerMoorePositionInfo : public ZoneObject {
1232 public:
rossberg@chromium.org400388e2012-06-06 09:29:22 +00001233 explicit BoyerMoorePositionInfo(Zone* zone)
mmassi@chromium.org7028c052012-06-13 11:51:58 +00001234 : map_(new(zone) ZoneList<bool>(kMapSize, zone)),
erik.corry@gmail.comed49e962012-04-17 11:57:53 +00001235 map_count_(0),
1236 w_(kNotYet),
1237 s_(kNotYet),
1238 d_(kNotYet),
1239 surrogate_(kNotYet) {
1240 for (int i = 0; i < kMapSize; i++) {
rossberg@chromium.org400388e2012-06-06 09:29:22 +00001241 map_->Add(false, zone);
erik.corry@gmail.comed49e962012-04-17 11:57:53 +00001242 }
1243 }
1244
1245 bool& at(int i) { return map_->at(i); }
1246
1247 static const int kMapSize = 128;
1248 static const int kMask = kMapSize - 1;
1249
1250 int map_count() const { return map_count_; }
1251
1252 void Set(int character);
1253 void SetInterval(const Interval& interval);
1254 void SetAll();
1255 bool is_non_word() { return w_ == kLatticeOut; }
1256 bool is_word() { return w_ == kLatticeIn; }
1257
1258 private:
1259 ZoneList<bool>* map_;
1260 int map_count_; // Number of set bits in the map.
1261 ContainedInLattice w_; // The \w character class.
1262 ContainedInLattice s_; // The \s character class.
1263 ContainedInLattice d_; // The \d character class.
1264 ContainedInLattice surrogate_; // Surrogate UTF-16 code units.
1265};
1266
1267
1268class BoyerMooreLookahead : public ZoneObject {
1269 public:
rossberg@chromium.org400388e2012-06-06 09:29:22 +00001270 BoyerMooreLookahead(int length, RegExpCompiler* compiler, Zone* zone);
erik.corry@gmail.comed49e962012-04-17 11:57:53 +00001271
1272 int length() { return length_; }
1273 int max_char() { return max_char_; }
1274 RegExpCompiler* compiler() { return compiler_; }
1275
1276 int Count(int map_number) {
1277 return bitmaps_->at(map_number)->map_count();
1278 }
1279
1280 BoyerMoorePositionInfo* at(int i) { return bitmaps_->at(i); }
1281
1282 void Set(int map_number, int character) {
1283 if (character > max_char_) return;
1284 BoyerMoorePositionInfo* info = bitmaps_->at(map_number);
1285 info->Set(character);
1286 }
1287
1288 void SetInterval(int map_number, const Interval& interval) {
1289 if (interval.from() > max_char_) return;
1290 BoyerMoorePositionInfo* info = bitmaps_->at(map_number);
1291 if (interval.to() > max_char_) {
1292 info->SetInterval(Interval(interval.from(), max_char_));
1293 } else {
1294 info->SetInterval(interval);
1295 }
1296 }
1297
1298 void SetAll(int map_number) {
1299 bitmaps_->at(map_number)->SetAll();
1300 }
1301
1302 void SetRest(int from_map) {
1303 for (int i = from_map; i < length_; i++) SetAll(i);
1304 }
1305 bool EmitSkipInstructions(RegExpMacroAssembler* masm);
1306
1307 private:
1308 // This is the value obtained by EatsAtLeast. If we do not have at least this
1309 // many characters left in the sample string then the match is bound to fail.
1310 // Therefore it is OK to read a character this far ahead of the current match
1311 // point.
1312 int length_;
1313 RegExpCompiler* compiler_;
1314 // 0x7f for ASCII, 0xffff for UTF-16.
1315 int max_char_;
1316 ZoneList<BoyerMoorePositionInfo*>* bitmaps_;
1317
1318 int GetSkipTable(int min_lookahead,
1319 int max_lookahead,
1320 Handle<ByteArray> boolean_skip_table);
1321 bool FindWorthwhileInterval(int* from, int* to);
1322 int FindBestInterval(
1323 int max_number_of_chars, int old_biggest_points, int* from, int* to);
1324};
1325
1326
ager@chromium.org8bb60582008-12-11 12:02:20 +00001327// There are many ways to generate code for a node. This class encapsulates
1328// the current way we should be generating. In other words it encapsulates
ager@chromium.org32912102009-01-16 10:38:43 +00001329// the current state of the code generator. The effect of this is that we
1330// generate code for paths that the matcher can take through the regular
1331// expression. A given node in the regexp can be code-generated several times
1332// as it can be part of several traces. For example for the regexp:
1333// /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part
1334// of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code
1335// to match foo is generated only once (the traces have a common prefix). The
1336// code to store the capture is deferred and generated (twice) after the places
1337// where baz has been matched.
1338class Trace {
ager@chromium.org8bb60582008-12-11 12:02:20 +00001339 public:
iposva@chromium.org245aa852009-02-10 00:49:54 +00001340 // A value for a property that is either known to be true, know to be false,
1341 // or not known.
1342 enum TriBool {
1343 UNKNOWN = -1, FALSE = 0, TRUE = 1
1344 };
1345
ager@chromium.org8bb60582008-12-11 12:02:20 +00001346 class DeferredAction {
1347 public:
1348 DeferredAction(ActionNode::Type type, int reg)
1349 : type_(type), reg_(reg), next_(NULL) { }
1350 DeferredAction* next() { return next_; }
ager@chromium.org32912102009-01-16 10:38:43 +00001351 bool Mentions(int reg);
ager@chromium.org8bb60582008-12-11 12:02:20 +00001352 int reg() { return reg_; }
1353 ActionNode::Type type() { return type_; }
1354 private:
1355 ActionNode::Type type_;
1356 int reg_;
1357 DeferredAction* next_;
ager@chromium.org32912102009-01-16 10:38:43 +00001358 friend class Trace;
ager@chromium.org8bb60582008-12-11 12:02:20 +00001359 };
1360
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001361 class DeferredCapture : public DeferredAction {
ager@chromium.org8bb60582008-12-11 12:02:20 +00001362 public:
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001363 DeferredCapture(int reg, bool is_capture, Trace* trace)
ager@chromium.org8bb60582008-12-11 12:02:20 +00001364 : DeferredAction(ActionNode::STORE_POSITION, reg),
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001365 cp_offset_(trace->cp_offset()),
1366 is_capture_(is_capture) { }
ager@chromium.org8bb60582008-12-11 12:02:20 +00001367 int cp_offset() { return cp_offset_; }
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001368 bool is_capture() { return is_capture_; }
ager@chromium.org8bb60582008-12-11 12:02:20 +00001369 private:
1370 int cp_offset_;
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001371 bool is_capture_;
ager@chromium.org8bb60582008-12-11 12:02:20 +00001372 void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
1373 };
1374
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001375 class DeferredSetRegister : public DeferredAction {
ager@chromium.org8bb60582008-12-11 12:02:20 +00001376 public:
1377 DeferredSetRegister(int reg, int value)
1378 : DeferredAction(ActionNode::SET_REGISTER, reg),
1379 value_(value) { }
1380 int value() { return value_; }
1381 private:
1382 int value_;
1383 };
1384
ager@chromium.org32912102009-01-16 10:38:43 +00001385 class DeferredClearCaptures : public DeferredAction {
1386 public:
1387 explicit DeferredClearCaptures(Interval range)
1388 : DeferredAction(ActionNode::CLEAR_CAPTURES, -1),
1389 range_(range) { }
1390 Interval range() { return range_; }
1391 private:
1392 Interval range_;
1393 };
1394
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001395 class DeferredIncrementRegister : public DeferredAction {
ager@chromium.org8bb60582008-12-11 12:02:20 +00001396 public:
1397 explicit DeferredIncrementRegister(int reg)
1398 : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { }
1399 };
1400
ager@chromium.org32912102009-01-16 10:38:43 +00001401 Trace()
ager@chromium.org8bb60582008-12-11 12:02:20 +00001402 : cp_offset_(0),
1403 actions_(NULL),
1404 backtrack_(NULL),
1405 stop_node_(NULL),
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001406 loop_label_(NULL),
1407 characters_preloaded_(0),
iposva@chromium.org245aa852009-02-10 00:49:54 +00001408 bound_checked_up_to_(0),
ager@chromium.org381abbb2009-02-25 13:23:22 +00001409 flush_budget_(100),
iposva@chromium.org245aa852009-02-10 00:49:54 +00001410 at_start_(UNKNOWN) { }
1411
ager@chromium.org32912102009-01-16 10:38:43 +00001412 // End the trace. This involves flushing the deferred actions in the trace
1413 // and pushing a backtrack location onto the backtrack stack. Once this is
1414 // done we can start a new trace or go to one that has already been
1415 // generated.
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001416 void Flush(RegExpCompiler* compiler, RegExpNode* successor);
ager@chromium.org8bb60582008-12-11 12:02:20 +00001417 int cp_offset() { return cp_offset_; }
1418 DeferredAction* actions() { return actions_; }
ager@chromium.org32912102009-01-16 10:38:43 +00001419 // A trivial trace is one that has no deferred actions or other state that
1420 // affects the assumptions used when generating code. There is no recorded
1421 // backtrack location in a trivial trace, so with a trivial trace we will
1422 // generate code that, on a failure to match, gets the backtrack location
1423 // from the backtrack stack rather than using a direct jump instruction. We
1424 // always start code generation with a trivial trace and non-trivial traces
1425 // are created as we emit code for nodes or add to the list of deferred
1426 // actions in the trace. The location of the code generated for a node using
1427 // a trivial trace is recorded in a label in the node so that gotos can be
1428 // generated to that code.
ager@chromium.org8bb60582008-12-11 12:02:20 +00001429 bool is_trivial() {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001430 return backtrack_ == NULL &&
1431 actions_ == NULL &&
1432 cp_offset_ == 0 &&
1433 characters_preloaded_ == 0 &&
1434 bound_checked_up_to_ == 0 &&
iposva@chromium.org245aa852009-02-10 00:49:54 +00001435 quick_check_performed_.characters() == 0 &&
1436 at_start_ == UNKNOWN;
ager@chromium.org8bb60582008-12-11 12:02:20 +00001437 }
iposva@chromium.org245aa852009-02-10 00:49:54 +00001438 TriBool at_start() { return at_start_; }
1439 void set_at_start(bool at_start) { at_start_ = at_start ? TRUE : FALSE; }
ager@chromium.org8bb60582008-12-11 12:02:20 +00001440 Label* backtrack() { return backtrack_; }
1441 Label* loop_label() { return loop_label_; }
1442 RegExpNode* stop_node() { return stop_node_; }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001443 int characters_preloaded() { return characters_preloaded_; }
1444 int bound_checked_up_to() { return bound_checked_up_to_; }
ager@chromium.org381abbb2009-02-25 13:23:22 +00001445 int flush_budget() { return flush_budget_; }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001446 QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; }
1447 bool mentions_reg(int reg);
ager@chromium.org32912102009-01-16 10:38:43 +00001448 // Returns true if a deferred position store exists to the specified
1449 // register and stores the offset in the out-parameter. Otherwise
1450 // returns false.
1451 bool GetStoredPosition(int reg, int* cp_offset);
1452 // These set methods and AdvanceCurrentPositionInTrace should be used only on
1453 // new traces - the intention is that traces are immutable after creation.
ager@chromium.org8bb60582008-12-11 12:02:20 +00001454 void add_action(DeferredAction* new_action) {
1455 ASSERT(new_action->next_ == NULL);
1456 new_action->next_ = actions_;
1457 actions_ = new_action;
1458 }
ager@chromium.org8bb60582008-12-11 12:02:20 +00001459 void set_backtrack(Label* backtrack) { backtrack_ = backtrack; }
1460 void set_stop_node(RegExpNode* node) { stop_node_ = node; }
1461 void set_loop_label(Label* label) { loop_label_ = label; }
fschneider@chromium.org0c20e672010-01-14 15:28:53 +00001462 void set_characters_preloaded(int count) { characters_preloaded_ = count; }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001463 void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; }
ager@chromium.org381abbb2009-02-25 13:23:22 +00001464 void set_flush_budget(int to) { flush_budget_ = to; }
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001465 void set_quick_check_performed(QuickCheckDetails* d) {
1466 quick_check_performed_ = *d;
1467 }
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001468 void InvalidateCurrentCharacter();
1469 void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler);
kmillikin@chromium.org83e16822011-09-13 08:21:47 +00001470
ager@chromium.org8bb60582008-12-11 12:02:20 +00001471 private:
mmassi@chromium.org7028c052012-06-13 11:51:58 +00001472 int FindAffectedRegisters(OutSet* affected_registers, Zone* zone);
ager@chromium.org8bb60582008-12-11 12:02:20 +00001473 void PerformDeferredActions(RegExpMacroAssembler* macro,
mmassi@chromium.org7028c052012-06-13 11:51:58 +00001474 int max_register,
1475 OutSet& affected_registers,
1476 OutSet* registers_to_pop,
1477 OutSet* registers_to_clear,
1478 Zone* zone);
ager@chromium.org8bb60582008-12-11 12:02:20 +00001479 void RestoreAffectedRegisters(RegExpMacroAssembler* macro,
1480 int max_register,
ager@chromium.orgddb913d2009-01-27 10:01:48 +00001481 OutSet& registers_to_pop,
1482 OutSet& registers_to_clear);
ager@chromium.org8bb60582008-12-11 12:02:20 +00001483 int cp_offset_;
1484 DeferredAction* actions_;
1485 Label* backtrack_;
1486 RegExpNode* stop_node_;
1487 Label* loop_label_;
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001488 int characters_preloaded_;
1489 int bound_checked_up_to_;
1490 QuickCheckDetails quick_check_performed_;
ager@chromium.org381abbb2009-02-25 13:23:22 +00001491 int flush_budget_;
iposva@chromium.org245aa852009-02-10 00:49:54 +00001492 TriBool at_start_;
ager@chromium.org8bb60582008-12-11 12:02:20 +00001493};
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001494
1495
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001496class NodeVisitor {
1497 public:
1498 virtual ~NodeVisitor() { }
1499#define DECLARE_VISIT(Type) \
1500 virtual void Visit##Type(Type##Node* that) = 0;
1501FOR_EACH_NODE_TYPE(DECLARE_VISIT)
1502#undef DECLARE_VISIT
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001503 virtual void VisitLoopChoice(LoopChoiceNode* that) { VisitChoice(that); }
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001504};
1505
1506
1507// Node visitor used to add the start set of the alternatives to the
1508// dispatch table of a choice node.
1509class DispatchTableConstructor: public NodeVisitor {
1510 public:
mmassi@chromium.org7028c052012-06-13 11:51:58 +00001511 DispatchTableConstructor(DispatchTable* table, bool ignore_case,
1512 Zone* zone)
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001513 : table_(table),
1514 choice_index_(-1),
mmassi@chromium.org7028c052012-06-13 11:51:58 +00001515 ignore_case_(ignore_case),
1516 zone_(zone) { }
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001517
1518 void BuildTable(ChoiceNode* node);
1519
1520 void AddRange(CharacterRange range) {
mmassi@chromium.org7028c052012-06-13 11:51:58 +00001521 table()->AddRange(range, choice_index_, zone_);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001522 }
1523
1524 void AddInverse(ZoneList<CharacterRange>* ranges);
1525
1526#define DECLARE_VISIT(Type) \
1527 virtual void Visit##Type(Type##Node* that);
1528FOR_EACH_NODE_TYPE(DECLARE_VISIT)
1529#undef DECLARE_VISIT
1530
1531 DispatchTable* table() { return table_; }
1532 void set_choice_index(int value) { choice_index_ = value; }
1533
1534 protected:
ager@chromium.org5ec48922009-05-05 07:25:34 +00001535 DispatchTable* table_;
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001536 int choice_index_;
1537 bool ignore_case_;
mmassi@chromium.org7028c052012-06-13 11:51:58 +00001538 Zone* zone_;
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001539};
1540
1541
ager@chromium.org8bb60582008-12-11 12:02:20 +00001542// Assertion propagation moves information about assertions such as
1543// \b to the affected nodes. For instance, in /.\b./ information must
1544// be propagated to the first '.' that whatever follows needs to know
1545// if it matched a word or a non-word, and to the second '.' that it
1546// has to check if it succeeds a word or non-word. In this case the
1547// result will be something like:
1548//
1549// +-------+ +------------+
1550// | . | | . |
1551// +-------+ ---> +------------+
1552// | word? | | check word |
1553// +-------+ +------------+
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001554class Analysis: public NodeVisitor {
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001555 public:
ager@chromium.org38e4c712009-11-11 09:11:58 +00001556 Analysis(bool ignore_case, bool is_ascii)
1557 : ignore_case_(ignore_case),
1558 is_ascii_(is_ascii),
1559 error_message_(NULL) { }
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001560 void EnsureAnalyzed(RegExpNode* node);
1561
1562#define DECLARE_VISIT(Type) \
1563 virtual void Visit##Type(Type##Node* that);
1564FOR_EACH_NODE_TYPE(DECLARE_VISIT)
1565#undef DECLARE_VISIT
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001566 virtual void VisitLoopChoice(LoopChoiceNode* that);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001567
sgjesse@chromium.org755c5b12009-05-29 11:04:38 +00001568 bool has_failed() { return error_message_ != NULL; }
1569 const char* error_message() {
1570 ASSERT(error_message_ != NULL);
1571 return error_message_;
1572 }
1573 void fail(const char* error_message) {
1574 error_message_ = error_message;
1575 }
kmillikin@chromium.org83e16822011-09-13 08:21:47 +00001576
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001577 private:
1578 bool ignore_case_;
ager@chromium.org38e4c712009-11-11 09:11:58 +00001579 bool is_ascii_;
sgjesse@chromium.org755c5b12009-05-29 11:04:38 +00001580 const char* error_message_;
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001581
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001582 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001583};
1584
1585
ager@chromium.org8bb60582008-12-11 12:02:20 +00001586struct RegExpCompileData {
1587 RegExpCompileData()
1588 : tree(NULL),
1589 node(NULL),
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001590 simple(true),
iposva@chromium.org245aa852009-02-10 00:49:54 +00001591 contains_anchor(false),
ager@chromium.org8bb60582008-12-11 12:02:20 +00001592 capture_count(0) { }
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001593 RegExpTree* tree;
ager@chromium.org8bb60582008-12-11 12:02:20 +00001594 RegExpNode* node;
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001595 bool simple;
iposva@chromium.org245aa852009-02-10 00:49:54 +00001596 bool contains_anchor;
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001597 Handle<String> error;
1598 int capture_count;
1599};
1600
1601
1602class RegExpEngine: public AllStatic {
1603 public:
kasperl@chromium.org7be3c992009-03-12 07:19:55 +00001604 struct CompilationResult {
1605 explicit CompilationResult(const char* error_message)
1606 : error_message(error_message),
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +00001607 code(HEAP->the_hole_value()),
kasperl@chromium.org7be3c992009-03-12 07:19:55 +00001608 num_registers(0) {}
1609 CompilationResult(Object* code, int registers)
1610 : error_message(NULL),
1611 code(code),
1612 num_registers(registers) {}
1613 const char* error_message;
1614 Object* code;
1615 int num_registers;
1616 };
1617
1618 static CompilationResult Compile(RegExpCompileData* input,
1619 bool ignore_case,
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +00001620 bool global,
kasperl@chromium.org7be3c992009-03-12 07:19:55 +00001621 bool multiline,
1622 Handle<String> pattern,
fschneider@chromium.org7d10be52012-04-10 12:30:14 +00001623 Handle<String> sample_subject,
rossberg@chromium.org400388e2012-06-06 09:29:22 +00001624 bool is_ascii, Zone* zone);
ager@chromium.org8bb60582008-12-11 12:02:20 +00001625
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001626 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
1627};
1628
1629
fschneider@chromium.org0c20e672010-01-14 15:28:53 +00001630class OffsetsVector {
1631 public:
ulan@chromium.org812308e2012-02-29 15:58:45 +00001632 inline OffsetsVector(int num_registers, Isolate* isolate)
fschneider@chromium.org0c20e672010-01-14 15:28:53 +00001633 : offsets_vector_length_(num_registers) {
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +00001634 if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
fschneider@chromium.org0c20e672010-01-14 15:28:53 +00001635 vector_ = NewArray<int>(offsets_vector_length_);
1636 } else {
ulan@chromium.org812308e2012-02-29 15:58:45 +00001637 vector_ = isolate->jsregexp_static_offsets_vector();
fschneider@chromium.org0c20e672010-01-14 15:28:53 +00001638 }
1639 }
1640 inline ~OffsetsVector() {
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +00001641 if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
fschneider@chromium.org0c20e672010-01-14 15:28:53 +00001642 DeleteArray(vector_);
1643 vector_ = NULL;
1644 }
1645 }
1646 inline int* vector() { return vector_; }
1647 inline int length() { return offsets_vector_length_; }
1648
mstarzinger@chromium.org15613d02012-05-23 12:04:37 +00001649 static const int kStaticOffsetsVectorSize =
1650 Isolate::kJSRegexpStaticOffsetsVectorSize;
fschneider@chromium.org0c20e672010-01-14 15:28:53 +00001651
1652 private:
sgjesse@chromium.orgea88ce92011-03-23 11:19:56 +00001653 static Address static_offsets_vector_address(Isolate* isolate) {
1654 return reinterpret_cast<Address>(isolate->jsregexp_static_offsets_vector());
fschneider@chromium.org0c20e672010-01-14 15:28:53 +00001655 }
1656
1657 int* vector_;
1658 int offsets_vector_length_;
fschneider@chromium.org0c20e672010-01-14 15:28:53 +00001659
1660 friend class ExternalReference;
1661};
1662
1663
christian.plesner.hansen43d26ec2008-07-03 15:10:15 +00001664} } // namespace v8::internal
1665
1666#endif // V8_JSREGEXP_H_