blob: 81c220520480f4051de185cccc1922b8091a538d [file] [log] [blame]
Steve Blocka7e24c12009-10-30 11:49:00 +00001// Copyright 2008 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28
29#include <stdlib.h>
30
31#include "v8.h"
32
33#include "string-stream.h"
34#include "cctest.h"
35#include "zone-inl.h"
36#include "parser.h"
37#include "ast.h"
38#include "jsregexp.h"
39#include "regexp-macro-assembler.h"
40#include "regexp-macro-assembler-irregexp.h"
41#ifdef V8_NATIVE_REGEXP
42#ifdef V8_TARGET_ARCH_ARM
43#include "arm/macro-assembler-arm.h"
44#include "arm/regexp-macro-assembler-arm.h"
45#endif
46#ifdef V8_TARGET_ARCH_X64
47#include "x64/macro-assembler-x64.h"
48#include "x64/regexp-macro-assembler-x64.h"
49#endif
50#ifdef V8_TARGET_ARCH_IA32
51#include "ia32/macro-assembler-ia32.h"
52#include "ia32/regexp-macro-assembler-ia32.h"
53#endif
54#else
55#include "interpreter-irregexp.h"
56#endif
57
58using namespace v8::internal;
59
60
61static SmartPointer<const char> Parse(const char* input) {
62 V8::Initialize(NULL);
63 v8::HandleScope scope;
64 ZoneScope zone_scope(DELETE_ON_EXIT);
65 FlatStringReader reader(CStrVector(input));
66 RegExpCompileData result;
67 CHECK(v8::internal::ParseRegExp(&reader, false, &result));
68 CHECK(result.tree != NULL);
69 CHECK(result.error.is_null());
70 SmartPointer<const char> output = result.tree->ToString();
71 return output;
72}
73
74static bool CheckSimple(const char* input) {
75 V8::Initialize(NULL);
76 v8::HandleScope scope;
77 unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
78 ZoneScope zone_scope(DELETE_ON_EXIT);
79 FlatStringReader reader(CStrVector(input));
80 RegExpCompileData result;
81 CHECK(v8::internal::ParseRegExp(&reader, false, &result));
82 CHECK(result.tree != NULL);
83 CHECK(result.error.is_null());
84 return result.simple;
85}
86
// Plain aggregate carrying the minimum and maximum match length reported
// for a parsed regular expression tree. Kept constructor-free so it can be
// brace-initialized.
struct MinMaxPair {
  int min_match;  // Value of RegExpTree::min_match().
  int max_match;  // Value of RegExpTree::max_match().
};
91
92static MinMaxPair CheckMinMaxMatch(const char* input) {
93 V8::Initialize(NULL);
94 v8::HandleScope scope;
95 unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
96 ZoneScope zone_scope(DELETE_ON_EXIT);
97 FlatStringReader reader(CStrVector(input));
98 RegExpCompileData result;
99 CHECK(v8::internal::ParseRegExp(&reader, false, &result));
100 CHECK(result.tree != NULL);
101 CHECK(result.error.is_null());
102 int min_match = result.tree->min_match();
103 int max_match = result.tree->max_match();
104 MinMaxPair pair = { min_match, max_match };
105 return pair;
106}
107
108
109
// Asserts that parsing |input| prints as the tree description |expected|.
#define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input))

// Asserts that the parser's "simple" flag for |input| equals |simple|.
// No trailing semicolon here: the call site supplies it, so the macro
// expands to exactly one statement.
#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input))

// Asserts that the tree parsed from |input| reports the given minimum and
// maximum match lengths. Wrapped in do/while(false) so that the expansion
// plus the caller's semicolon forms a single statement (safe in if/else).
#define CHECK_MIN_MAX(input, min, max)                                      \
  do {                                                                      \
    MinMaxPair min_max = CheckMinMaxMatch(input);                           \
    CHECK_EQ(min, min_max.min_match);                                       \
    CHECK_EQ(max, min_max.max_match);                                       \
  } while (false)
117
118TEST(Parser) {
119 V8::Initialize(NULL);
120 CHECK_PARSE_EQ("abc", "'abc'");
121 CHECK_PARSE_EQ("", "%");
122 CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')");
123 CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
124 CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)");
125 CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
126 CHECK_PARSE_EQ("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
127 CHECK_PARSE_EQ("a*", "(# 0 - g 'a')");
128 CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')");
129 CHECK_PARSE_EQ("abc+", "(: 'ab' (# 1 - g 'c'))");
130 CHECK_PARSE_EQ("abc+?", "(: 'ab' (# 1 - n 'c'))");
131 CHECK_PARSE_EQ("xyz?", "(: 'xy' (# 0 1 g 'z'))");
132 CHECK_PARSE_EQ("xyz??", "(: 'xy' (# 0 1 n 'z'))");
133 CHECK_PARSE_EQ("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
134 CHECK_PARSE_EQ("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
135 CHECK_PARSE_EQ("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
136 CHECK_PARSE_EQ("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
137 CHECK_PARSE_EQ("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
138 CHECK_PARSE_EQ("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
139 CHECK_PARSE_EQ("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
140 CHECK_PARSE_EQ("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
141 CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
142 CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
143 CHECK_PARSE_EQ("(?:foo)", "'foo'");
144 CHECK_PARSE_EQ("(?: foo )", "' foo '");
145 CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
146 CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
147 CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
148 CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
149 CHECK_PARSE_EQ("()", "(^ %)");
150 CHECK_PARSE_EQ("(?=)", "(-> + %)");
151 CHECK_PARSE_EQ("[]", "^[\\x00-\\uffff]"); // Doesn't compile on windows
152 CHECK_PARSE_EQ("[^]", "[\\x00-\\uffff]"); // \uffff isn't in codepage 1252
153 CHECK_PARSE_EQ("[x]", "[x]");
154 CHECK_PARSE_EQ("[xyz]", "[x y z]");
155 CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
156 CHECK_PARSE_EQ("[-123]", "[- 1 2 3]");
157 CHECK_PARSE_EQ("[^123]", "^[1 2 3]");
158 CHECK_PARSE_EQ("]", "']'");
159 CHECK_PARSE_EQ("}", "'}'");
160 CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]");
161 CHECK_PARSE_EQ("[\\d]", "[0-9]");
162 CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]");
163 CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]");
164 CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]");
165 CHECK_PARSE_EQ("[z-\\d]", "[z - 0-9]");
166 CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK",
167 "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
168 CHECK_PARSE_EQ("\\c!", "'c!'");
169 CHECK_PARSE_EQ("\\c_", "'c_'");
170 CHECK_PARSE_EQ("\\c~", "'c~'");
171 CHECK_PARSE_EQ("[a\\]c]", "[a ] c]");
172 CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
173 CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");
174 CHECK_PARSE_EQ("\\0", "'\\x00'");
175 CHECK_PARSE_EQ("\\8", "'8'");
176 CHECK_PARSE_EQ("\\9", "'9'");
177 CHECK_PARSE_EQ("\\11", "'\\x09'");
178 CHECK_PARSE_EQ("\\11a", "'\\x09a'");
179 CHECK_PARSE_EQ("\\011", "'\\x09'");
180 CHECK_PARSE_EQ("\\00011", "'\\x0011'");
181 CHECK_PARSE_EQ("\\118", "'\\x098'");
182 CHECK_PARSE_EQ("\\111", "'I'");
183 CHECK_PARSE_EQ("\\1111", "'I1'");
184 CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
185 CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
186 CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
187 CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
188 CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')"
189 " (# 0 - g (<- 1)))");
190 CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')"
191 " (# 0 - g (<- 2)))");
192 CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')"
193 " (# 0 - g (<- 3)))");
194 CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')"
195 " (# 0 - g '\\x04'))");
196 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
197 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
198 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
199 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
200 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
201 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
202 CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))");
203 CHECK_PARSE_EQ("(a\\1)", "(^ 'a')");
204 CHECK_PARSE_EQ("(\\1a)", "(^ 'a')");
205 CHECK_PARSE_EQ("(?=a)?a", "'a'");
206 CHECK_PARSE_EQ("(?=a){0,10}a", "'a'");
207 CHECK_PARSE_EQ("(?=a){1,10}a", "(: (-> + 'a') 'a')");
208 CHECK_PARSE_EQ("(?=a){9,10}a", "(: (-> + 'a') 'a')");
209 CHECK_PARSE_EQ("(?!a)?a", "'a'");
210 CHECK_PARSE_EQ("\\1(a)", "(^ 'a')");
211 CHECK_PARSE_EQ("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
212 CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(: (-> - (: (^ 'a') (<- 1))) (<- 1))");
213 CHECK_PARSE_EQ("[\\0]", "[\\x00]");
214 CHECK_PARSE_EQ("[\\11]", "[\\x09]");
215 CHECK_PARSE_EQ("[\\11a]", "[\\x09 a]");
216 CHECK_PARSE_EQ("[\\011]", "[\\x09]");
217 CHECK_PARSE_EQ("[\\00011]", "[\\x00 1 1]");
218 CHECK_PARSE_EQ("[\\118]", "[\\x09 8]");
219 CHECK_PARSE_EQ("[\\111]", "[I]");
220 CHECK_PARSE_EQ("[\\1111]", "[I 1]");
221 CHECK_PARSE_EQ("\\x34", "'\x34'");
222 CHECK_PARSE_EQ("\\x60", "'\x60'");
223 CHECK_PARSE_EQ("\\x3z", "'x3z'");
224 CHECK_PARSE_EQ("\\c", "'c'");
225 CHECK_PARSE_EQ("\\u0034", "'\x34'");
226 CHECK_PARSE_EQ("\\u003z", "'u003z'");
227 CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))");
228
229 CHECK_SIMPLE("a", true);
230 CHECK_SIMPLE("a|b", false);
231 CHECK_SIMPLE("a\\n", false);
232 CHECK_SIMPLE("^a", false);
233 CHECK_SIMPLE("a$", false);
234 CHECK_SIMPLE("a\\b!", false);
235 CHECK_SIMPLE("a\\Bb", false);
236 CHECK_SIMPLE("a*", false);
237 CHECK_SIMPLE("a*?", false);
238 CHECK_SIMPLE("a?", false);
239 CHECK_SIMPLE("a??", false);
240 CHECK_SIMPLE("a{0,1}?", false);
241 CHECK_SIMPLE("a{1,1}?", false);
242 CHECK_SIMPLE("a{1,2}?", false);
243 CHECK_SIMPLE("a+?", false);
244 CHECK_SIMPLE("(a)", false);
245 CHECK_SIMPLE("(a)\\1", false);
246 CHECK_SIMPLE("(\\1a)", false);
247 CHECK_SIMPLE("\\1(a)", false);
248 CHECK_SIMPLE("a\\s", false);
249 CHECK_SIMPLE("a\\S", false);
250 CHECK_SIMPLE("a\\d", false);
251 CHECK_SIMPLE("a\\D", false);
252 CHECK_SIMPLE("a\\w", false);
253 CHECK_SIMPLE("a\\W", false);
254 CHECK_SIMPLE("a.", false);
255 CHECK_SIMPLE("a\\q", false);
256 CHECK_SIMPLE("a[a]", false);
257 CHECK_SIMPLE("a[^a]", false);
258 CHECK_SIMPLE("a[a-z]", false);
259 CHECK_SIMPLE("a[\\q]", false);
260 CHECK_SIMPLE("a(?:b)", false);
261 CHECK_SIMPLE("a(?=b)", false);
262 CHECK_SIMPLE("a(?!b)", false);
263 CHECK_SIMPLE("\\x60", false);
264 CHECK_SIMPLE("\\u0060", false);
265 CHECK_SIMPLE("\\cA", false);
266 CHECK_SIMPLE("\\q", false);
267 CHECK_SIMPLE("\\1112", false);
268 CHECK_SIMPLE("\\0", false);
269 CHECK_SIMPLE("(a)\\1", false);
270 CHECK_SIMPLE("(?=a)?a", false);
271 CHECK_SIMPLE("(?!a)?a\\1", false);
272 CHECK_SIMPLE("(?:(?=a))a\\1", false);
273
274 CHECK_PARSE_EQ("a{}", "'a{}'");
275 CHECK_PARSE_EQ("a{,}", "'a{,}'");
276 CHECK_PARSE_EQ("a{", "'a{'");
277 CHECK_PARSE_EQ("a{z}", "'a{z}'");
278 CHECK_PARSE_EQ("a{1z}", "'a{1z}'");
279 CHECK_PARSE_EQ("a{12z}", "'a{12z}'");
280 CHECK_PARSE_EQ("a{12,", "'a{12,'");
281 CHECK_PARSE_EQ("a{12,3b", "'a{12,3b'");
282 CHECK_PARSE_EQ("{}", "'{}'");
283 CHECK_PARSE_EQ("{,}", "'{,}'");
284 CHECK_PARSE_EQ("{", "'{'");
285 CHECK_PARSE_EQ("{z}", "'{z}'");
286 CHECK_PARSE_EQ("{1z}", "'{1z}'");
287 CHECK_PARSE_EQ("{12z}", "'{12z}'");
288 CHECK_PARSE_EQ("{12,", "'{12,'");
289 CHECK_PARSE_EQ("{12,3b", "'{12,3b'");
290
291 CHECK_MIN_MAX("a", 1, 1);
292 CHECK_MIN_MAX("abc", 3, 3);
293 CHECK_MIN_MAX("a[bc]d", 3, 3);
294 CHECK_MIN_MAX("a|bc", 1, 2);
295 CHECK_MIN_MAX("ab|c", 1, 2);
296 CHECK_MIN_MAX("a||bc", 0, 2);
297 CHECK_MIN_MAX("|", 0, 0);
298 CHECK_MIN_MAX("(?:ab)", 2, 2);
299 CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
300 CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
301 CHECK_MIN_MAX("(ab)", 2, 2);
302 CHECK_MIN_MAX("(ab|cde)", 2, 3);
303 CHECK_MIN_MAX("(ab)\\1", 2, 4);
304 CHECK_MIN_MAX("(ab|cde)\\1", 2, 6);
305 CHECK_MIN_MAX("(?:ab)?", 0, 2);
306 CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
307 CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
308 CHECK_MIN_MAX("a?", 0, 1);
309 CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
310 CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
311 CHECK_MIN_MAX("a??", 0, 1);
312 CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
313 CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
314 CHECK_MIN_MAX("(?:a?)?", 0, 1);
315 CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
316 CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
317 CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
318 CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
319 CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
320 CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
321 CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
322 CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
323 CHECK_MIN_MAX("a{0}", 0, 0);
324 CHECK_MIN_MAX("(?:a+){0}", 0, 0);
325 CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
326 CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
327 CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
328 CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
329 CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
330 CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
331 CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
332 CHECK_MIN_MAX("a\\bc", 2, 2);
333 CHECK_MIN_MAX("a\\Bc", 2, 2);
334 CHECK_MIN_MAX("a\\sc", 3, 3);
335 CHECK_MIN_MAX("a\\Sc", 3, 3);
336 CHECK_MIN_MAX("a(?=b)c", 2, 2);
337 CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
338 CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
339}
340
341TEST(ParserRegression) {
342 CHECK_PARSE_EQ("[A-Z$-][x]", "(! [A-Z $ -] [x])");
343 CHECK_PARSE_EQ("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
344 CHECK_PARSE_EQ("{", "'{'");
345 CHECK_PARSE_EQ("a|", "(| 'a' %)");
346}
347
348static void ExpectError(const char* input,
349 const char* expected) {
350 V8::Initialize(NULL);
351 v8::HandleScope scope;
352 ZoneScope zone_scope(DELETE_ON_EXIT);
353 FlatStringReader reader(CStrVector(input));
354 RegExpCompileData result;
355 CHECK_EQ(false, v8::internal::ParseRegExp(&reader, false, &result));
356 CHECK(result.tree == NULL);
357 CHECK(!result.error.is_null());
358 SmartPointer<char> str = result.error->ToCString(ALLOW_NULLS);
359 CHECK_EQ(expected, *str);
360}
361
362
363TEST(Errors) {
364 V8::Initialize(NULL);
365 const char* kEndBackslash = "\\ at end of pattern";
366 ExpectError("\\", kEndBackslash);
367 const char* kUnterminatedGroup = "Unterminated group";
368 ExpectError("(foo", kUnterminatedGroup);
369 const char* kInvalidGroup = "Invalid group";
370 ExpectError("(?", kInvalidGroup);
371 const char* kUnterminatedCharacterClass = "Unterminated character class";
372 ExpectError("[", kUnterminatedCharacterClass);
373 ExpectError("[a-", kUnterminatedCharacterClass);
374 const char* kNothingToRepeat = "Nothing to repeat";
375 ExpectError("*", kNothingToRepeat);
376 ExpectError("?", kNothingToRepeat);
377 ExpectError("+", kNothingToRepeat);
378 ExpectError("{1}", kNothingToRepeat);
379 ExpectError("{1,2}", kNothingToRepeat);
380 ExpectError("{1,}", kNothingToRepeat);
381
382 // Check that we don't allow more than kMaxCapture captures
383 const int kMaxCaptures = 1 << 16; // Must match RegExpParser::kMaxCaptures.
384 const char* kTooManyCaptures = "Too many captures";
385 HeapStringAllocator allocator;
386 StringStream accumulator(&allocator);
387 for (int i = 0; i <= kMaxCaptures; i++) {
388 accumulator.Add("()");
389 }
390 SmartPointer<const char> many_captures(accumulator.ToCString());
391 ExpectError(*many_captures, kTooManyCaptures);
392}
393
394
395static bool IsDigit(uc16 c) {
396 return ('0' <= c && c <= '9');
397}
398
399
400static bool NotDigit(uc16 c) {
401 return !IsDigit(c);
402}
403
404
405static bool IsWhiteSpace(uc16 c) {
406 switch (c) {
407 case 0x09:
408 case 0x0A:
409 case 0x0B:
410 case 0x0C:
411 case 0x0d:
412 case 0x20:
413 case 0xA0:
414 case 0x2028:
415 case 0x2029:
416 return true;
417 default:
418 return unibrow::Space::Is(c);
419 }
420}
421
422
423static bool NotWhiteSpace(uc16 c) {
424 return !IsWhiteSpace(c);
425}
426
427
428static bool NotWord(uc16 c) {
429 return !IsRegExpWord(c);
430}
431
432
433static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
434 ZoneScope scope(DELETE_ON_EXIT);
435 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
436 CharacterRange::AddClassEscape(c, ranges);
437 for (unsigned i = 0; i < (1 << 16); i++) {
438 bool in_class = false;
439 for (int j = 0; !in_class && j < ranges->length(); j++) {
440 CharacterRange& range = ranges->at(j);
441 in_class = (range.from() <= i && i <= range.to());
442 }
443 CHECK_EQ(pred(i), in_class);
444 }
445}
446
447
448TEST(CharacterClassEscapes) {
449 TestCharacterClassEscapes('.', IsRegExpNewline);
450 TestCharacterClassEscapes('d', IsDigit);
451 TestCharacterClassEscapes('D', NotDigit);
452 TestCharacterClassEscapes('s', IsWhiteSpace);
453 TestCharacterClassEscapes('S', NotWhiteSpace);
454 TestCharacterClassEscapes('w', IsRegExpWord);
455 TestCharacterClassEscapes('W', NotWord);
456}
457
458
459static RegExpNode* Compile(const char* input, bool multiline, bool is_ascii) {
460 V8::Initialize(NULL);
461 FlatStringReader reader(CStrVector(input));
462 RegExpCompileData compile_data;
463 if (!v8::internal::ParseRegExp(&reader, multiline, &compile_data))
464 return NULL;
465 Handle<String> pattern = Factory::NewStringFromUtf8(CStrVector(input));
466 RegExpEngine::Compile(&compile_data, false, multiline, pattern, is_ascii);
467 return compile_data.node;
468}
469
470
471static void Execute(const char* input,
472 bool multiline,
473 bool is_ascii,
474 bool dot_output = false) {
475 v8::HandleScope scope;
476 ZoneScope zone_scope(DELETE_ON_EXIT);
477 RegExpNode* node = Compile(input, multiline, is_ascii);
478 USE(node);
479#ifdef DEBUG
480 if (dot_output) {
481 RegExpEngine::DotPrint(input, node, false);
482 exit(0);
483 }
484#endif // DEBUG
485}
486
487
// Configuration type used to instantiate ZoneSplayTree in the tests below:
// integer keys and values, zero as the "no key"/"no value" sentinel, and a
// three-way integer comparison.
class TestConfig {
 public:
  typedef int Key;
  typedef int Value;
  static const int kNoKey;
  static const int kNoValue;
  // Returns -1, 0 or 1 when |a| is less than, equal to or greater than |b|.
  static inline int Compare(int a, int b) {
    if (a == b) return 0;
    return (a < b) ? -1 : 1;
  }
};


const int TestConfig::kNoKey = 0;
const int TestConfig::kNoValue = 0;
507
508
// Deterministic mixing function used to generate test data: XOR of the two
// arguments scaled by fixed multipliers, returned as unsigned.
static unsigned PseudoRandom(int i, int j) {
  const int scaled_i = i * 781;
  const int scaled_j = j * 329;
  return scaled_i ^ scaled_j;
}
512
513
514TEST(SplayTreeSimple) {
515 static const unsigned kLimit = 1000;
516 ZoneScope zone_scope(DELETE_ON_EXIT);
517 ZoneSplayTree<TestConfig> tree;
518 bool seen[kLimit];
519 for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
520#define CHECK_MAPS_EQUAL() do { \
521 for (unsigned k = 0; k < kLimit; k++) \
522 CHECK_EQ(seen[k], tree.Find(k, &loc)); \
523 } while (false)
524 for (int i = 0; i < 50; i++) {
525 for (int j = 0; j < 50; j++) {
526 unsigned next = PseudoRandom(i, j) % kLimit;
527 if (seen[next]) {
528 // We've already seen this one. Check the value and remove
529 // it.
530 ZoneSplayTree<TestConfig>::Locator loc;
531 CHECK(tree.Find(next, &loc));
532 CHECK_EQ(next, loc.key());
533 CHECK_EQ(3 * next, loc.value());
534 tree.Remove(next);
535 seen[next] = false;
536 CHECK_MAPS_EQUAL();
537 } else {
538 // Check that it wasn't there already and then add it.
539 ZoneSplayTree<TestConfig>::Locator loc;
540 CHECK(!tree.Find(next, &loc));
541 CHECK(tree.Insert(next, &loc));
542 CHECK_EQ(next, loc.key());
543 loc.set_value(3 * next);
544 seen[next] = true;
545 CHECK_MAPS_EQUAL();
546 }
547 int val = PseudoRandom(j, i) % kLimit;
548 if (seen[val]) {
549 ZoneSplayTree<TestConfig>::Locator loc;
550 CHECK(tree.FindGreatestLessThan(val, &loc));
551 CHECK_EQ(loc.key(), val);
552 break;
553 }
554 val = PseudoRandom(i + j, i - j) % kLimit;
555 if (seen[val]) {
556 ZoneSplayTree<TestConfig>::Locator loc;
557 CHECK(tree.FindLeastGreaterThan(val, &loc));
558 CHECK_EQ(loc.key(), val);
559 break;
560 }
561 }
562 }
563}
564
565
566TEST(DispatchTableConstruction) {
567 // Initialize test data.
568 static const int kLimit = 1000;
569 static const int kRangeCount = 8;
570 static const int kRangeSize = 16;
571 uc16 ranges[kRangeCount][2 * kRangeSize];
572 for (int i = 0; i < kRangeCount; i++) {
573 Vector<uc16> range(ranges[i], 2 * kRangeSize);
574 for (int j = 0; j < 2 * kRangeSize; j++) {
575 range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
576 }
577 range.Sort();
578 for (int j = 1; j < 2 * kRangeSize; j++) {
579 CHECK(range[j-1] <= range[j]);
580 }
581 }
582 // Enter test data into dispatch table.
583 ZoneScope zone_scope(DELETE_ON_EXIT);
584 DispatchTable table;
585 for (int i = 0; i < kRangeCount; i++) {
586 uc16* range = ranges[i];
587 for (int j = 0; j < 2 * kRangeSize; j += 2)
588 table.AddRange(CharacterRange(range[j], range[j + 1]), i);
589 }
590 // Check that the table looks as we would expect
591 for (int p = 0; p < kLimit; p++) {
592 OutSet* outs = table.Get(p);
593 for (int j = 0; j < kRangeCount; j++) {
594 uc16* range = ranges[j];
595 bool is_on = false;
596 for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
597 is_on = (range[k] <= p && p <= range[k + 1]);
598 CHECK_EQ(is_on, outs->Get(j));
599 }
600 }
601}
602
603
604// Tests of interpreter.
605
606
607#ifdef V8_NATIVE_REGEXP
608
609#if V8_TARGET_ARCH_IA32
610typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
611#elif V8_TARGET_ARCH_X64
612typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
613#elif V8_TARGET_ARCH_ARM
614typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler;
615#endif
616
617class ContextInitializer {
618 public:
619 ContextInitializer()
620 : env_(), scope_(), zone_(DELETE_ON_EXIT), stack_guard_() {
621 env_ = v8::Context::New();
622 env_->Enter();
623 }
624 ~ContextInitializer() {
625 env_->Exit();
626 env_.Dispose();
627 }
628 private:
629 v8::Persistent<v8::Context> env_;
630 v8::HandleScope scope_;
631 v8::internal::ZoneScope zone_;
632 v8::internal::StackGuard stack_guard_;
633};
634
635
636static ArchRegExpMacroAssembler::Result Execute(Code* code,
637 String* input,
638 int start_offset,
639 const byte* input_start,
640 const byte* input_end,
641 int* captures,
642 bool at_start) {
643 return NativeRegExpMacroAssembler::Execute(
644 code,
645 input,
646 start_offset,
647 input_start,
648 input_end,
649 captures,
650 at_start);
651}
652
653
654TEST(MacroAssemblerNativeSuccess) {
655 v8::V8::Initialize();
656 ContextInitializer initializer;
657
658 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4);
659
660 m.Succeed();
661
662 Handle<String> source = Factory::NewStringFromAscii(CStrVector(""));
663 Handle<Object> code_object = m.GetCode(source);
664 Handle<Code> code = Handle<Code>::cast(code_object);
665
666 int captures[4] = {42, 37, 87, 117};
667 Handle<String> input = Factory::NewStringFromAscii(CStrVector("foofoo"));
668 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
669 const byte* start_adr =
670 reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
671
672 NativeRegExpMacroAssembler::Result result =
673 Execute(*code,
674 *input,
675 0,
676 start_adr,
677 start_adr + seq_input->length(),
678 captures,
679 true);
680
681 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
682 CHECK_EQ(-1, captures[0]);
683 CHECK_EQ(-1, captures[1]);
684 CHECK_EQ(-1, captures[2]);
685 CHECK_EQ(-1, captures[3]);
686}
687
688
689TEST(MacroAssemblerNativeSimple) {
690 v8::V8::Initialize();
691 ContextInitializer initializer;
692
693 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4);
694
695 uc16 foo_chars[3] = {'f', 'o', 'o'};
696 Vector<const uc16> foo(foo_chars, 3);
697
698 Label fail;
699 m.CheckCharacters(foo, 0, &fail, true);
700 m.WriteCurrentPositionToRegister(0, 0);
701 m.AdvanceCurrentPosition(3);
702 m.WriteCurrentPositionToRegister(1, 0);
703 m.Succeed();
704 m.Bind(&fail);
705 m.Fail();
706
707 Handle<String> source = Factory::NewStringFromAscii(CStrVector("^foo"));
708 Handle<Object> code_object = m.GetCode(source);
709 Handle<Code> code = Handle<Code>::cast(code_object);
710
711 int captures[4] = {42, 37, 87, 117};
712 Handle<String> input = Factory::NewStringFromAscii(CStrVector("foofoo"));
713 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
714 Address start_adr = seq_input->GetCharsAddress();
715
716 NativeRegExpMacroAssembler::Result result =
717 Execute(*code,
718 *input,
719 0,
720 start_adr,
721 start_adr + input->length(),
722 captures,
723 true);
724
725 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
726 CHECK_EQ(0, captures[0]);
727 CHECK_EQ(3, captures[1]);
728 CHECK_EQ(-1, captures[2]);
729 CHECK_EQ(-1, captures[3]);
730
731 input = Factory::NewStringFromAscii(CStrVector("barbarbar"));
732 seq_input = Handle<SeqAsciiString>::cast(input);
733 start_adr = seq_input->GetCharsAddress();
734
735 result = Execute(*code,
736 *input,
737 0,
738 start_adr,
739 start_adr + input->length(),
740 captures,
741 true);
742
743 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
744}
745
746
747TEST(MacroAssemblerNativeSimpleUC16) {
748 v8::V8::Initialize();
749 ContextInitializer initializer;
750
751 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4);
752
753 uc16 foo_chars[3] = {'f', 'o', 'o'};
754 Vector<const uc16> foo(foo_chars, 3);
755
756 Label fail;
757 m.CheckCharacters(foo, 0, &fail, true);
758 m.WriteCurrentPositionToRegister(0, 0);
759 m.AdvanceCurrentPosition(3);
760 m.WriteCurrentPositionToRegister(1, 0);
761 m.Succeed();
762 m.Bind(&fail);
763 m.Fail();
764
765 Handle<String> source = Factory::NewStringFromAscii(CStrVector("^foo"));
766 Handle<Object> code_object = m.GetCode(source);
767 Handle<Code> code = Handle<Code>::cast(code_object);
768
769 int captures[4] = {42, 37, 87, 117};
770 const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o', '\xa0'};
771 Handle<String> input =
772 Factory::NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
773 Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
774 Address start_adr = seq_input->GetCharsAddress();
775
776 NativeRegExpMacroAssembler::Result result =
777 Execute(*code,
778 *input,
779 0,
780 start_adr,
781 start_adr + input->length(),
782 captures,
783 true);
784
785 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
786 CHECK_EQ(0, captures[0]);
787 CHECK_EQ(3, captures[1]);
788 CHECK_EQ(-1, captures[2]);
789 CHECK_EQ(-1, captures[3]);
790
791 const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a', '\xa0'};
792 input = Factory::NewStringFromTwoByte(Vector<const uc16>(input_data2, 9));
793 seq_input = Handle<SeqTwoByteString>::cast(input);
794 start_adr = seq_input->GetCharsAddress();
795
796 result = Execute(*code,
797 *input,
798 0,
799 start_adr,
800 start_adr + input->length() * 2,
801 captures,
802 true);
803
804 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
805}
806
807
808TEST(MacroAssemblerNativeBacktrack) {
809 v8::V8::Initialize();
810 ContextInitializer initializer;
811
812 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0);
813
814 Label fail;
815 Label backtrack;
816 m.LoadCurrentCharacter(10, &fail);
817 m.Succeed();
818 m.Bind(&fail);
819 m.PushBacktrack(&backtrack);
820 m.LoadCurrentCharacter(10, NULL);
821 m.Succeed();
822 m.Bind(&backtrack);
823 m.Fail();
824
825 Handle<String> source = Factory::NewStringFromAscii(CStrVector(".........."));
826 Handle<Object> code_object = m.GetCode(source);
827 Handle<Code> code = Handle<Code>::cast(code_object);
828
829 Handle<String> input = Factory::NewStringFromAscii(CStrVector("foofoo"));
830 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
831 Address start_adr = seq_input->GetCharsAddress();
832
833 NativeRegExpMacroAssembler::Result result =
834 Execute(*code,
835 *input,
836 0,
837 start_adr,
838 start_adr + input->length(),
839 NULL,
840 true);
841
842 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
843}
844
845
846TEST(MacroAssemblerNativeBackReferenceASCII) {
847 v8::V8::Initialize();
848 ContextInitializer initializer;
849
850 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4);
851
852 m.WriteCurrentPositionToRegister(0, 0);
853 m.AdvanceCurrentPosition(2);
854 m.WriteCurrentPositionToRegister(1, 0);
855 Label nomatch;
856 m.CheckNotBackReference(0, &nomatch);
857 m.Fail();
858 m.Bind(&nomatch);
859 m.AdvanceCurrentPosition(2);
860 Label missing_match;
861 m.CheckNotBackReference(0, &missing_match);
862 m.WriteCurrentPositionToRegister(2, 0);
863 m.Succeed();
864 m.Bind(&missing_match);
865 m.Fail();
866
867 Handle<String> source = Factory::NewStringFromAscii(CStrVector("^(..)..\1"));
868 Handle<Object> code_object = m.GetCode(source);
869 Handle<Code> code = Handle<Code>::cast(code_object);
870
871 Handle<String> input = Factory::NewStringFromAscii(CStrVector("fooofo"));
872 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
873 Address start_adr = seq_input->GetCharsAddress();
874
875 int output[4];
876 NativeRegExpMacroAssembler::Result result =
877 Execute(*code,
878 *input,
879 0,
880 start_adr,
881 start_adr + input->length(),
882 output,
883 true);
884
885 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
886 CHECK_EQ(0, output[0]);
887 CHECK_EQ(2, output[1]);
888 CHECK_EQ(6, output[2]);
889 CHECK_EQ(-1, output[3]);
890}
891
892
893TEST(MacroAssemblerNativeBackReferenceUC16) {
894 v8::V8::Initialize();
895 ContextInitializer initializer;
896
897 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4);
898
899 m.WriteCurrentPositionToRegister(0, 0);
900 m.AdvanceCurrentPosition(2);
901 m.WriteCurrentPositionToRegister(1, 0);
902 Label nomatch;
903 m.CheckNotBackReference(0, &nomatch);
904 m.Fail();
905 m.Bind(&nomatch);
906 m.AdvanceCurrentPosition(2);
907 Label missing_match;
908 m.CheckNotBackReference(0, &missing_match);
909 m.WriteCurrentPositionToRegister(2, 0);
910 m.Succeed();
911 m.Bind(&missing_match);
912 m.Fail();
913
914 Handle<String> source = Factory::NewStringFromAscii(CStrVector("^(..)..\1"));
915 Handle<Object> code_object = m.GetCode(source);
916 Handle<Code> code = Handle<Code>::cast(code_object);
917
918 const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
919 Handle<String> input =
920 Factory::NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
921 Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
922 Address start_adr = seq_input->GetCharsAddress();
923
924 int output[4];
925 NativeRegExpMacroAssembler::Result result =
926 Execute(*code,
927 *input,
928 0,
929 start_adr,
930 start_adr + input->length() * 2,
931 output,
932 true);
933
934 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
935 CHECK_EQ(0, output[0]);
936 CHECK_EQ(2, output[1]);
937 CHECK_EQ(6, output[2]);
938 CHECK_EQ(-1, output[3]);
939}
940
941
942
943TEST(MacroAssemblernativeAtStart) {
944 v8::V8::Initialize();
945 ContextInitializer initializer;
946
947 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0);
948
949 Label not_at_start, newline, fail;
950 m.CheckNotAtStart(&not_at_start);
951 // Check that prevchar = '\n' and current = 'f'.
952 m.CheckCharacter('\n', &newline);
953 m.Bind(&fail);
954 m.Fail();
955 m.Bind(&newline);
956 m.LoadCurrentCharacter(0, &fail);
957 m.CheckNotCharacter('f', &fail);
958 m.Succeed();
959
960 m.Bind(&not_at_start);
961 // Check that prevchar = 'o' and current = 'b'.
962 Label prevo;
963 m.CheckCharacter('o', &prevo);
964 m.Fail();
965 m.Bind(&prevo);
966 m.LoadCurrentCharacter(0, &fail);
967 m.CheckNotCharacter('b', &fail);
968 m.Succeed();
969
970 Handle<String> source = Factory::NewStringFromAscii(CStrVector("(^f|ob)"));
971 Handle<Object> code_object = m.GetCode(source);
972 Handle<Code> code = Handle<Code>::cast(code_object);
973
974 Handle<String> input = Factory::NewStringFromAscii(CStrVector("foobar"));
975 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
976 Address start_adr = seq_input->GetCharsAddress();
977
978 NativeRegExpMacroAssembler::Result result =
979 Execute(*code,
980 *input,
981 0,
982 start_adr,
983 start_adr + input->length(),
984 NULL,
985 true);
986
987 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
988
989 result = Execute(*code,
990 *input,
991 3,
992 start_adr + 3,
993 start_adr + input->length(),
994 NULL,
995 false);
996
997 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
998}
999
1000
1001TEST(MacroAssemblerNativeBackRefNoCase) {
1002 v8::V8::Initialize();
1003 ContextInitializer initializer;
1004
1005 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4);
1006
1007 Label fail, succ;
1008
1009 m.WriteCurrentPositionToRegister(0, 0);
1010 m.WriteCurrentPositionToRegister(2, 0);
1011 m.AdvanceCurrentPosition(3);
1012 m.WriteCurrentPositionToRegister(3, 0);
1013 m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "AbC".
1014 m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "ABC".
1015 Label expected_fail;
1016 m.CheckNotBackReferenceIgnoreCase(2, &expected_fail);
1017 m.Bind(&fail);
1018 m.Fail();
1019
1020 m.Bind(&expected_fail);
1021 m.AdvanceCurrentPosition(3); // Skip "xYz"
1022 m.CheckNotBackReferenceIgnoreCase(2, &succ);
1023 m.Fail();
1024
1025 m.Bind(&succ);
1026 m.WriteCurrentPositionToRegister(1, 0);
1027 m.Succeed();
1028
1029 Handle<String> source =
1030 Factory::NewStringFromAscii(CStrVector("^(abc)\1\1(?!\1)...(?!\1)"));
1031 Handle<Object> code_object = m.GetCode(source);
1032 Handle<Code> code = Handle<Code>::cast(code_object);
1033
1034 Handle<String> input =
1035 Factory::NewStringFromAscii(CStrVector("aBcAbCABCxYzab"));
1036 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
1037 Address start_adr = seq_input->GetCharsAddress();
1038
1039 int output[4];
1040 NativeRegExpMacroAssembler::Result result =
1041 Execute(*code,
1042 *input,
1043 0,
1044 start_adr,
1045 start_adr + input->length(),
1046 output,
1047 true);
1048
1049 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1050 CHECK_EQ(0, output[0]);
1051 CHECK_EQ(12, output[1]);
1052 CHECK_EQ(0, output[2]);
1053 CHECK_EQ(3, output[3]);
1054}
1055
1056
1057
1058TEST(MacroAssemblerNativeRegisters) {
1059 v8::V8::Initialize();
1060 ContextInitializer initializer;
1061
1062 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 6);
1063
1064 uc16 foo_chars[3] = {'f', 'o', 'o'};
1065 Vector<const uc16> foo(foo_chars, 3);
1066
1067 enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
1068 Label fail;
1069 Label backtrack;
1070 m.WriteCurrentPositionToRegister(out1, 0); // Output: [0]
1071 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1072 m.PushBacktrack(&backtrack);
1073 m.WriteStackPointerToRegister(sp);
1074 // Fill stack and registers
1075 m.AdvanceCurrentPosition(2);
1076 m.WriteCurrentPositionToRegister(out1, 0);
1077 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1078 m.PushBacktrack(&fail);
1079 // Drop backtrack stack frames.
1080 m.ReadStackPointerFromRegister(sp);
1081 // And take the first backtrack (to &backtrack)
1082 m.Backtrack();
1083
1084 m.PushCurrentPosition();
1085 m.AdvanceCurrentPosition(2);
1086 m.PopCurrentPosition();
1087
1088 m.Bind(&backtrack);
1089 m.PopRegister(out1);
1090 m.ReadCurrentPositionFromRegister(out1);
1091 m.AdvanceCurrentPosition(3);
1092 m.WriteCurrentPositionToRegister(out2, 0); // [0,3]
1093
1094 Label loop;
1095 m.SetRegister(loop_cnt, 0); // loop counter
1096 m.Bind(&loop);
1097 m.AdvanceRegister(loop_cnt, 1);
1098 m.AdvanceCurrentPosition(1);
1099 m.IfRegisterLT(loop_cnt, 3, &loop);
1100 m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6]
1101
1102 Label loop2;
1103 m.SetRegister(loop_cnt, 2); // loop counter
1104 m.Bind(&loop2);
1105 m.AdvanceRegister(loop_cnt, -1);
1106 m.AdvanceCurrentPosition(1);
1107 m.IfRegisterGE(loop_cnt, 0, &loop2);
1108 m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9]
1109
1110 Label loop3;
1111 Label exit_loop3;
1112 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1113 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1114 m.ReadCurrentPositionFromRegister(out3);
1115 m.Bind(&loop3);
1116 m.AdvanceCurrentPosition(1);
1117 m.CheckGreedyLoop(&exit_loop3);
1118 m.GoTo(&loop3);
1119 m.Bind(&exit_loop3);
1120 m.PopCurrentPosition();
1121 m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9,-1]
1122
1123 m.Succeed();
1124
1125 m.Bind(&fail);
1126 m.Fail();
1127
1128 Handle<String> source =
1129 Factory::NewStringFromAscii(CStrVector("<loop test>"));
1130 Handle<Object> code_object = m.GetCode(source);
1131 Handle<Code> code = Handle<Code>::cast(code_object);
1132
1133 // String long enough for test (content doesn't matter).
1134 Handle<String> input =
1135 Factory::NewStringFromAscii(CStrVector("foofoofoofoofoo"));
1136 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
1137 Address start_adr = seq_input->GetCharsAddress();
1138
1139 int output[6];
1140 NativeRegExpMacroAssembler::Result result =
1141 Execute(*code,
1142 *input,
1143 0,
1144 start_adr,
1145 start_adr + input->length(),
1146 output,
1147 true);
1148
1149 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1150 CHECK_EQ(0, output[0]);
1151 CHECK_EQ(3, output[1]);
1152 CHECK_EQ(6, output[2]);
1153 CHECK_EQ(9, output[3]);
1154 CHECK_EQ(9, output[4]);
1155 CHECK_EQ(-1, output[5]);
1156}
1157
1158
1159TEST(MacroAssemblerStackOverflow) {
1160 v8::V8::Initialize();
1161 ContextInitializer initializer;
1162
1163 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 0);
1164
1165 Label loop;
1166 m.Bind(&loop);
1167 m.PushBacktrack(&loop);
1168 m.GoTo(&loop);
1169
1170 Handle<String> source =
1171 Factory::NewStringFromAscii(CStrVector("<stack overflow test>"));
1172 Handle<Object> code_object = m.GetCode(source);
1173 Handle<Code> code = Handle<Code>::cast(code_object);
1174
1175 // String long enough for test (content doesn't matter).
1176 Handle<String> input =
1177 Factory::NewStringFromAscii(CStrVector("dummy"));
1178 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
1179 Address start_adr = seq_input->GetCharsAddress();
1180
1181 NativeRegExpMacroAssembler::Result result =
1182 Execute(*code,
1183 *input,
1184 0,
1185 start_adr,
1186 start_adr + input->length(),
1187 NULL,
1188 true);
1189
1190 CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result);
1191 CHECK(Top::has_pending_exception());
1192 Top::clear_pending_exception();
1193}
1194
1195
1196TEST(MacroAssemblerNativeLotsOfRegisters) {
1197 v8::V8::Initialize();
1198 ContextInitializer initializer;
1199
1200 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 2);
1201
1202 // At least 2048, to ensure the allocated space for registers
1203 // span one full page.
1204 const int large_number = 8000;
1205 m.WriteCurrentPositionToRegister(large_number, 42);
1206 m.WriteCurrentPositionToRegister(0, 0);
1207 m.WriteCurrentPositionToRegister(1, 1);
1208 Label done;
1209 m.CheckNotBackReference(0, &done); // Performs a system-stack push.
1210 m.Bind(&done);
1211 m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck);
1212 m.PopRegister(1);
1213 m.Succeed();
1214
1215 Handle<String> source =
1216 Factory::NewStringFromAscii(CStrVector("<huge register space test>"));
1217 Handle<Object> code_object = m.GetCode(source);
1218 Handle<Code> code = Handle<Code>::cast(code_object);
1219
1220 // String long enough for test (content doesn't matter).
1221 Handle<String> input =
1222 Factory::NewStringFromAscii(CStrVector("sample text"));
1223 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
1224 Address start_adr = seq_input->GetCharsAddress();
1225
1226 int captures[2];
1227 NativeRegExpMacroAssembler::Result result =
1228 Execute(*code,
1229 *input,
1230 0,
1231 start_adr,
1232 start_adr + input->length(),
1233 captures,
1234 true);
1235
1236 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1237 CHECK_EQ(0, captures[0]);
1238 CHECK_EQ(42, captures[1]);
1239
1240 Top::clear_pending_exception();
1241}
1242
1243#else // ! V8_REGEX_NATIVE
1244
1245TEST(MacroAssembler) {
1246 V8::Initialize(NULL);
1247 byte codes[1024];
1248 RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024));
1249 // ^f(o)o.
1250 Label fail, fail2, start;
1251 uc16 foo_chars[3];
1252 foo_chars[0] = 'f';
1253 foo_chars[1] = 'o';
1254 foo_chars[2] = 'o';
1255 Vector<const uc16> foo(foo_chars, 3);
1256 m.SetRegister(4, 42);
1257 m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
1258 m.AdvanceRegister(4, 42);
1259 m.GoTo(&start);
1260 m.Fail();
1261 m.Bind(&start);
1262 m.PushBacktrack(&fail2);
1263 m.CheckCharacters(foo, 0, &fail, true);
1264 m.WriteCurrentPositionToRegister(0, 0);
1265 m.PushCurrentPosition();
1266 m.AdvanceCurrentPosition(3);
1267 m.WriteCurrentPositionToRegister(1, 0);
1268 m.PopCurrentPosition();
1269 m.AdvanceCurrentPosition(1);
1270 m.WriteCurrentPositionToRegister(2, 0);
1271 m.AdvanceCurrentPosition(1);
1272 m.WriteCurrentPositionToRegister(3, 0);
1273 m.Succeed();
1274
1275 m.Bind(&fail);
1276 m.Backtrack();
1277 m.Succeed();
1278
1279 m.Bind(&fail2);
1280 m.PopRegister(0);
1281 m.Fail();
1282
1283 v8::HandleScope scope;
1284
1285 Handle<String> source = Factory::NewStringFromAscii(CStrVector("^f(o)o"));
1286 Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
1287 int captures[5];
1288
1289 const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
1290 Handle<String> f1_16 =
1291 Factory::NewStringFromTwoByte(Vector<const uc16>(str1, 6));
1292
1293 CHECK(IrregexpInterpreter::Match(array, f1_16, captures, 0));
1294 CHECK_EQ(0, captures[0]);
1295 CHECK_EQ(3, captures[1]);
1296 CHECK_EQ(1, captures[2]);
1297 CHECK_EQ(2, captures[3]);
1298 CHECK_EQ(84, captures[4]);
1299
1300 const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
1301 Handle<String> f2_16 =
1302 Factory::NewStringFromTwoByte(Vector<const uc16>(str2, 6));
1303
1304 CHECK(!IrregexpInterpreter::Match(array, f2_16, captures, 0));
1305 CHECK_EQ(42, captures[0]);
1306}
1307
1308#endif // ! V8_REGEXP_NATIVE
1309
1310
1311TEST(AddInverseToTable) {
1312 static const int kLimit = 1000;
1313 static const int kRangeCount = 16;
1314 for (int t = 0; t < 10; t++) {
1315 ZoneScope zone_scope(DELETE_ON_EXIT);
1316 ZoneList<CharacterRange>* ranges =
1317 new ZoneList<CharacterRange>(kRangeCount);
1318 for (int i = 0; i < kRangeCount; i++) {
1319 int from = PseudoRandom(t + 87, i + 25) % kLimit;
1320 int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));
1321 if (to > kLimit) to = kLimit;
1322 ranges->Add(CharacterRange(from, to));
1323 }
1324 DispatchTable table;
1325 DispatchTableConstructor cons(&table, false);
1326 cons.set_choice_index(0);
1327 cons.AddInverse(ranges);
1328 for (int i = 0; i < kLimit; i++) {
1329 bool is_on = false;
1330 for (int j = 0; !is_on && j < kRangeCount; j++)
1331 is_on = ranges->at(j).Contains(i);
1332 OutSet* set = table.Get(i);
1333 CHECK_EQ(is_on, set->Get(0) == false);
1334 }
1335 }
1336 ZoneScope zone_scope(DELETE_ON_EXIT);
1337 ZoneList<CharacterRange>* ranges =
1338 new ZoneList<CharacterRange>(1);
1339 ranges->Add(CharacterRange(0xFFF0, 0xFFFE));
1340 DispatchTable table;
1341 DispatchTableConstructor cons(&table, false);
1342 cons.set_choice_index(0);
1343 cons.AddInverse(ranges);
1344 CHECK(!table.Get(0xFFFE)->Get(0));
1345 CHECK(table.Get(0xFFFF)->Get(0));
1346}
1347
1348
1349static uc32 canonicalize(uc32 c) {
1350 unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
1351 int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL);
1352 if (count == 0) {
1353 return c;
1354 } else {
1355 CHECK_EQ(1, count);
1356 return canon[0];
1357 }
1358}
1359
1360
1361TEST(LatinCanonicalize) {
1362 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1363 for (char lower = 'a'; lower <= 'z'; lower++) {
1364 char upper = lower + ('A' - 'a');
1365 CHECK_EQ(canonicalize(lower), canonicalize(upper));
1366 unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1367 int length = un_canonicalize.get(lower, '\0', uncanon);
1368 CHECK_EQ(2, length);
1369 CHECK_EQ(upper, uncanon[0]);
1370 CHECK_EQ(lower, uncanon[1]);
1371 }
1372 for (uc32 c = 128; c < (1 << 21); c++)
1373 CHECK_GE(canonicalize(c), 128);
1374 unibrow::Mapping<unibrow::ToUppercase> to_upper;
1375 for (uc32 c = 0; c < (1 << 21); c++) {
1376 unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
1377 int length = to_upper.get(c, '\0', upper);
1378 if (length == 0) {
1379 length = 1;
1380 upper[0] = c;
1381 }
1382 uc32 u = upper[0];
1383 if (length > 1 || (c >= 128 && u < 128))
1384 u = c;
1385 CHECK_EQ(u, canonicalize(c));
1386 }
1387}
1388
1389
1390static uc32 CanonRange(uc32 c) {
1391 unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
1392 int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);
1393 if (count == 0) {
1394 return c;
1395 } else {
1396 CHECK_EQ(1, count);
1397 return canon[0];
1398 }
1399}
1400
1401
1402TEST(RangeCanonicalization) {
1403 CHECK_NE(CanonRange(0) & CharacterRange::kStartMarker, 0);
1404 // Check that we arrive at the same result when using the basic
1405 // range canonicalization primitives as when using immediate
1406 // canonicalization.
1407 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1408 for (int i = 0; i < CharacterRange::kRangeCanonicalizeMax; i++) {
1409 int range = CanonRange(i);
1410 int indirect_length = 0;
1411 unibrow::uchar indirect[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1412 if ((range & CharacterRange::kStartMarker) == 0) {
1413 indirect_length = un_canonicalize.get(i - range, '\0', indirect);
1414 for (int i = 0; i < indirect_length; i++)
1415 indirect[i] += range;
1416 } else {
1417 indirect_length = un_canonicalize.get(i, '\0', indirect);
1418 }
1419 unibrow::uchar direct[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1420 int direct_length = un_canonicalize.get(i, '\0', direct);
1421 CHECK_EQ(direct_length, indirect_length);
1422 }
1423 // Check that we arrive at the same results when skipping over
1424 // canonicalization ranges.
1425 int next_block = 0;
1426 while (next_block < CharacterRange::kRangeCanonicalizeMax) {
1427 uc32 start = CanonRange(next_block);
1428 CHECK_NE((start & CharacterRange::kStartMarker), 0);
1429 unsigned dist = start & CharacterRange::kPayloadMask;
1430 unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1431 int first_length = un_canonicalize.get(next_block, '\0', first);
1432 for (unsigned i = 1; i < dist; i++) {
1433 CHECK_EQ(i, CanonRange(next_block + i));
1434 unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1435 int succ_length = un_canonicalize.get(next_block + i, '\0', succ);
1436 CHECK_EQ(first_length, succ_length);
1437 for (int j = 0; j < succ_length; j++) {
1438 int calc = first[j] + i;
1439 int found = succ[j];
1440 CHECK_EQ(calc, found);
1441 }
1442 }
1443 next_block = next_block + dist;
1444 }
1445}
1446
1447
1448TEST(UncanonicalizeEquivalence) {
1449 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1450 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1451 for (int i = 0; i < (1 << 16); i++) {
1452 int length = un_canonicalize.get(i, '\0', chars);
1453 for (int j = 0; j < length; j++) {
1454 unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1455 int length2 = un_canonicalize.get(chars[j], '\0', chars2);
1456 CHECK_EQ(length, length2);
1457 for (int k = 0; k < length; k++)
1458 CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k]));
1459 }
1460 }
1461}
1462
1463
1464static void TestRangeCaseIndependence(CharacterRange input,
1465 Vector<CharacterRange> expected) {
1466 ZoneScope zone_scope(DELETE_ON_EXIT);
1467 int count = expected.length();
1468 ZoneList<CharacterRange>* list = new ZoneList<CharacterRange>(count);
1469 input.AddCaseEquivalents(list);
1470 CHECK_EQ(count, list->length());
1471 for (int i = 0; i < list->length(); i++) {
1472 CHECK_EQ(expected[i].from(), list->at(i).from());
1473 CHECK_EQ(expected[i].to(), list->at(i).to());
1474 }
1475}
1476
1477
1478static void TestSimpleRangeCaseIndependence(CharacterRange input,
1479 CharacterRange expected) {
1480 EmbeddedVector<CharacterRange, 1> vector;
1481 vector[0] = expected;
1482 TestRangeCaseIndependence(input, vector);
1483}
1484
1485
1486TEST(CharacterRangeCaseIndependence) {
1487 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('a'),
1488 CharacterRange::Singleton('A'));
1489 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('z'),
1490 CharacterRange::Singleton('Z'));
1491 TestSimpleRangeCaseIndependence(CharacterRange('a', 'z'),
1492 CharacterRange('A', 'Z'));
1493 TestSimpleRangeCaseIndependence(CharacterRange('c', 'f'),
1494 CharacterRange('C', 'F'));
1495 TestSimpleRangeCaseIndependence(CharacterRange('a', 'b'),
1496 CharacterRange('A', 'B'));
1497 TestSimpleRangeCaseIndependence(CharacterRange('y', 'z'),
1498 CharacterRange('Y', 'Z'));
1499 TestSimpleRangeCaseIndependence(CharacterRange('a' - 1, 'z' + 1),
1500 CharacterRange('A', 'Z'));
1501 TestSimpleRangeCaseIndependence(CharacterRange('A', 'Z'),
1502 CharacterRange('a', 'z'));
1503 TestSimpleRangeCaseIndependence(CharacterRange('C', 'F'),
1504 CharacterRange('c', 'f'));
1505 TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1),
1506 CharacterRange('a', 'z'));
1507 // Here we need to add [l-z] to complete the case independence of
1508 // [A-Za-z] but we expect [a-z] to be added since we always add a
1509 // whole block at a time.
1510 TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'),
1511 CharacterRange('a', 'z'));
1512}
1513
1514
1515static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) {
1516 if (ranges == NULL)
1517 return false;
1518 for (int i = 0; i < ranges->length(); i++) {
1519 CharacterRange range = ranges->at(i);
1520 if (range.from() <= c && c <= range.to())
1521 return true;
1522 }
1523 return false;
1524}
1525
1526
1527TEST(CharClassDifference) {
1528 ZoneScope zone_scope(DELETE_ON_EXIT);
1529 ZoneList<CharacterRange>* base = new ZoneList<CharacterRange>(1);
1530 base->Add(CharacterRange::Everything());
1531 Vector<const uc16> overlay = CharacterRange::GetWordBounds();
1532 ZoneList<CharacterRange>* included = NULL;
1533 ZoneList<CharacterRange>* excluded = NULL;
1534 CharacterRange::Split(base, overlay, &included, &excluded);
1535 for (int i = 0; i < (1 << 16); i++) {
1536 bool in_base = InClass(i, base);
1537 if (in_base) {
1538 bool in_overlay = false;
1539 for (int j = 0; !in_overlay && j < overlay.length(); j += 2) {
1540 if (overlay[j] <= i && i <= overlay[j+1])
1541 in_overlay = true;
1542 }
1543 CHECK_EQ(in_overlay, InClass(i, included));
1544 CHECK_EQ(!in_overlay, InClass(i, excluded));
1545 } else {
1546 CHECK(!InClass(i, included));
1547 CHECK(!InClass(i, excluded));
1548 }
1549 }
1550}
1551
1552
1553TEST(Graph) {
1554 V8::Initialize(NULL);
1555 Execute("(?:(?:x(.))?\1)+$", false, true, true);
1556}