blob: 4a572c8160d4cde8b50d76e3082c9fdaa75ce186 [file] [log] [blame]
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001// Copyright 2012 the V8 project authors. All rights reserved.
Steve Blocka7e24c12009-10-30 11:49:00 +00002// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
Emily Bernierd0a1eb72015-03-24 16:35:39 -040028#include <cstdlib>
29#include <sstream>
Steve Blocka7e24c12009-10-30 11:49:00 +000030
Ben Murdochb8a8cc12014-11-26 15:28:44 +000031#include "src/v8.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000032
Ben Murdochb8a8cc12014-11-26 15:28:44 +000033#include "src/ast.h"
34#include "src/char-predicates-inl.h"
35#include "src/jsregexp.h"
36#include "src/ostreams.h"
37#include "src/parser.h"
38#include "src/regexp-macro-assembler.h"
39#include "src/regexp-macro-assembler-irregexp.h"
40#include "src/string-stream.h"
41#include "src/zone-inl.h"
Steve Block6ded16b2010-05-10 14:33:55 +010042#ifdef V8_INTERPRETED_REGEXP
Ben Murdochb8a8cc12014-11-26 15:28:44 +000043#include "src/interpreter-irregexp.h"
Steve Block6ded16b2010-05-10 14:33:55 +010044#else // V8_INTERPRETED_REGEXP
Ben Murdochb8a8cc12014-11-26 15:28:44 +000045#include "src/macro-assembler.h"
46#if V8_TARGET_ARCH_ARM
47#include "src/arm/assembler-arm.h" // NOLINT
48#include "src/arm/macro-assembler-arm.h"
49#include "src/arm/regexp-macro-assembler-arm.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000050#endif
Ben Murdochb8a8cc12014-11-26 15:28:44 +000051#if V8_TARGET_ARCH_ARM64
52#include "src/arm64/assembler-arm64.h"
53#include "src/arm64/macro-assembler-arm64.h"
54#include "src/arm64/regexp-macro-assembler-arm64.h"
Steve Block44f0eee2011-05-26 01:26:41 +010055#endif
Ben Murdochb8a8cc12014-11-26 15:28:44 +000056#if V8_TARGET_ARCH_MIPS
57#include "src/mips/assembler-mips.h"
58#include "src/mips/macro-assembler-mips.h"
59#include "src/mips/regexp-macro-assembler-mips.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000060#endif
Ben Murdochb8a8cc12014-11-26 15:28:44 +000061#if V8_TARGET_ARCH_MIPS64
62#include "src/mips64/assembler-mips64.h"
63#include "src/mips64/macro-assembler-mips64.h"
64#include "src/mips64/regexp-macro-assembler-mips64.h"
65#endif
66#if V8_TARGET_ARCH_X64
67#include "src/x64/assembler-x64.h"
68#include "src/x64/macro-assembler-x64.h"
69#include "src/x64/regexp-macro-assembler-x64.h"
70#endif
71#if V8_TARGET_ARCH_IA32
72#include "src/ia32/assembler-ia32.h"
73#include "src/ia32/macro-assembler-ia32.h"
74#include "src/ia32/regexp-macro-assembler-ia32.h"
75#endif
76#if V8_TARGET_ARCH_X87
77#include "src/x87/assembler-x87.h"
78#include "src/x87/macro-assembler-x87.h"
79#include "src/x87/regexp-macro-assembler-x87.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000080#endif
Steve Block6ded16b2010-05-10 14:33:55 +010081#endif // V8_INTERPRETED_REGEXP
Ben Murdochb8a8cc12014-11-26 15:28:44 +000082#include "test/cctest/cctest.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000083
84using namespace v8::internal;
85
86
Leon Clarkee46be812010-01-19 14:06:41 +000087static bool CheckParse(const char* input) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +000088 v8::HandleScope scope(CcTest::isolate());
89 Zone zone(CcTest::i_isolate());
90 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
Leon Clarkee46be812010-01-19 14:06:41 +000091 RegExpCompileData result;
Ben Murdochb8a8cc12014-11-26 15:28:44 +000092 return v8::internal::RegExpParser::ParseRegExp(
93 &reader, false, &result, &zone);
Leon Clarkee46be812010-01-19 14:06:41 +000094}
95
96
Ben Murdochb8a8cc12014-11-26 15:28:44 +000097static void CheckParseEq(const char* input, const char* expected) {
98 v8::HandleScope scope(CcTest::isolate());
99 Zone zone(CcTest::i_isolate());
100 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
Steve Blocka7e24c12009-10-30 11:49:00 +0000101 RegExpCompileData result;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000102 CHECK(v8::internal::RegExpParser::ParseRegExp(
103 &reader, false, &result, &zone));
Steve Blocka7e24c12009-10-30 11:49:00 +0000104 CHECK(result.tree != NULL);
105 CHECK(result.error.is_null());
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400106 std::ostringstream os;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000107 result.tree->Print(os, &zone);
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400108 CHECK_EQ(expected, os.str().c_str());
Steve Blocka7e24c12009-10-30 11:49:00 +0000109}
110
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000111
Steve Blocka7e24c12009-10-30 11:49:00 +0000112static bool CheckSimple(const char* input) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000113 v8::HandleScope scope(CcTest::isolate());
114 Zone zone(CcTest::i_isolate());
115 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
Steve Blocka7e24c12009-10-30 11:49:00 +0000116 RegExpCompileData result;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000117 CHECK(v8::internal::RegExpParser::ParseRegExp(
118 &reader, false, &result, &zone));
Steve Blocka7e24c12009-10-30 11:49:00 +0000119 CHECK(result.tree != NULL);
120 CHECK(result.error.is_null());
121 return result.simple;
122}
123
124struct MinMaxPair {
125 int min_match;
126 int max_match;
127};
128
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000129
Steve Blocka7e24c12009-10-30 11:49:00 +0000130static MinMaxPair CheckMinMaxMatch(const char* input) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000131 v8::HandleScope scope(CcTest::isolate());
132 Zone zone(CcTest::i_isolate());
133 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
Steve Blocka7e24c12009-10-30 11:49:00 +0000134 RegExpCompileData result;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000135 CHECK(v8::internal::RegExpParser::ParseRegExp(
136 &reader, false, &result, &zone));
Steve Blocka7e24c12009-10-30 11:49:00 +0000137 CHECK(result.tree != NULL);
138 CHECK(result.error.is_null());
139 int min_match = result.tree->min_match();
140 int max_match = result.tree->max_match();
141 MinMaxPair pair = { min_match, max_match };
142 return pair;
143}
144
145
Leon Clarkee46be812010-01-19 14:06:41 +0000146#define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))
Steve Blocka7e24c12009-10-30 11:49:00 +0000147#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input));
148#define CHECK_MIN_MAX(input, min, max) \
149 { MinMaxPair min_max = CheckMinMaxMatch(input); \
150 CHECK_EQ(min, min_max.min_match); \
151 CHECK_EQ(max, min_max.max_match); \
152 }
153
154TEST(Parser) {
Leon Clarkee46be812010-01-19 14:06:41 +0000155 CHECK_PARSE_ERROR("?");
156
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000157 CheckParseEq("abc", "'abc'");
158 CheckParseEq("", "%");
159 CheckParseEq("abc|def", "(| 'abc' 'def')");
160 CheckParseEq("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
161 CheckParseEq("^xxx$", "(: @^i 'xxx' @$i)");
162 CheckParseEq("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
163 CheckParseEq("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
164 CheckParseEq("a*", "(# 0 - g 'a')");
165 CheckParseEq("a*?", "(# 0 - n 'a')");
166 CheckParseEq("abc+", "(: 'ab' (# 1 - g 'c'))");
167 CheckParseEq("abc+?", "(: 'ab' (# 1 - n 'c'))");
168 CheckParseEq("xyz?", "(: 'xy' (# 0 1 g 'z'))");
169 CheckParseEq("xyz??", "(: 'xy' (# 0 1 n 'z'))");
170 CheckParseEq("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
171 CheckParseEq("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
172 CheckParseEq("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
173 CheckParseEq("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
174 CheckParseEq("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
175 CheckParseEq("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
176 CheckParseEq("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
177 CheckParseEq("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
178 CheckParseEq("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
179 CheckParseEq("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
180 CheckParseEq("(?:foo)", "'foo'");
181 CheckParseEq("(?: foo )", "' foo '");
182 CheckParseEq("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
183 CheckParseEq("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
184 CheckParseEq("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
185 CheckParseEq("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
186 CheckParseEq("()", "(^ %)");
187 CheckParseEq("(?=)", "(-> + %)");
188 CheckParseEq("[]", "^[\\x00-\\uffff]"); // Doesn't compile on windows
189 CheckParseEq("[^]", "[\\x00-\\uffff]"); // \uffff isn't in codepage 1252
190 CheckParseEq("[x]", "[x]");
191 CheckParseEq("[xyz]", "[x y z]");
192 CheckParseEq("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
193 CheckParseEq("[-123]", "[- 1 2 3]");
194 CheckParseEq("[^123]", "^[1 2 3]");
195 CheckParseEq("]", "']'");
196 CheckParseEq("}", "'}'");
197 CheckParseEq("[a-b-c]", "[a-b - c]");
198 CheckParseEq("[\\d]", "[0-9]");
199 CheckParseEq("[x\\dz]", "[x 0-9 z]");
200 CheckParseEq("[\\d-z]", "[0-9 - z]");
201 CheckParseEq("[\\d-\\d]", "[0-9 - 0-9]");
202 CheckParseEq("[z-\\d]", "[z - 0-9]");
Ben Murdoch086aeea2011-05-13 15:57:08 +0100203 // Control character outside character class.
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000204 CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
205 CheckParseEq("\\c!", "'\\c!'");
206 CheckParseEq("\\c_", "'\\c_'");
207 CheckParseEq("\\c~", "'\\c~'");
208 CheckParseEq("\\c1", "'\\c1'");
Ben Murdoch086aeea2011-05-13 15:57:08 +0100209 // Control character inside character class.
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000210 CheckParseEq("[\\c!]", "[\\ c !]");
211 CheckParseEq("[\\c_]", "[\\x1f]");
212 CheckParseEq("[\\c~]", "[\\ c ~]");
213 CheckParseEq("[\\ca]", "[\\x01]");
214 CheckParseEq("[\\cz]", "[\\x1a]");
215 CheckParseEq("[\\cA]", "[\\x01]");
216 CheckParseEq("[\\cZ]", "[\\x1a]");
217 CheckParseEq("[\\c1]", "[\\x11]");
Ben Murdoch086aeea2011-05-13 15:57:08 +0100218
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000219 CheckParseEq("[a\\]c]", "[a ] c]");
220 CheckParseEq("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
221 CheckParseEq("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");
222 CheckParseEq("\\0", "'\\x00'");
223 CheckParseEq("\\8", "'8'");
224 CheckParseEq("\\9", "'9'");
225 CheckParseEq("\\11", "'\\x09'");
226 CheckParseEq("\\11a", "'\\x09a'");
227 CheckParseEq("\\011", "'\\x09'");
228 CheckParseEq("\\00011", "'\\x0011'");
229 CheckParseEq("\\118", "'\\x098'");
230 CheckParseEq("\\111", "'I'");
231 CheckParseEq("\\1111", "'I1'");
232 CheckParseEq("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
233 CheckParseEq("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
234 CheckParseEq("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
235 CheckParseEq("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
236 CheckParseEq("(x)(x)(x)\\1*",
237 "(: (^ 'x') (^ 'x') (^ 'x')"
238 " (# 0 - g (<- 1)))");
239 CheckParseEq("(x)(x)(x)\\2*",
240 "(: (^ 'x') (^ 'x') (^ 'x')"
241 " (# 0 - g (<- 2)))");
242 CheckParseEq("(x)(x)(x)\\3*",
243 "(: (^ 'x') (^ 'x') (^ 'x')"
244 " (# 0 - g (<- 3)))");
245 CheckParseEq("(x)(x)(x)\\4*",
246 "(: (^ 'x') (^ 'x') (^ 'x')"
247 " (# 0 - g '\\x04'))");
248 CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
249 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
250 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
251 CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
252 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
253 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
254 CheckParseEq("(a)\\1", "(: (^ 'a') (<- 1))");
255 CheckParseEq("(a\\1)", "(^ 'a')");
256 CheckParseEq("(\\1a)", "(^ 'a')");
257 CheckParseEq("(?=a)?a", "'a'");
258 CheckParseEq("(?=a){0,10}a", "'a'");
259 CheckParseEq("(?=a){1,10}a", "(: (-> + 'a') 'a')");
260 CheckParseEq("(?=a){9,10}a", "(: (-> + 'a') 'a')");
261 CheckParseEq("(?!a)?a", "'a'");
262 CheckParseEq("\\1(a)", "(^ 'a')");
263 CheckParseEq("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
264 CheckParseEq("(?!\\1(a\\1)\\1)\\1", "(: (-> - (: (^ 'a') (<- 1))) (<- 1))");
265 CheckParseEq("[\\0]", "[\\x00]");
266 CheckParseEq("[\\11]", "[\\x09]");
267 CheckParseEq("[\\11a]", "[\\x09 a]");
268 CheckParseEq("[\\011]", "[\\x09]");
269 CheckParseEq("[\\00011]", "[\\x00 1 1]");
270 CheckParseEq("[\\118]", "[\\x09 8]");
271 CheckParseEq("[\\111]", "[I]");
272 CheckParseEq("[\\1111]", "[I 1]");
273 CheckParseEq("\\x34", "'\x34'");
274 CheckParseEq("\\x60", "'\x60'");
275 CheckParseEq("\\x3z", "'x3z'");
276 CheckParseEq("\\c", "'\\c'");
277 CheckParseEq("\\u0034", "'\x34'");
278 CheckParseEq("\\u003z", "'u003z'");
279 CheckParseEq("foo[z]*", "(: 'foo' (# 0 - g [z]))");
Steve Blocka7e24c12009-10-30 11:49:00 +0000280
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000281 CHECK_SIMPLE("", false);
Steve Blocka7e24c12009-10-30 11:49:00 +0000282 CHECK_SIMPLE("a", true);
283 CHECK_SIMPLE("a|b", false);
284 CHECK_SIMPLE("a\\n", false);
285 CHECK_SIMPLE("^a", false);
286 CHECK_SIMPLE("a$", false);
287 CHECK_SIMPLE("a\\b!", false);
288 CHECK_SIMPLE("a\\Bb", false);
289 CHECK_SIMPLE("a*", false);
290 CHECK_SIMPLE("a*?", false);
291 CHECK_SIMPLE("a?", false);
292 CHECK_SIMPLE("a??", false);
293 CHECK_SIMPLE("a{0,1}?", false);
294 CHECK_SIMPLE("a{1,1}?", false);
295 CHECK_SIMPLE("a{1,2}?", false);
296 CHECK_SIMPLE("a+?", false);
297 CHECK_SIMPLE("(a)", false);
298 CHECK_SIMPLE("(a)\\1", false);
299 CHECK_SIMPLE("(\\1a)", false);
300 CHECK_SIMPLE("\\1(a)", false);
301 CHECK_SIMPLE("a\\s", false);
302 CHECK_SIMPLE("a\\S", false);
303 CHECK_SIMPLE("a\\d", false);
304 CHECK_SIMPLE("a\\D", false);
305 CHECK_SIMPLE("a\\w", false);
306 CHECK_SIMPLE("a\\W", false);
307 CHECK_SIMPLE("a.", false);
308 CHECK_SIMPLE("a\\q", false);
309 CHECK_SIMPLE("a[a]", false);
310 CHECK_SIMPLE("a[^a]", false);
311 CHECK_SIMPLE("a[a-z]", false);
312 CHECK_SIMPLE("a[\\q]", false);
313 CHECK_SIMPLE("a(?:b)", false);
314 CHECK_SIMPLE("a(?=b)", false);
315 CHECK_SIMPLE("a(?!b)", false);
316 CHECK_SIMPLE("\\x60", false);
317 CHECK_SIMPLE("\\u0060", false);
318 CHECK_SIMPLE("\\cA", false);
319 CHECK_SIMPLE("\\q", false);
320 CHECK_SIMPLE("\\1112", false);
321 CHECK_SIMPLE("\\0", false);
322 CHECK_SIMPLE("(a)\\1", false);
323 CHECK_SIMPLE("(?=a)?a", false);
324 CHECK_SIMPLE("(?!a)?a\\1", false);
325 CHECK_SIMPLE("(?:(?=a))a\\1", false);
326
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000327 CheckParseEq("a{}", "'a{}'");
328 CheckParseEq("a{,}", "'a{,}'");
329 CheckParseEq("a{", "'a{'");
330 CheckParseEq("a{z}", "'a{z}'");
331 CheckParseEq("a{1z}", "'a{1z}'");
332 CheckParseEq("a{12z}", "'a{12z}'");
333 CheckParseEq("a{12,", "'a{12,'");
334 CheckParseEq("a{12,3b", "'a{12,3b'");
335 CheckParseEq("{}", "'{}'");
336 CheckParseEq("{,}", "'{,}'");
337 CheckParseEq("{", "'{'");
338 CheckParseEq("{z}", "'{z}'");
339 CheckParseEq("{1z}", "'{1z}'");
340 CheckParseEq("{12z}", "'{12z}'");
341 CheckParseEq("{12,", "'{12,'");
342 CheckParseEq("{12,3b", "'{12,3b'");
Steve Blocka7e24c12009-10-30 11:49:00 +0000343
344 CHECK_MIN_MAX("a", 1, 1);
345 CHECK_MIN_MAX("abc", 3, 3);
346 CHECK_MIN_MAX("a[bc]d", 3, 3);
347 CHECK_MIN_MAX("a|bc", 1, 2);
348 CHECK_MIN_MAX("ab|c", 1, 2);
349 CHECK_MIN_MAX("a||bc", 0, 2);
350 CHECK_MIN_MAX("|", 0, 0);
351 CHECK_MIN_MAX("(?:ab)", 2, 2);
352 CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
353 CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
354 CHECK_MIN_MAX("(ab)", 2, 2);
355 CHECK_MIN_MAX("(ab|cde)", 2, 3);
356 CHECK_MIN_MAX("(ab)\\1", 2, 4);
357 CHECK_MIN_MAX("(ab|cde)\\1", 2, 6);
358 CHECK_MIN_MAX("(?:ab)?", 0, 2);
359 CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
360 CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
361 CHECK_MIN_MAX("a?", 0, 1);
362 CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
363 CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
364 CHECK_MIN_MAX("a??", 0, 1);
365 CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
366 CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
367 CHECK_MIN_MAX("(?:a?)?", 0, 1);
368 CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
369 CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
370 CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
371 CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
372 CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
373 CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
374 CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
375 CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
376 CHECK_MIN_MAX("a{0}", 0, 0);
377 CHECK_MIN_MAX("(?:a+){0}", 0, 0);
378 CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
379 CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
380 CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
381 CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
382 CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
383 CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
384 CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
385 CHECK_MIN_MAX("a\\bc", 2, 2);
386 CHECK_MIN_MAX("a\\Bc", 2, 2);
387 CHECK_MIN_MAX("a\\sc", 3, 3);
388 CHECK_MIN_MAX("a\\Sc", 3, 3);
389 CHECK_MIN_MAX("a(?=b)c", 2, 2);
390 CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
391 CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
392}
393
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000394
Steve Blocka7e24c12009-10-30 11:49:00 +0000395TEST(ParserRegression) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000396 CheckParseEq("[A-Z$-][x]", "(! [A-Z $ -] [x])");
397 CheckParseEq("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
398 CheckParseEq("{", "'{'");
399 CheckParseEq("a|", "(| 'a' %)");
Steve Blocka7e24c12009-10-30 11:49:00 +0000400}
401
402static void ExpectError(const char* input,
403 const char* expected) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000404 v8::HandleScope scope(CcTest::isolate());
405 Zone zone(CcTest::i_isolate());
406 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
Steve Blocka7e24c12009-10-30 11:49:00 +0000407 RegExpCompileData result;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000408 CHECK(!v8::internal::RegExpParser::ParseRegExp(
409 &reader, false, &result, &zone));
Steve Blocka7e24c12009-10-30 11:49:00 +0000410 CHECK(result.tree == NULL);
411 CHECK(!result.error.is_null());
Ben Murdoch589d6972011-11-30 16:04:58 +0000412 SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000413 CHECK_EQ(expected, str.get());
Steve Blocka7e24c12009-10-30 11:49:00 +0000414}
415
416
417TEST(Errors) {
Steve Blocka7e24c12009-10-30 11:49:00 +0000418 const char* kEndBackslash = "\\ at end of pattern";
419 ExpectError("\\", kEndBackslash);
420 const char* kUnterminatedGroup = "Unterminated group";
421 ExpectError("(foo", kUnterminatedGroup);
422 const char* kInvalidGroup = "Invalid group";
423 ExpectError("(?", kInvalidGroup);
424 const char* kUnterminatedCharacterClass = "Unterminated character class";
425 ExpectError("[", kUnterminatedCharacterClass);
426 ExpectError("[a-", kUnterminatedCharacterClass);
427 const char* kNothingToRepeat = "Nothing to repeat";
428 ExpectError("*", kNothingToRepeat);
429 ExpectError("?", kNothingToRepeat);
430 ExpectError("+", kNothingToRepeat);
431 ExpectError("{1}", kNothingToRepeat);
432 ExpectError("{1,2}", kNothingToRepeat);
433 ExpectError("{1,}", kNothingToRepeat);
434
435 // Check that we don't allow more than kMaxCapture captures
436 const int kMaxCaptures = 1 << 16; // Must match RegExpParser::kMaxCaptures.
437 const char* kTooManyCaptures = "Too many captures";
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400438 std::ostringstream os;
Steve Blocka7e24c12009-10-30 11:49:00 +0000439 for (int i = 0; i <= kMaxCaptures; i++) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000440 os << "()";
Steve Blocka7e24c12009-10-30 11:49:00 +0000441 }
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400442 ExpectError(os.str().c_str(), kTooManyCaptures);
Steve Blocka7e24c12009-10-30 11:49:00 +0000443}
444
445
446static bool IsDigit(uc16 c) {
447 return ('0' <= c && c <= '9');
448}
449
450
451static bool NotDigit(uc16 c) {
452 return !IsDigit(c);
453}
454
455
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000456static bool IsWhiteSpaceOrLineTerminator(uc16 c) {
457 // According to ECMA 5.1, 15.10.2.12 the CharacterClassEscape \s includes
458 // WhiteSpace (7.2) and LineTerminator (7.3) values.
459 return v8::internal::WhiteSpaceOrLineTerminator::Is(c);
Steve Blocka7e24c12009-10-30 11:49:00 +0000460}
461
462
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000463static bool NotWhiteSpaceNorLineTermiantor(uc16 c) {
464 return !IsWhiteSpaceOrLineTerminator(c);
Steve Blocka7e24c12009-10-30 11:49:00 +0000465}
466
467
468static bool NotWord(uc16 c) {
469 return !IsRegExpWord(c);
470}
471
472
473static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000474 Zone zone(CcTest::i_isolate());
475 ZoneList<CharacterRange>* ranges =
476 new(&zone) ZoneList<CharacterRange>(2, &zone);
477 CharacterRange::AddClassEscape(c, ranges, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000478 for (unsigned i = 0; i < (1 << 16); i++) {
479 bool in_class = false;
480 for (int j = 0; !in_class && j < ranges->length(); j++) {
481 CharacterRange& range = ranges->at(j);
482 in_class = (range.from() <= i && i <= range.to());
483 }
484 CHECK_EQ(pred(i), in_class);
485 }
486}
487
488
489TEST(CharacterClassEscapes) {
490 TestCharacterClassEscapes('.', IsRegExpNewline);
491 TestCharacterClassEscapes('d', IsDigit);
492 TestCharacterClassEscapes('D', NotDigit);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000493 TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator);
494 TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTermiantor);
Steve Blocka7e24c12009-10-30 11:49:00 +0000495 TestCharacterClassEscapes('w', IsRegExpWord);
496 TestCharacterClassEscapes('W', NotWord);
497}
498
499
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000500static RegExpNode* Compile(const char* input, bool multiline, bool is_one_byte,
501 Zone* zone) {
502 Isolate* isolate = CcTest::i_isolate();
Ben Murdoch8b112d22011-06-08 16:22:53 +0100503 FlatStringReader reader(isolate, CStrVector(input));
Steve Blocka7e24c12009-10-30 11:49:00 +0000504 RegExpCompileData compile_data;
Teng-Hui Zhu3e5fa292010-11-09 16:16:48 -0800505 if (!v8::internal::RegExpParser::ParseRegExp(&reader, multiline,
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000506 &compile_data, zone))
Steve Blocka7e24c12009-10-30 11:49:00 +0000507 return NULL;
Ben Murdoch8b112d22011-06-08 16:22:53 +0100508 Handle<String> pattern = isolate->factory()->
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000509 NewStringFromUtf8(CStrVector(input)).ToHandleChecked();
510 Handle<String> sample_subject =
511 isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
512 RegExpEngine::Compile(&compile_data, false, false, multiline, false, pattern,
513 sample_subject, is_one_byte, zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000514 return compile_data.node;
515}
516
517
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000518static void Execute(const char* input, bool multiline, bool is_one_byte,
Steve Blocka7e24c12009-10-30 11:49:00 +0000519 bool dot_output = false) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000520 v8::HandleScope scope(CcTest::isolate());
521 Zone zone(CcTest::i_isolate());
522 RegExpNode* node = Compile(input, multiline, is_one_byte, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000523 USE(node);
524#ifdef DEBUG
525 if (dot_output) {
526 RegExpEngine::DotPrint(input, node, false);
Steve Blocka7e24c12009-10-30 11:49:00 +0000527 }
528#endif // DEBUG
529}
530
531
532class TestConfig {
533 public:
534 typedef int Key;
535 typedef int Value;
536 static const int kNoKey;
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100537 static int NoValue() { return 0; }
Steve Blocka7e24c12009-10-30 11:49:00 +0000538 static inline int Compare(int a, int b) {
539 if (a < b)
540 return -1;
541 else if (a > b)
542 return 1;
543 else
544 return 0;
545 }
546};
547
548
549const int TestConfig::kNoKey = 0;
Steve Blocka7e24c12009-10-30 11:49:00 +0000550
551
552static unsigned PseudoRandom(int i, int j) {
553 return ~(~((i * 781) ^ (j * 329)));
554}
555
556
557TEST(SplayTreeSimple) {
558 static const unsigned kLimit = 1000;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000559 Zone zone(CcTest::i_isolate());
560 ZoneSplayTree<TestConfig> tree(&zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000561 bool seen[kLimit];
562 for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
563#define CHECK_MAPS_EQUAL() do { \
564 for (unsigned k = 0; k < kLimit; k++) \
565 CHECK_EQ(seen[k], tree.Find(k, &loc)); \
566 } while (false)
567 for (int i = 0; i < 50; i++) {
568 for (int j = 0; j < 50; j++) {
569 unsigned next = PseudoRandom(i, j) % kLimit;
570 if (seen[next]) {
571 // We've already seen this one. Check the value and remove
572 // it.
573 ZoneSplayTree<TestConfig>::Locator loc;
574 CHECK(tree.Find(next, &loc));
575 CHECK_EQ(next, loc.key());
576 CHECK_EQ(3 * next, loc.value());
577 tree.Remove(next);
578 seen[next] = false;
579 CHECK_MAPS_EQUAL();
580 } else {
581 // Check that it wasn't there already and then add it.
582 ZoneSplayTree<TestConfig>::Locator loc;
583 CHECK(!tree.Find(next, &loc));
584 CHECK(tree.Insert(next, &loc));
585 CHECK_EQ(next, loc.key());
586 loc.set_value(3 * next);
587 seen[next] = true;
588 CHECK_MAPS_EQUAL();
589 }
590 int val = PseudoRandom(j, i) % kLimit;
591 if (seen[val]) {
592 ZoneSplayTree<TestConfig>::Locator loc;
593 CHECK(tree.FindGreatestLessThan(val, &loc));
594 CHECK_EQ(loc.key(), val);
595 break;
596 }
597 val = PseudoRandom(i + j, i - j) % kLimit;
598 if (seen[val]) {
599 ZoneSplayTree<TestConfig>::Locator loc;
600 CHECK(tree.FindLeastGreaterThan(val, &loc));
601 CHECK_EQ(loc.key(), val);
602 break;
603 }
604 }
605 }
606}
607
608
609TEST(DispatchTableConstruction) {
610 // Initialize test data.
611 static const int kLimit = 1000;
612 static const int kRangeCount = 8;
613 static const int kRangeSize = 16;
614 uc16 ranges[kRangeCount][2 * kRangeSize];
615 for (int i = 0; i < kRangeCount; i++) {
616 Vector<uc16> range(ranges[i], 2 * kRangeSize);
617 for (int j = 0; j < 2 * kRangeSize; j++) {
618 range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
619 }
620 range.Sort();
621 for (int j = 1; j < 2 * kRangeSize; j++) {
622 CHECK(range[j-1] <= range[j]);
623 }
624 }
625 // Enter test data into dispatch table.
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000626 Zone zone(CcTest::i_isolate());
627 DispatchTable table(&zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000628 for (int i = 0; i < kRangeCount; i++) {
629 uc16* range = ranges[i];
630 for (int j = 0; j < 2 * kRangeSize; j += 2)
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000631 table.AddRange(CharacterRange(range[j], range[j + 1]), i, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000632 }
633 // Check that the table looks as we would expect
634 for (int p = 0; p < kLimit; p++) {
635 OutSet* outs = table.Get(p);
636 for (int j = 0; j < kRangeCount; j++) {
637 uc16* range = ranges[j];
638 bool is_on = false;
639 for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
640 is_on = (range[k] <= p && p <= range[k + 1]);
641 CHECK_EQ(is_on, outs->Get(j));
642 }
643 }
644}
645
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000646
Leon Clarkee46be812010-01-19 14:06:41 +0000647// Test of debug-only syntax.
648#ifdef DEBUG
649
650TEST(ParsePossessiveRepetition) {
651 bool old_flag_value = FLAG_regexp_possessive_quantifier;
652
653 // Enable possessive quantifier syntax.
654 FLAG_regexp_possessive_quantifier = true;
655
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000656 CheckParseEq("a*+", "(# 0 - p 'a')");
657 CheckParseEq("a++", "(# 1 - p 'a')");
658 CheckParseEq("a?+", "(# 0 1 p 'a')");
659 CheckParseEq("a{10,20}+", "(# 10 20 p 'a')");
660 CheckParseEq("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')");
Leon Clarkee46be812010-01-19 14:06:41 +0000661
662 // Disable possessive quantifier syntax.
663 FLAG_regexp_possessive_quantifier = false;
664
665 CHECK_PARSE_ERROR("a*+");
666 CHECK_PARSE_ERROR("a++");
667 CHECK_PARSE_ERROR("a?+");
668 CHECK_PARSE_ERROR("a{10,20}+");
669 CHECK_PARSE_ERROR("a{10,20}+b");
670
671 FLAG_regexp_possessive_quantifier = old_flag_value;
672}
673
674#endif
Steve Blocka7e24c12009-10-30 11:49:00 +0000675
676// Tests of interpreter.
677
678
Steve Block6ded16b2010-05-10 14:33:55 +0100679#ifndef V8_INTERPRETED_REGEXP
Steve Blocka7e24c12009-10-30 11:49:00 +0000680
681#if V8_TARGET_ARCH_IA32
682typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
683#elif V8_TARGET_ARCH_X64
684typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
685#elif V8_TARGET_ARCH_ARM
686typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000687#elif V8_TARGET_ARCH_ARM64
688typedef RegExpMacroAssemblerARM64 ArchRegExpMacroAssembler;
Andrei Popescu31002712010-02-23 13:46:05 +0000689#elif V8_TARGET_ARCH_MIPS
Steve Block44f0eee2011-05-26 01:26:41 +0100690typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000691#elif V8_TARGET_ARCH_MIPS64
692typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
693#elif V8_TARGET_ARCH_X87
694typedef RegExpMacroAssemblerX87 ArchRegExpMacroAssembler;
Steve Blocka7e24c12009-10-30 11:49:00 +0000695#endif
696
697class ContextInitializer {
698 public:
699 ContextInitializer()
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000700 : scope_(CcTest::isolate()),
701 env_(v8::Context::New(CcTest::isolate())) {
Steve Blocka7e24c12009-10-30 11:49:00 +0000702 env_->Enter();
703 }
704 ~ContextInitializer() {
705 env_->Exit();
Steve Blocka7e24c12009-10-30 11:49:00 +0000706 }
707 private:
Steve Blocka7e24c12009-10-30 11:49:00 +0000708 v8::HandleScope scope_;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000709 v8::Handle<v8::Context> env_;
Steve Blocka7e24c12009-10-30 11:49:00 +0000710};
711
712
713static ArchRegExpMacroAssembler::Result Execute(Code* code,
714 String* input,
715 int start_offset,
716 const byte* input_start,
717 const byte* input_end,
Leon Clarked91b9f72010-01-27 17:25:45 +0000718 int* captures) {
Steve Blocka7e24c12009-10-30 11:49:00 +0000719 return NativeRegExpMacroAssembler::Execute(
720 code,
721 input,
722 start_offset,
723 input_start,
724 input_end,
Steve Block44f0eee2011-05-26 01:26:41 +0100725 captures,
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000726 0,
727 CcTest::i_isolate());
Steve Blocka7e24c12009-10-30 11:49:00 +0000728}
729
730
731TEST(MacroAssemblerNativeSuccess) {
732 v8::V8::Initialize();
733 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000734 Isolate* isolate = CcTest::i_isolate();
735 Factory* factory = isolate->factory();
736 Zone zone(isolate);
Steve Blocka7e24c12009-10-30 11:49:00 +0000737
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000738 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 4, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000739
740 m.Succeed();
741
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000742 Handle<String> source = factory->NewStringFromStaticChars("");
Steve Blocka7e24c12009-10-30 11:49:00 +0000743 Handle<Object> code_object = m.GetCode(source);
744 Handle<Code> code = Handle<Code>::cast(code_object);
745
746 int captures[4] = {42, 37, 87, 117};
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000747 Handle<String> input = factory->NewStringFromStaticChars("foofoo");
748 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +0000749 const byte* start_adr =
750 reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
751
752 NativeRegExpMacroAssembler::Result result =
753 Execute(*code,
754 *input,
755 0,
756 start_adr,
757 start_adr + seq_input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +0000758 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +0000759
760 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
761 CHECK_EQ(-1, captures[0]);
762 CHECK_EQ(-1, captures[1]);
763 CHECK_EQ(-1, captures[2]);
764 CHECK_EQ(-1, captures[3]);
765}
766
767
768TEST(MacroAssemblerNativeSimple) {
769 v8::V8::Initialize();
770 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000771 Isolate* isolate = CcTest::i_isolate();
772 Factory* factory = isolate->factory();
773 Zone zone(isolate);
Steve Blocka7e24c12009-10-30 11:49:00 +0000774
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000775 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 4, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000776
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000777 Label fail, backtrack;
778 m.PushBacktrack(&fail);
779 m.CheckNotAtStart(NULL);
780 m.LoadCurrentCharacter(2, NULL);
781 m.CheckNotCharacter('o', NULL);
782 m.LoadCurrentCharacter(1, NULL, false);
783 m.CheckNotCharacter('o', NULL);
784 m.LoadCurrentCharacter(0, NULL, false);
785 m.CheckNotCharacter('f', NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +0000786 m.WriteCurrentPositionToRegister(0, 0);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000787 m.WriteCurrentPositionToRegister(1, 3);
Steve Blocka7e24c12009-10-30 11:49:00 +0000788 m.AdvanceCurrentPosition(3);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000789 m.PushBacktrack(&backtrack);
Steve Blocka7e24c12009-10-30 11:49:00 +0000790 m.Succeed();
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000791 m.Bind(&backtrack);
792 m.Backtrack();
Steve Blocka7e24c12009-10-30 11:49:00 +0000793 m.Bind(&fail);
794 m.Fail();
795
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000796 Handle<String> source = factory->NewStringFromStaticChars("^foo");
Steve Blocka7e24c12009-10-30 11:49:00 +0000797 Handle<Object> code_object = m.GetCode(source);
798 Handle<Code> code = Handle<Code>::cast(code_object);
799
800 int captures[4] = {42, 37, 87, 117};
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000801 Handle<String> input = factory->NewStringFromStaticChars("foofoo");
802 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +0000803 Address start_adr = seq_input->GetCharsAddress();
804
805 NativeRegExpMacroAssembler::Result result =
806 Execute(*code,
807 *input,
808 0,
809 start_adr,
810 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +0000811 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +0000812
813 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
814 CHECK_EQ(0, captures[0]);
815 CHECK_EQ(3, captures[1]);
816 CHECK_EQ(-1, captures[2]);
817 CHECK_EQ(-1, captures[3]);
818
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000819 input = factory->NewStringFromStaticChars("barbarbar");
820 seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +0000821 start_adr = seq_input->GetCharsAddress();
822
823 result = Execute(*code,
824 *input,
825 0,
826 start_adr,
827 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +0000828 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +0000829
830 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
831}
832
833
834TEST(MacroAssemblerNativeSimpleUC16) {
835 v8::V8::Initialize();
836 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000837 Isolate* isolate = CcTest::i_isolate();
838 Factory* factory = isolate->factory();
839 Zone zone(isolate);
Steve Blocka7e24c12009-10-30 11:49:00 +0000840
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000841 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000842
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000843 Label fail, backtrack;
844 m.PushBacktrack(&fail);
845 m.CheckNotAtStart(NULL);
846 m.LoadCurrentCharacter(2, NULL);
847 m.CheckNotCharacter('o', NULL);
848 m.LoadCurrentCharacter(1, NULL, false);
849 m.CheckNotCharacter('o', NULL);
850 m.LoadCurrentCharacter(0, NULL, false);
851 m.CheckNotCharacter('f', NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +0000852 m.WriteCurrentPositionToRegister(0, 0);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000853 m.WriteCurrentPositionToRegister(1, 3);
Steve Blocka7e24c12009-10-30 11:49:00 +0000854 m.AdvanceCurrentPosition(3);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000855 m.PushBacktrack(&backtrack);
Steve Blocka7e24c12009-10-30 11:49:00 +0000856 m.Succeed();
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000857 m.Bind(&backtrack);
858 m.Backtrack();
Steve Blocka7e24c12009-10-30 11:49:00 +0000859 m.Bind(&fail);
860 m.Fail();
861
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000862 Handle<String> source = factory->NewStringFromStaticChars("^foo");
Steve Blocka7e24c12009-10-30 11:49:00 +0000863 Handle<Object> code_object = m.GetCode(source);
864 Handle<Code> code = Handle<Code>::cast(code_object);
865
866 int captures[4] = {42, 37, 87, 117};
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100867 const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o',
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000868 static_cast<uc16>(0x2603)};
869 Handle<String> input = factory->NewStringFromTwoByte(
870 Vector<const uc16>(input_data, 6)).ToHandleChecked();
Steve Blocka7e24c12009-10-30 11:49:00 +0000871 Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
872 Address start_adr = seq_input->GetCharsAddress();
873
874 NativeRegExpMacroAssembler::Result result =
875 Execute(*code,
876 *input,
877 0,
878 start_adr,
879 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +0000880 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +0000881
882 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
883 CHECK_EQ(0, captures[0]);
884 CHECK_EQ(3, captures[1]);
885 CHECK_EQ(-1, captures[2]);
886 CHECK_EQ(-1, captures[3]);
887
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100888 const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a',
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000889 static_cast<uc16>(0x2603)};
890 input = factory->NewStringFromTwoByte(
891 Vector<const uc16>(input_data2, 9)).ToHandleChecked();
Steve Blocka7e24c12009-10-30 11:49:00 +0000892 seq_input = Handle<SeqTwoByteString>::cast(input);
893 start_adr = seq_input->GetCharsAddress();
894
895 result = Execute(*code,
896 *input,
897 0,
898 start_adr,
899 start_adr + input->length() * 2,
Leon Clarked91b9f72010-01-27 17:25:45 +0000900 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +0000901
902 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
903}
904
905
906TEST(MacroAssemblerNativeBacktrack) {
907 v8::V8::Initialize();
908 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000909 Isolate* isolate = CcTest::i_isolate();
910 Factory* factory = isolate->factory();
911 Zone zone(isolate);
Steve Blocka7e24c12009-10-30 11:49:00 +0000912
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000913 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 0, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000914
915 Label fail;
916 Label backtrack;
917 m.LoadCurrentCharacter(10, &fail);
918 m.Succeed();
919 m.Bind(&fail);
920 m.PushBacktrack(&backtrack);
921 m.LoadCurrentCharacter(10, NULL);
922 m.Succeed();
923 m.Bind(&backtrack);
924 m.Fail();
925
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000926 Handle<String> source = factory->NewStringFromStaticChars("..........");
Steve Blocka7e24c12009-10-30 11:49:00 +0000927 Handle<Object> code_object = m.GetCode(source);
928 Handle<Code> code = Handle<Code>::cast(code_object);
929
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000930 Handle<String> input = factory->NewStringFromStaticChars("foofoo");
931 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +0000932 Address start_adr = seq_input->GetCharsAddress();
933
934 NativeRegExpMacroAssembler::Result result =
935 Execute(*code,
936 *input,
937 0,
938 start_adr,
939 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +0000940 NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +0000941
942 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
943}
944
945
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000946TEST(MacroAssemblerNativeBackReferenceLATIN1) {
Steve Blocka7e24c12009-10-30 11:49:00 +0000947 v8::V8::Initialize();
948 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000949 Isolate* isolate = CcTest::i_isolate();
950 Factory* factory = isolate->factory();
951 Zone zone(isolate);
Steve Blocka7e24c12009-10-30 11:49:00 +0000952
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000953 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 4, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000954
955 m.WriteCurrentPositionToRegister(0, 0);
956 m.AdvanceCurrentPosition(2);
957 m.WriteCurrentPositionToRegister(1, 0);
958 Label nomatch;
959 m.CheckNotBackReference(0, &nomatch);
960 m.Fail();
961 m.Bind(&nomatch);
962 m.AdvanceCurrentPosition(2);
963 Label missing_match;
964 m.CheckNotBackReference(0, &missing_match);
965 m.WriteCurrentPositionToRegister(2, 0);
966 m.Succeed();
967 m.Bind(&missing_match);
968 m.Fail();
969
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000970 Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
Steve Blocka7e24c12009-10-30 11:49:00 +0000971 Handle<Object> code_object = m.GetCode(source);
972 Handle<Code> code = Handle<Code>::cast(code_object);
973
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000974 Handle<String> input = factory->NewStringFromStaticChars("fooofo");
975 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +0000976 Address start_adr = seq_input->GetCharsAddress();
977
978 int output[4];
979 NativeRegExpMacroAssembler::Result result =
980 Execute(*code,
981 *input,
982 0,
983 start_adr,
984 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +0000985 output);
Steve Blocka7e24c12009-10-30 11:49:00 +0000986
987 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
988 CHECK_EQ(0, output[0]);
989 CHECK_EQ(2, output[1]);
990 CHECK_EQ(6, output[2]);
991 CHECK_EQ(-1, output[3]);
992}
993
994
995TEST(MacroAssemblerNativeBackReferenceUC16) {
996 v8::V8::Initialize();
997 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000998 Isolate* isolate = CcTest::i_isolate();
999 Factory* factory = isolate->factory();
1000 Zone zone(isolate);
Steve Blocka7e24c12009-10-30 11:49:00 +00001001
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001002 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001003
1004 m.WriteCurrentPositionToRegister(0, 0);
1005 m.AdvanceCurrentPosition(2);
1006 m.WriteCurrentPositionToRegister(1, 0);
1007 Label nomatch;
1008 m.CheckNotBackReference(0, &nomatch);
1009 m.Fail();
1010 m.Bind(&nomatch);
1011 m.AdvanceCurrentPosition(2);
1012 Label missing_match;
1013 m.CheckNotBackReference(0, &missing_match);
1014 m.WriteCurrentPositionToRegister(2, 0);
1015 m.Succeed();
1016 m.Bind(&missing_match);
1017 m.Fail();
1018
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001019 Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
Steve Blocka7e24c12009-10-30 11:49:00 +00001020 Handle<Object> code_object = m.GetCode(source);
1021 Handle<Code> code = Handle<Code>::cast(code_object);
1022
1023 const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001024 Handle<String> input = factory->NewStringFromTwoByte(
1025 Vector<const uc16>(input_data, 6)).ToHandleChecked();
Steve Blocka7e24c12009-10-30 11:49:00 +00001026 Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
1027 Address start_adr = seq_input->GetCharsAddress();
1028
1029 int output[4];
1030 NativeRegExpMacroAssembler::Result result =
1031 Execute(*code,
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001032 *input,
1033 0,
1034 start_adr,
1035 start_adr + input->length() * 2,
1036 output);
Steve Blocka7e24c12009-10-30 11:49:00 +00001037
1038 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1039 CHECK_EQ(0, output[0]);
1040 CHECK_EQ(2, output[1]);
1041 CHECK_EQ(6, output[2]);
1042 CHECK_EQ(-1, output[3]);
1043}
1044
1045
1046
1047TEST(MacroAssemblernativeAtStart) {
1048 v8::V8::Initialize();
1049 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001050 Isolate* isolate = CcTest::i_isolate();
1051 Factory* factory = isolate->factory();
1052 Zone zone(isolate);
Steve Blocka7e24c12009-10-30 11:49:00 +00001053
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001054 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 0, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001055
1056 Label not_at_start, newline, fail;
1057 m.CheckNotAtStart(&not_at_start);
1058 // Check that prevchar = '\n' and current = 'f'.
1059 m.CheckCharacter('\n', &newline);
1060 m.Bind(&fail);
1061 m.Fail();
1062 m.Bind(&newline);
1063 m.LoadCurrentCharacter(0, &fail);
1064 m.CheckNotCharacter('f', &fail);
1065 m.Succeed();
1066
1067 m.Bind(&not_at_start);
1068 // Check that prevchar = 'o' and current = 'b'.
1069 Label prevo;
1070 m.CheckCharacter('o', &prevo);
1071 m.Fail();
1072 m.Bind(&prevo);
1073 m.LoadCurrentCharacter(0, &fail);
1074 m.CheckNotCharacter('b', &fail);
1075 m.Succeed();
1076
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001077 Handle<String> source = factory->NewStringFromStaticChars("(^f|ob)");
Steve Blocka7e24c12009-10-30 11:49:00 +00001078 Handle<Object> code_object = m.GetCode(source);
1079 Handle<Code> code = Handle<Code>::cast(code_object);
1080
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001081 Handle<String> input = factory->NewStringFromStaticChars("foobar");
1082 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001083 Address start_adr = seq_input->GetCharsAddress();
1084
1085 NativeRegExpMacroAssembler::Result result =
1086 Execute(*code,
1087 *input,
1088 0,
1089 start_adr,
1090 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001091 NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +00001092
1093 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1094
1095 result = Execute(*code,
1096 *input,
1097 3,
1098 start_adr + 3,
1099 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001100 NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +00001101
1102 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1103}
1104
1105
1106TEST(MacroAssemblerNativeBackRefNoCase) {
1107 v8::V8::Initialize();
1108 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001109 Isolate* isolate = CcTest::i_isolate();
1110 Factory* factory = isolate->factory();
1111 Zone zone(isolate);
Steve Blocka7e24c12009-10-30 11:49:00 +00001112
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001113 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 4, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001114
1115 Label fail, succ;
1116
1117 m.WriteCurrentPositionToRegister(0, 0);
1118 m.WriteCurrentPositionToRegister(2, 0);
1119 m.AdvanceCurrentPosition(3);
1120 m.WriteCurrentPositionToRegister(3, 0);
1121 m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "AbC".
1122 m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "ABC".
1123 Label expected_fail;
1124 m.CheckNotBackReferenceIgnoreCase(2, &expected_fail);
1125 m.Bind(&fail);
1126 m.Fail();
1127
1128 m.Bind(&expected_fail);
1129 m.AdvanceCurrentPosition(3); // Skip "xYz"
1130 m.CheckNotBackReferenceIgnoreCase(2, &succ);
1131 m.Fail();
1132
1133 m.Bind(&succ);
1134 m.WriteCurrentPositionToRegister(1, 0);
1135 m.Succeed();
1136
1137 Handle<String> source =
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001138 factory->NewStringFromStaticChars("^(abc)\1\1(?!\1)...(?!\1)");
Steve Blocka7e24c12009-10-30 11:49:00 +00001139 Handle<Object> code_object = m.GetCode(source);
1140 Handle<Code> code = Handle<Code>::cast(code_object);
1141
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001142 Handle<String> input = factory->NewStringFromStaticChars("aBcAbCABCxYzab");
1143 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001144 Address start_adr = seq_input->GetCharsAddress();
1145
1146 int output[4];
1147 NativeRegExpMacroAssembler::Result result =
1148 Execute(*code,
1149 *input,
1150 0,
1151 start_adr,
1152 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001153 output);
Steve Blocka7e24c12009-10-30 11:49:00 +00001154
1155 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1156 CHECK_EQ(0, output[0]);
1157 CHECK_EQ(12, output[1]);
1158 CHECK_EQ(0, output[2]);
1159 CHECK_EQ(3, output[3]);
1160}
1161
1162
1163
1164TEST(MacroAssemblerNativeRegisters) {
1165 v8::V8::Initialize();
1166 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001167 Isolate* isolate = CcTest::i_isolate();
1168 Factory* factory = isolate->factory();
1169 Zone zone(isolate);
Steve Blocka7e24c12009-10-30 11:49:00 +00001170
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001171 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 6, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001172
1173 uc16 foo_chars[3] = {'f', 'o', 'o'};
1174 Vector<const uc16> foo(foo_chars, 3);
1175
1176 enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
1177 Label fail;
1178 Label backtrack;
1179 m.WriteCurrentPositionToRegister(out1, 0); // Output: [0]
1180 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1181 m.PushBacktrack(&backtrack);
1182 m.WriteStackPointerToRegister(sp);
1183 // Fill stack and registers
1184 m.AdvanceCurrentPosition(2);
1185 m.WriteCurrentPositionToRegister(out1, 0);
1186 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1187 m.PushBacktrack(&fail);
1188 // Drop backtrack stack frames.
1189 m.ReadStackPointerFromRegister(sp);
1190 // And take the first backtrack (to &backtrack)
1191 m.Backtrack();
1192
1193 m.PushCurrentPosition();
1194 m.AdvanceCurrentPosition(2);
1195 m.PopCurrentPosition();
1196
1197 m.Bind(&backtrack);
1198 m.PopRegister(out1);
1199 m.ReadCurrentPositionFromRegister(out1);
1200 m.AdvanceCurrentPosition(3);
1201 m.WriteCurrentPositionToRegister(out2, 0); // [0,3]
1202
1203 Label loop;
1204 m.SetRegister(loop_cnt, 0); // loop counter
1205 m.Bind(&loop);
1206 m.AdvanceRegister(loop_cnt, 1);
1207 m.AdvanceCurrentPosition(1);
1208 m.IfRegisterLT(loop_cnt, 3, &loop);
1209 m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6]
1210
1211 Label loop2;
1212 m.SetRegister(loop_cnt, 2); // loop counter
1213 m.Bind(&loop2);
1214 m.AdvanceRegister(loop_cnt, -1);
1215 m.AdvanceCurrentPosition(1);
1216 m.IfRegisterGE(loop_cnt, 0, &loop2);
1217 m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9]
1218
1219 Label loop3;
1220 Label exit_loop3;
1221 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1222 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1223 m.ReadCurrentPositionFromRegister(out3);
1224 m.Bind(&loop3);
1225 m.AdvanceCurrentPosition(1);
1226 m.CheckGreedyLoop(&exit_loop3);
1227 m.GoTo(&loop3);
1228 m.Bind(&exit_loop3);
1229 m.PopCurrentPosition();
1230 m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9,-1]
1231
1232 m.Succeed();
1233
1234 m.Bind(&fail);
1235 m.Fail();
1236
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001237 Handle<String> source = factory->NewStringFromStaticChars("<loop test>");
Steve Blocka7e24c12009-10-30 11:49:00 +00001238 Handle<Object> code_object = m.GetCode(source);
1239 Handle<Code> code = Handle<Code>::cast(code_object);
1240
1241 // String long enough for test (content doesn't matter).
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001242 Handle<String> input = factory->NewStringFromStaticChars("foofoofoofoofoo");
1243 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001244 Address start_adr = seq_input->GetCharsAddress();
1245
1246 int output[6];
1247 NativeRegExpMacroAssembler::Result result =
1248 Execute(*code,
1249 *input,
1250 0,
1251 start_adr,
1252 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001253 output);
Steve Blocka7e24c12009-10-30 11:49:00 +00001254
1255 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1256 CHECK_EQ(0, output[0]);
1257 CHECK_EQ(3, output[1]);
1258 CHECK_EQ(6, output[2]);
1259 CHECK_EQ(9, output[3]);
1260 CHECK_EQ(9, output[4]);
1261 CHECK_EQ(-1, output[5]);
1262}
1263
1264
1265TEST(MacroAssemblerStackOverflow) {
1266 v8::V8::Initialize();
1267 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001268 Isolate* isolate = CcTest::i_isolate();
Ben Murdoch8b112d22011-06-08 16:22:53 +01001269 Factory* factory = isolate->factory();
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001270 Zone zone(isolate);
Steve Blocka7e24c12009-10-30 11:49:00 +00001271
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001272 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 0, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001273
1274 Label loop;
1275 m.Bind(&loop);
1276 m.PushBacktrack(&loop);
1277 m.GoTo(&loop);
1278
1279 Handle<String> source =
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001280 factory->NewStringFromStaticChars("<stack overflow test>");
Steve Blocka7e24c12009-10-30 11:49:00 +00001281 Handle<Object> code_object = m.GetCode(source);
1282 Handle<Code> code = Handle<Code>::cast(code_object);
1283
1284 // String long enough for test (content doesn't matter).
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001285 Handle<String> input = factory->NewStringFromStaticChars("dummy");
1286 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001287 Address start_adr = seq_input->GetCharsAddress();
1288
1289 NativeRegExpMacroAssembler::Result result =
1290 Execute(*code,
1291 *input,
1292 0,
1293 start_adr,
1294 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001295 NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +00001296
1297 CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result);
Ben Murdoch8b112d22011-06-08 16:22:53 +01001298 CHECK(isolate->has_pending_exception());
1299 isolate->clear_pending_exception();
Steve Blocka7e24c12009-10-30 11:49:00 +00001300}
1301
1302
1303TEST(MacroAssemblerNativeLotsOfRegisters) {
1304 v8::V8::Initialize();
1305 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001306 Isolate* isolate = CcTest::i_isolate();
Ben Murdoch8b112d22011-06-08 16:22:53 +01001307 Factory* factory = isolate->factory();
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001308 Zone zone(isolate);
Steve Blocka7e24c12009-10-30 11:49:00 +00001309
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001310 ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::LATIN1, 2, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001311
1312 // At least 2048, to ensure the allocated space for registers
1313 // span one full page.
1314 const int large_number = 8000;
1315 m.WriteCurrentPositionToRegister(large_number, 42);
1316 m.WriteCurrentPositionToRegister(0, 0);
1317 m.WriteCurrentPositionToRegister(1, 1);
1318 Label done;
1319 m.CheckNotBackReference(0, &done); // Performs a system-stack push.
1320 m.Bind(&done);
1321 m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck);
1322 m.PopRegister(1);
1323 m.Succeed();
1324
1325 Handle<String> source =
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001326 factory->NewStringFromStaticChars("<huge register space test>");
Steve Blocka7e24c12009-10-30 11:49:00 +00001327 Handle<Object> code_object = m.GetCode(source);
1328 Handle<Code> code = Handle<Code>::cast(code_object);
1329
1330 // String long enough for test (content doesn't matter).
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001331 Handle<String> input = factory->NewStringFromStaticChars("sample text");
1332 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001333 Address start_adr = seq_input->GetCharsAddress();
1334
1335 int captures[2];
1336 NativeRegExpMacroAssembler::Result result =
1337 Execute(*code,
1338 *input,
1339 0,
1340 start_adr,
1341 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001342 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +00001343
1344 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1345 CHECK_EQ(0, captures[0]);
1346 CHECK_EQ(42, captures[1]);
1347
Ben Murdoch8b112d22011-06-08 16:22:53 +01001348 isolate->clear_pending_exception();
Steve Blocka7e24c12009-10-30 11:49:00 +00001349}
1350
Steve Block6ded16b2010-05-10 14:33:55 +01001351#else // V8_INTERPRETED_REGEXP
Steve Blocka7e24c12009-10-30 11:49:00 +00001352
1353TEST(MacroAssembler) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001354 byte codes[1024];
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001355 Zone zone(CcTest::i_isolate());
1356 RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024), &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001357 // ^f(o)o.
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001358 Label start, fail, backtrack;
1359
Steve Blocka7e24c12009-10-30 11:49:00 +00001360 m.SetRegister(4, 42);
1361 m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
1362 m.AdvanceRegister(4, 42);
1363 m.GoTo(&start);
1364 m.Fail();
1365 m.Bind(&start);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001366 m.PushBacktrack(&fail);
1367 m.CheckNotAtStart(NULL);
1368 m.LoadCurrentCharacter(0, NULL);
1369 m.CheckNotCharacter('f', NULL);
1370 m.LoadCurrentCharacter(1, NULL);
1371 m.CheckNotCharacter('o', NULL);
1372 m.LoadCurrentCharacter(2, NULL);
1373 m.CheckNotCharacter('o', NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +00001374 m.WriteCurrentPositionToRegister(0, 0);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001375 m.WriteCurrentPositionToRegister(1, 3);
1376 m.WriteCurrentPositionToRegister(2, 1);
1377 m.WriteCurrentPositionToRegister(3, 2);
Steve Blocka7e24c12009-10-30 11:49:00 +00001378 m.AdvanceCurrentPosition(3);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001379 m.PushBacktrack(&backtrack);
Steve Blocka7e24c12009-10-30 11:49:00 +00001380 m.Succeed();
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001381 m.Bind(&backtrack);
1382 m.ClearRegisters(2, 3);
Steve Blocka7e24c12009-10-30 11:49:00 +00001383 m.Backtrack();
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001384 m.Bind(&fail);
Steve Blocka7e24c12009-10-30 11:49:00 +00001385 m.PopRegister(0);
1386 m.Fail();
1387
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001388 Isolate* isolate = CcTest::i_isolate();
Ben Murdoch8b112d22011-06-08 16:22:53 +01001389 Factory* factory = isolate->factory();
1390 HandleScope scope(isolate);
Steve Blocka7e24c12009-10-30 11:49:00 +00001391
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001392 Handle<String> source = factory->NewStringFromStaticChars("^f(o)o");
Steve Blocka7e24c12009-10-30 11:49:00 +00001393 Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
1394 int captures[5];
1395
1396 const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001397 Handle<String> f1_16 = factory->NewStringFromTwoByte(
1398 Vector<const uc16>(str1, 6)).ToHandleChecked();
Steve Blocka7e24c12009-10-30 11:49:00 +00001399
Ben Murdoch8b112d22011-06-08 16:22:53 +01001400 CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0));
Steve Blocka7e24c12009-10-30 11:49:00 +00001401 CHECK_EQ(0, captures[0]);
1402 CHECK_EQ(3, captures[1]);
1403 CHECK_EQ(1, captures[2]);
1404 CHECK_EQ(2, captures[3]);
1405 CHECK_EQ(84, captures[4]);
1406
1407 const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001408 Handle<String> f2_16 = factory->NewStringFromTwoByte(
1409 Vector<const uc16>(str2, 6)).ToHandleChecked();
Steve Blocka7e24c12009-10-30 11:49:00 +00001410
Ben Murdoch8b112d22011-06-08 16:22:53 +01001411 CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0));
Steve Blocka7e24c12009-10-30 11:49:00 +00001412 CHECK_EQ(42, captures[0]);
1413}
1414
Steve Block6ded16b2010-05-10 14:33:55 +01001415#endif // V8_INTERPRETED_REGEXP
Steve Blocka7e24c12009-10-30 11:49:00 +00001416
1417
1418TEST(AddInverseToTable) {
1419 static const int kLimit = 1000;
1420 static const int kRangeCount = 16;
1421 for (int t = 0; t < 10; t++) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001422 Zone zone(CcTest::i_isolate());
Steve Blocka7e24c12009-10-30 11:49:00 +00001423 ZoneList<CharacterRange>* ranges =
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001424 new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001425 for (int i = 0; i < kRangeCount; i++) {
1426 int from = PseudoRandom(t + 87, i + 25) % kLimit;
1427 int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));
1428 if (to > kLimit) to = kLimit;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001429 ranges->Add(CharacterRange(from, to), &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001430 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001431 DispatchTable table(&zone);
1432 DispatchTableConstructor cons(&table, false, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001433 cons.set_choice_index(0);
1434 cons.AddInverse(ranges);
1435 for (int i = 0; i < kLimit; i++) {
1436 bool is_on = false;
1437 for (int j = 0; !is_on && j < kRangeCount; j++)
1438 is_on = ranges->at(j).Contains(i);
1439 OutSet* set = table.Get(i);
1440 CHECK_EQ(is_on, set->Get(0) == false);
1441 }
1442 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001443 Zone zone(CcTest::i_isolate());
Steve Blocka7e24c12009-10-30 11:49:00 +00001444 ZoneList<CharacterRange>* ranges =
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001445 new(&zone) ZoneList<CharacterRange>(1, &zone);
1446 ranges->Add(CharacterRange(0xFFF0, 0xFFFE), &zone);
1447 DispatchTable table(&zone);
1448 DispatchTableConstructor cons(&table, false, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001449 cons.set_choice_index(0);
1450 cons.AddInverse(ranges);
1451 CHECK(!table.Get(0xFFFE)->Get(0));
1452 CHECK(table.Get(0xFFFF)->Get(0));
1453}
1454
1455
1456static uc32 canonicalize(uc32 c) {
1457 unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
1458 int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL);
1459 if (count == 0) {
1460 return c;
1461 } else {
1462 CHECK_EQ(1, count);
1463 return canon[0];
1464 }
1465}
1466
1467
1468TEST(LatinCanonicalize) {
1469 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1470 for (char lower = 'a'; lower <= 'z'; lower++) {
1471 char upper = lower + ('A' - 'a');
1472 CHECK_EQ(canonicalize(lower), canonicalize(upper));
1473 unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1474 int length = un_canonicalize.get(lower, '\0', uncanon);
1475 CHECK_EQ(2, length);
1476 CHECK_EQ(upper, uncanon[0]);
1477 CHECK_EQ(lower, uncanon[1]);
1478 }
1479 for (uc32 c = 128; c < (1 << 21); c++)
1480 CHECK_GE(canonicalize(c), 128);
1481 unibrow::Mapping<unibrow::ToUppercase> to_upper;
Ben Murdochbb769b22010-08-11 14:56:33 +01001482 // Canonicalization is only defined for the Basic Multilingual Plane.
1483 for (uc32 c = 0; c < (1 << 16); c++) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001484 unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
1485 int length = to_upper.get(c, '\0', upper);
1486 if (length == 0) {
1487 length = 1;
1488 upper[0] = c;
1489 }
1490 uc32 u = upper[0];
1491 if (length > 1 || (c >= 128 && u < 128))
1492 u = c;
1493 CHECK_EQ(u, canonicalize(c));
1494 }
1495}
1496
1497
Ben Murdochbb769b22010-08-11 14:56:33 +01001498static uc32 CanonRangeEnd(uc32 c) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001499 unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
1500 int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);
1501 if (count == 0) {
1502 return c;
1503 } else {
1504 CHECK_EQ(1, count);
1505 return canon[0];
1506 }
1507}
1508
1509
1510TEST(RangeCanonicalization) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001511 // Check that we arrive at the same result when using the basic
1512 // range canonicalization primitives as when using immediate
1513 // canonicalization.
1514 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
Ben Murdochbb769b22010-08-11 14:56:33 +01001515 int block_start = 0;
1516 while (block_start <= 0xFFFF) {
1517 uc32 block_end = CanonRangeEnd(block_start);
1518 unsigned block_length = block_end - block_start + 1;
1519 if (block_length > 1) {
1520 unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1521 int first_length = un_canonicalize.get(block_start, '\0', first);
1522 for (unsigned i = 1; i < block_length; i++) {
1523 unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1524 int succ_length = un_canonicalize.get(block_start + i, '\0', succ);
1525 CHECK_EQ(first_length, succ_length);
1526 for (int j = 0; j < succ_length; j++) {
1527 int calc = first[j] + i;
1528 int found = succ[j];
1529 CHECK_EQ(calc, found);
1530 }
Steve Blocka7e24c12009-10-30 11:49:00 +00001531 }
1532 }
Ben Murdochbb769b22010-08-11 14:56:33 +01001533 block_start = block_start + block_length;
Steve Blocka7e24c12009-10-30 11:49:00 +00001534 }
1535}
1536
1537
1538TEST(UncanonicalizeEquivalence) {
1539 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1540 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1541 for (int i = 0; i < (1 << 16); i++) {
1542 int length = un_canonicalize.get(i, '\0', chars);
1543 for (int j = 0; j < length; j++) {
1544 unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1545 int length2 = un_canonicalize.get(chars[j], '\0', chars2);
1546 CHECK_EQ(length, length2);
1547 for (int k = 0; k < length; k++)
1548 CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k]));
1549 }
1550 }
1551}
1552
1553
1554static void TestRangeCaseIndependence(CharacterRange input,
1555 Vector<CharacterRange> expected) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001556 Zone zone(CcTest::i_isolate());
Steve Blocka7e24c12009-10-30 11:49:00 +00001557 int count = expected.length();
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001558 ZoneList<CharacterRange>* list =
1559 new(&zone) ZoneList<CharacterRange>(count, &zone);
1560 input.AddCaseEquivalents(list, false, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001561 CHECK_EQ(count, list->length());
1562 for (int i = 0; i < list->length(); i++) {
1563 CHECK_EQ(expected[i].from(), list->at(i).from());
1564 CHECK_EQ(expected[i].to(), list->at(i).to());
1565 }
1566}
1567
1568
1569static void TestSimpleRangeCaseIndependence(CharacterRange input,
1570 CharacterRange expected) {
1571 EmbeddedVector<CharacterRange, 1> vector;
1572 vector[0] = expected;
1573 TestRangeCaseIndependence(input, vector);
1574}
1575
1576
1577TEST(CharacterRangeCaseIndependence) {
1578 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('a'),
1579 CharacterRange::Singleton('A'));
1580 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('z'),
1581 CharacterRange::Singleton('Z'));
1582 TestSimpleRangeCaseIndependence(CharacterRange('a', 'z'),
1583 CharacterRange('A', 'Z'));
1584 TestSimpleRangeCaseIndependence(CharacterRange('c', 'f'),
1585 CharacterRange('C', 'F'));
1586 TestSimpleRangeCaseIndependence(CharacterRange('a', 'b'),
1587 CharacterRange('A', 'B'));
1588 TestSimpleRangeCaseIndependence(CharacterRange('y', 'z'),
1589 CharacterRange('Y', 'Z'));
1590 TestSimpleRangeCaseIndependence(CharacterRange('a' - 1, 'z' + 1),
1591 CharacterRange('A', 'Z'));
1592 TestSimpleRangeCaseIndependence(CharacterRange('A', 'Z'),
1593 CharacterRange('a', 'z'));
1594 TestSimpleRangeCaseIndependence(CharacterRange('C', 'F'),
1595 CharacterRange('c', 'f'));
1596 TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1),
1597 CharacterRange('a', 'z'));
1598 // Here we need to add [l-z] to complete the case independence of
1599 // [A-Za-z] but we expect [a-z] to be added since we always add a
1600 // whole block at a time.
1601 TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'),
1602 CharacterRange('a', 'z'));
1603}
1604
1605
1606static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) {
1607 if (ranges == NULL)
1608 return false;
1609 for (int i = 0; i < ranges->length(); i++) {
1610 CharacterRange range = ranges->at(i);
1611 if (range.from() <= c && c <= range.to())
1612 return true;
1613 }
1614 return false;
1615}
1616
1617
1618TEST(CharClassDifference) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001619 Zone zone(CcTest::i_isolate());
1620 ZoneList<CharacterRange>* base =
1621 new(&zone) ZoneList<CharacterRange>(1, &zone);
1622 base->Add(CharacterRange::Everything(), &zone);
1623 Vector<const int> overlay = CharacterRange::GetWordBounds();
Steve Blocka7e24c12009-10-30 11:49:00 +00001624 ZoneList<CharacterRange>* included = NULL;
1625 ZoneList<CharacterRange>* excluded = NULL;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001626 CharacterRange::Split(base, overlay, &included, &excluded, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001627 for (int i = 0; i < (1 << 16); i++) {
1628 bool in_base = InClass(i, base);
1629 if (in_base) {
1630 bool in_overlay = false;
1631 for (int j = 0; !in_overlay && j < overlay.length(); j += 2) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001632 if (overlay[j] <= i && i < overlay[j+1])
Steve Blocka7e24c12009-10-30 11:49:00 +00001633 in_overlay = true;
1634 }
1635 CHECK_EQ(in_overlay, InClass(i, included));
1636 CHECK_EQ(!in_overlay, InClass(i, excluded));
1637 } else {
1638 CHECK(!InClass(i, included));
1639 CHECK(!InClass(i, excluded));
1640 }
1641 }
1642}
1643
1644
Leon Clarkee46be812010-01-19 14:06:41 +00001645TEST(CanonicalizeCharacterSets) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001646 Zone zone(CcTest::i_isolate());
1647 ZoneList<CharacterRange>* list =
1648 new(&zone) ZoneList<CharacterRange>(4, &zone);
Leon Clarkee46be812010-01-19 14:06:41 +00001649 CharacterSet set(list);
1650
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001651 list->Add(CharacterRange(10, 20), &zone);
1652 list->Add(CharacterRange(30, 40), &zone);
1653 list->Add(CharacterRange(50, 60), &zone);
Leon Clarkee46be812010-01-19 14:06:41 +00001654 set.Canonicalize();
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001655 DCHECK_EQ(3, list->length());
1656 DCHECK_EQ(10, list->at(0).from());
1657 DCHECK_EQ(20, list->at(0).to());
1658 DCHECK_EQ(30, list->at(1).from());
1659 DCHECK_EQ(40, list->at(1).to());
1660 DCHECK_EQ(50, list->at(2).from());
1661 DCHECK_EQ(60, list->at(2).to());
Leon Clarkee46be812010-01-19 14:06:41 +00001662
1663 list->Rewind(0);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001664 list->Add(CharacterRange(10, 20), &zone);
1665 list->Add(CharacterRange(50, 60), &zone);
1666 list->Add(CharacterRange(30, 40), &zone);
Leon Clarkee46be812010-01-19 14:06:41 +00001667 set.Canonicalize();
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001668 DCHECK_EQ(3, list->length());
1669 DCHECK_EQ(10, list->at(0).from());
1670 DCHECK_EQ(20, list->at(0).to());
1671 DCHECK_EQ(30, list->at(1).from());
1672 DCHECK_EQ(40, list->at(1).to());
1673 DCHECK_EQ(50, list->at(2).from());
1674 DCHECK_EQ(60, list->at(2).to());
Leon Clarkee46be812010-01-19 14:06:41 +00001675
1676 list->Rewind(0);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001677 list->Add(CharacterRange(30, 40), &zone);
1678 list->Add(CharacterRange(10, 20), &zone);
1679 list->Add(CharacterRange(25, 25), &zone);
1680 list->Add(CharacterRange(100, 100), &zone);
1681 list->Add(CharacterRange(1, 1), &zone);
Leon Clarkee46be812010-01-19 14:06:41 +00001682 set.Canonicalize();
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001683 DCHECK_EQ(5, list->length());
1684 DCHECK_EQ(1, list->at(0).from());
1685 DCHECK_EQ(1, list->at(0).to());
1686 DCHECK_EQ(10, list->at(1).from());
1687 DCHECK_EQ(20, list->at(1).to());
1688 DCHECK_EQ(25, list->at(2).from());
1689 DCHECK_EQ(25, list->at(2).to());
1690 DCHECK_EQ(30, list->at(3).from());
1691 DCHECK_EQ(40, list->at(3).to());
1692 DCHECK_EQ(100, list->at(4).from());
1693 DCHECK_EQ(100, list->at(4).to());
Leon Clarkee46be812010-01-19 14:06:41 +00001694
1695 list->Rewind(0);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001696 list->Add(CharacterRange(10, 19), &zone);
1697 list->Add(CharacterRange(21, 30), &zone);
1698 list->Add(CharacterRange(20, 20), &zone);
Leon Clarkee46be812010-01-19 14:06:41 +00001699 set.Canonicalize();
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001700 DCHECK_EQ(1, list->length());
1701 DCHECK_EQ(10, list->at(0).from());
1702 DCHECK_EQ(30, list->at(0).to());
Leon Clarkee46be812010-01-19 14:06:41 +00001703}
1704
Leon Clarked91b9f72010-01-27 17:25:45 +00001705
1706TEST(CharacterRangeMerge) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001707 Zone zone(CcTest::i_isolate());
1708 ZoneList<CharacterRange> l1(4, &zone);
1709 ZoneList<CharacterRange> l2(4, &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001710 // Create all combinations of intersections of ranges, both singletons and
1711 // longer.
1712
1713 int offset = 0;
1714
1715 // The five kinds of singleton intersections:
1716 // X
1717 // Y - outside before
1718 // Y - outside touching start
1719 // Y - overlap
1720 // Y - outside touching end
1721 // Y - outside after
1722
1723 for (int i = 0; i < 5; i++) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001724 l1.Add(CharacterRange::Singleton(offset + 2), &zone);
1725 l2.Add(CharacterRange::Singleton(offset + i), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001726 offset += 6;
1727 }
1728
1729 // The seven kinds of singleton/non-singleton intersections:
1730 // XXX
1731 // Y - outside before
1732 // Y - outside touching start
1733 // Y - inside touching start
1734 // Y - entirely inside
1735 // Y - inside touching end
1736 // Y - outside touching end
1737 // Y - disjoint after
1738
1739 for (int i = 0; i < 7; i++) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001740 l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone);
1741 l2.Add(CharacterRange::Singleton(offset + i), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001742 offset += 8;
1743 }
1744
1745 // The eleven kinds of non-singleton intersections:
1746 //
1747 // XXXXXXXX
1748 // YYYY - outside before.
1749 // YYYY - outside touching start.
1750 // YYYY - overlapping start
1751 // YYYY - inside touching start
1752 // YYYY - entirely inside
1753 // YYYY - inside touching end
1754 // YYYY - overlapping end
1755 // YYYY - outside touching end
1756 // YYYY - outside after
1757 // YYYYYYYY - identical
1758 // YYYYYYYYYYYY - containing entirely.
1759
1760 for (int i = 0; i < 9; i++) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001761 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); // Length 8.
1762 l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001763 offset += 22;
1764 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001765 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1766 l2.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001767 offset += 22;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001768 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1769 l2.Add(CharacterRange::Range(offset + 4, offset + 17), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001770 offset += 22;
1771
1772 // Different kinds of multi-range overlap:
1773 // XXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXX
1774 // YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y
1775
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001776 l1.Add(CharacterRange::Range(offset, offset + 21), &zone);
1777 l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001778 for (int i = 0; i < 6; i++) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001779 l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone);
1780 l2.Add(CharacterRange::Singleton(offset + 8), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001781 offset += 9;
1782 }
1783
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001784 DCHECK(CharacterRange::IsCanonical(&l1));
1785 DCHECK(CharacterRange::IsCanonical(&l2));
Leon Clarked91b9f72010-01-27 17:25:45 +00001786
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001787 ZoneList<CharacterRange> first_only(4, &zone);
1788 ZoneList<CharacterRange> second_only(4, &zone);
1789 ZoneList<CharacterRange> both(4, &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001790}
Leon Clarkee46be812010-01-19 14:06:41 +00001791
1792
Steve Blocka7e24c12009-10-30 11:49:00 +00001793TEST(Graph) {
Leon Clarkee46be812010-01-19 14:06:41 +00001794 Execute("\\b\\w+\\b", false, true, true);
Steve Blocka7e24c12009-10-30 11:49:00 +00001795}