blob: 071ab1840d0a072f525799aad280ac419bb1331e [file] [log] [blame]
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001// Copyright 2012 the V8 project authors. All rights reserved.
Steve Blocka7e24c12009-10-30 11:49:00 +00002// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6// * Redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer.
8// * Redistributions in binary form must reproduce the above
9// copyright notice, this list of conditions and the following
10// disclaimer in the documentation and/or other materials provided
11// with the distribution.
12// * Neither the name of Google Inc. nor the names of its
13// contributors may be used to endorse or promote products derived
14// from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
Emily Bernierd0a1eb72015-03-24 16:35:39 -040028#include <cstdlib>
29#include <sstream>
Steve Blocka7e24c12009-10-30 11:49:00 +000030
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000031#include "include/v8.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000032#include "src/v8.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000033
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000034#include "src/ast/ast.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000035#include "src/char-predicates-inl.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000036#include "src/ostreams.h"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000037#include "src/regexp/jsregexp.h"
38#include "src/regexp/regexp-macro-assembler.h"
39#include "src/regexp/regexp-macro-assembler-irregexp.h"
40#include "src/regexp/regexp-parser.h"
41#include "src/splay-tree-inl.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000042#include "src/string-stream.h"
Steve Block6ded16b2010-05-10 14:33:55 +010043#ifdef V8_INTERPRETED_REGEXP
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000044#include "src/regexp/interpreter-irregexp.h"
Steve Block6ded16b2010-05-10 14:33:55 +010045#else // V8_INTERPRETED_REGEXP
Ben Murdochb8a8cc12014-11-26 15:28:44 +000046#include "src/macro-assembler.h"
47#if V8_TARGET_ARCH_ARM
48#include "src/arm/assembler-arm.h" // NOLINT
49#include "src/arm/macro-assembler-arm.h"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000050#include "src/regexp/arm/regexp-macro-assembler-arm.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000051#endif
Ben Murdochb8a8cc12014-11-26 15:28:44 +000052#if V8_TARGET_ARCH_ARM64
53#include "src/arm64/assembler-arm64.h"
54#include "src/arm64/macro-assembler-arm64.h"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000055#include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
56#endif
Ben Murdochda12d292016-06-02 14:46:10 +010057#if V8_TARGET_ARCH_S390
58#include "src/regexp/s390/regexp-macro-assembler-s390.h"
59#include "src/s390/assembler-s390.h"
60#include "src/s390/macro-assembler-s390.h"
61#endif
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000062#if V8_TARGET_ARCH_PPC
63#include "src/ppc/assembler-ppc.h"
64#include "src/ppc/macro-assembler-ppc.h"
65#include "src/regexp/ppc/regexp-macro-assembler-ppc.h"
Steve Block44f0eee2011-05-26 01:26:41 +010066#endif
Ben Murdochb8a8cc12014-11-26 15:28:44 +000067#if V8_TARGET_ARCH_MIPS
68#include "src/mips/assembler-mips.h"
69#include "src/mips/macro-assembler-mips.h"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000070#include "src/regexp/mips/regexp-macro-assembler-mips.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000071#endif
Ben Murdochb8a8cc12014-11-26 15:28:44 +000072#if V8_TARGET_ARCH_MIPS64
73#include "src/mips64/assembler-mips64.h"
74#include "src/mips64/macro-assembler-mips64.h"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000075#include "src/regexp/mips64/regexp-macro-assembler-mips64.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000076#endif
77#if V8_TARGET_ARCH_X64
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000078#include "src/regexp/x64/regexp-macro-assembler-x64.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000079#include "src/x64/assembler-x64.h"
80#include "src/x64/macro-assembler-x64.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000081#endif
82#if V8_TARGET_ARCH_IA32
83#include "src/ia32/assembler-ia32.h"
84#include "src/ia32/macro-assembler-ia32.h"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000085#include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000086#endif
87#if V8_TARGET_ARCH_X87
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000088#include "src/regexp/x87/regexp-macro-assembler-x87.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000089#include "src/x87/assembler-x87.h"
90#include "src/x87/macro-assembler-x87.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000091#endif
Steve Block6ded16b2010-05-10 14:33:55 +010092#endif // V8_INTERPRETED_REGEXP
Ben Murdochb8a8cc12014-11-26 15:28:44 +000093#include "test/cctest/cctest.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000094
95using namespace v8::internal;
96
97
Leon Clarkee46be812010-01-19 14:06:41 +000098static bool CheckParse(const char* input) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +000099 v8::HandleScope scope(CcTest::isolate());
Ben Murdochda12d292016-06-02 14:46:10 +0100100 Zone zone(CcTest::i_isolate()->allocator());
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000101 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
Leon Clarkee46be812010-01-19 14:06:41 +0000102 RegExpCompileData result;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000103 return v8::internal::RegExpParser::ParseRegExp(
Ben Murdoch097c5b22016-05-18 11:27:45 +0100104 CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result);
Leon Clarkee46be812010-01-19 14:06:41 +0000105}
106
107
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000108static void CheckParseEq(const char* input, const char* expected,
109 bool unicode = false) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000110 v8::HandleScope scope(CcTest::isolate());
Ben Murdochda12d292016-06-02 14:46:10 +0100111 Zone zone(CcTest::i_isolate()->allocator());
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000112 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
Steve Blocka7e24c12009-10-30 11:49:00 +0000113 RegExpCompileData result;
Ben Murdoch097c5b22016-05-18 11:27:45 +0100114 JSRegExp::Flags flags = JSRegExp::kNone;
115 if (unicode) flags |= JSRegExp::kUnicode;
116 CHECK(v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), &zone,
117 &reader, flags, &result));
Steve Blocka7e24c12009-10-30 11:49:00 +0000118 CHECK(result.tree != NULL);
119 CHECK(result.error.is_null());
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400120 std::ostringstream os;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000121 result.tree->Print(os, &zone);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000122 if (strcmp(expected, os.str().c_str()) != 0) {
123 printf("%s | %s\n", expected, os.str().c_str());
124 }
125 CHECK_EQ(0, strcmp(expected, os.str().c_str()));
Steve Blocka7e24c12009-10-30 11:49:00 +0000126}
127
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000128
Steve Blocka7e24c12009-10-30 11:49:00 +0000129static bool CheckSimple(const char* input) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000130 v8::HandleScope scope(CcTest::isolate());
Ben Murdochda12d292016-06-02 14:46:10 +0100131 Zone zone(CcTest::i_isolate()->allocator());
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000132 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
Steve Blocka7e24c12009-10-30 11:49:00 +0000133 RegExpCompileData result;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000134 CHECK(v8::internal::RegExpParser::ParseRegExp(
Ben Murdoch097c5b22016-05-18 11:27:45 +0100135 CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
Steve Blocka7e24c12009-10-30 11:49:00 +0000136 CHECK(result.tree != NULL);
137 CHECK(result.error.is_null());
138 return result.simple;
139}
140
// Pair of values returned by CheckMinMaxMatch(): the min_match() and
// max_match() reported by a parsed regexp tree.
struct MinMaxPair {
  int min_match;
  int max_match;
};
145
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000146
Steve Blocka7e24c12009-10-30 11:49:00 +0000147static MinMaxPair CheckMinMaxMatch(const char* input) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000148 v8::HandleScope scope(CcTest::isolate());
Ben Murdochda12d292016-06-02 14:46:10 +0100149 Zone zone(CcTest::i_isolate()->allocator());
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000150 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
Steve Blocka7e24c12009-10-30 11:49:00 +0000151 RegExpCompileData result;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000152 CHECK(v8::internal::RegExpParser::ParseRegExp(
Ben Murdoch097c5b22016-05-18 11:27:45 +0100153 CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
Steve Blocka7e24c12009-10-30 11:49:00 +0000154 CHECK(result.tree != NULL);
155 CHECK(result.error.is_null());
156 int min_match = result.tree->min_match();
157 int max_match = result.tree->max_match();
158 MinMaxPair pair = { min_match, max_match };
159 return pair;
160}
161
162
// Checks that the parser rejects |input|.
#define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))
// Checks that CheckSimple(input) equals |simple|. No trailing semicolon: the
// call site supplies it.
#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input))
// Checks the min/max match lengths reported for |input|. Wrapped in
// do/while so the macro expands to exactly one statement.
#define CHECK_MIN_MAX(input, min, max)                                        \
  do {                                                                        \
    MinMaxPair min_max = CheckMinMaxMatch(input);                             \
    CHECK_EQ(min, min_max.min_match);                                         \
    CHECK_EQ(max, min_max.max_match);                                         \
  } while (false)
170
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000171
172void TestRegExpParser(bool lookbehind) {
173 FLAG_harmony_regexp_lookbehind = lookbehind;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000174
Leon Clarkee46be812010-01-19 14:06:41 +0000175 CHECK_PARSE_ERROR("?");
176
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000177 CheckParseEq("abc", "'abc'");
178 CheckParseEq("", "%");
179 CheckParseEq("abc|def", "(| 'abc' 'def')");
180 CheckParseEq("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
181 CheckParseEq("^xxx$", "(: @^i 'xxx' @$i)");
182 CheckParseEq("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
183 CheckParseEq("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
184 CheckParseEq("a*", "(# 0 - g 'a')");
185 CheckParseEq("a*?", "(# 0 - n 'a')");
186 CheckParseEq("abc+", "(: 'ab' (# 1 - g 'c'))");
187 CheckParseEq("abc+?", "(: 'ab' (# 1 - n 'c'))");
188 CheckParseEq("xyz?", "(: 'xy' (# 0 1 g 'z'))");
189 CheckParseEq("xyz??", "(: 'xy' (# 0 1 n 'z'))");
190 CheckParseEq("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
191 CheckParseEq("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
192 CheckParseEq("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
193 CheckParseEq("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
194 CheckParseEq("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
195 CheckParseEq("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
196 CheckParseEq("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
197 CheckParseEq("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
198 CheckParseEq("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
199 CheckParseEq("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
200 CheckParseEq("(?:foo)", "'foo'");
201 CheckParseEq("(?: foo )", "' foo '");
202 CheckParseEq("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
203 CheckParseEq("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
204 CheckParseEq("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
205 CheckParseEq("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000206 if (lookbehind) {
207 CheckParseEq("foo(?<=bar)baz", "(: 'foo' (<- + 'bar') 'baz')");
208 CheckParseEq("foo(?<!bar)baz", "(: 'foo' (<- - 'bar') 'baz')");
209 } else {
210 CHECK_PARSE_ERROR("foo(?<=bar)baz");
211 CHECK_PARSE_ERROR("foo(?<!bar)baz");
212 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000213 CheckParseEq("()", "(^ %)");
214 CheckParseEq("(?=)", "(-> + %)");
Ben Murdoch097c5b22016-05-18 11:27:45 +0100215 CheckParseEq("[]", "^[\\x00-\\u{10ffff}]"); // Doesn't compile on windows
216 CheckParseEq("[^]", "[\\x00-\\u{10ffff}]"); // \uffff isn't in codepage 1252
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000217 CheckParseEq("[x]", "[x]");
218 CheckParseEq("[xyz]", "[x y z]");
219 CheckParseEq("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
220 CheckParseEq("[-123]", "[- 1 2 3]");
221 CheckParseEq("[^123]", "^[1 2 3]");
222 CheckParseEq("]", "']'");
223 CheckParseEq("}", "'}'");
224 CheckParseEq("[a-b-c]", "[a-b - c]");
225 CheckParseEq("[\\d]", "[0-9]");
226 CheckParseEq("[x\\dz]", "[x 0-9 z]");
227 CheckParseEq("[\\d-z]", "[0-9 - z]");
228 CheckParseEq("[\\d-\\d]", "[0-9 - 0-9]");
229 CheckParseEq("[z-\\d]", "[z - 0-9]");
Ben Murdoch086aeea2011-05-13 15:57:08 +0100230 // Control character outside character class.
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000231 CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
232 CheckParseEq("\\c!", "'\\c!'");
233 CheckParseEq("\\c_", "'\\c_'");
234 CheckParseEq("\\c~", "'\\c~'");
235 CheckParseEq("\\c1", "'\\c1'");
Ben Murdoch086aeea2011-05-13 15:57:08 +0100236 // Control character inside character class.
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000237 CheckParseEq("[\\c!]", "[\\ c !]");
238 CheckParseEq("[\\c_]", "[\\x1f]");
239 CheckParseEq("[\\c~]", "[\\ c ~]");
240 CheckParseEq("[\\ca]", "[\\x01]");
241 CheckParseEq("[\\cz]", "[\\x1a]");
242 CheckParseEq("[\\cA]", "[\\x01]");
243 CheckParseEq("[\\cZ]", "[\\x1a]");
244 CheckParseEq("[\\c1]", "[\\x11]");
Ben Murdoch086aeea2011-05-13 15:57:08 +0100245
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000246 CheckParseEq("[a\\]c]", "[a ] c]");
247 CheckParseEq("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
248 CheckParseEq("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");
249 CheckParseEq("\\0", "'\\x00'");
250 CheckParseEq("\\8", "'8'");
251 CheckParseEq("\\9", "'9'");
252 CheckParseEq("\\11", "'\\x09'");
253 CheckParseEq("\\11a", "'\\x09a'");
254 CheckParseEq("\\011", "'\\x09'");
255 CheckParseEq("\\00011", "'\\x0011'");
256 CheckParseEq("\\118", "'\\x098'");
257 CheckParseEq("\\111", "'I'");
258 CheckParseEq("\\1111", "'I1'");
259 CheckParseEq("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
260 CheckParseEq("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
261 CheckParseEq("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
262 CheckParseEq("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
263 CheckParseEq("(x)(x)(x)\\1*",
264 "(: (^ 'x') (^ 'x') (^ 'x')"
265 " (# 0 - g (<- 1)))");
266 CheckParseEq("(x)(x)(x)\\2*",
267 "(: (^ 'x') (^ 'x') (^ 'x')"
268 " (# 0 - g (<- 2)))");
269 CheckParseEq("(x)(x)(x)\\3*",
270 "(: (^ 'x') (^ 'x') (^ 'x')"
271 " (# 0 - g (<- 3)))");
272 CheckParseEq("(x)(x)(x)\\4*",
273 "(: (^ 'x') (^ 'x') (^ 'x')"
274 " (# 0 - g '\\x04'))");
275 CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
276 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
277 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
278 CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
279 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
280 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
281 CheckParseEq("(a)\\1", "(: (^ 'a') (<- 1))");
282 CheckParseEq("(a\\1)", "(^ 'a')");
283 CheckParseEq("(\\1a)", "(^ 'a')");
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000284 CheckParseEq("(\\2)(\\1)", "(: (^ (<- 2)) (^ (<- 1)))");
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000285 CheckParseEq("(?=a)?a", "'a'");
286 CheckParseEq("(?=a){0,10}a", "'a'");
287 CheckParseEq("(?=a){1,10}a", "(: (-> + 'a') 'a')");
288 CheckParseEq("(?=a){9,10}a", "(: (-> + 'a') 'a')");
289 CheckParseEq("(?!a)?a", "'a'");
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000290 CheckParseEq("\\1(a)", "(: (<- 1) (^ 'a'))");
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000291 CheckParseEq("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000292 CheckParseEq("(?!\\1(a\\1)\\1)\\1",
293 "(: (-> - (: (<- 1) (^ 'a') (<- 1))) (<- 1))");
294 CheckParseEq("\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1",
295 "(: (<- 1) (<- 2) (^ (: 'a' (^ 'b') (<- 2))) (<- 1))");
296 if (lookbehind) {
297 CheckParseEq("\\1\\2(a(?<=\\1(b\\1\\2))\\2)\\1",
298 "(: (<- 1) (<- 2) (^ (: 'a' (<- + (^ 'b')) (<- 2))) (<- 1))");
299 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000300 CheckParseEq("[\\0]", "[\\x00]");
301 CheckParseEq("[\\11]", "[\\x09]");
302 CheckParseEq("[\\11a]", "[\\x09 a]");
303 CheckParseEq("[\\011]", "[\\x09]");
304 CheckParseEq("[\\00011]", "[\\x00 1 1]");
305 CheckParseEq("[\\118]", "[\\x09 8]");
306 CheckParseEq("[\\111]", "[I]");
307 CheckParseEq("[\\1111]", "[I 1]");
308 CheckParseEq("\\x34", "'\x34'");
309 CheckParseEq("\\x60", "'\x60'");
310 CheckParseEq("\\x3z", "'x3z'");
311 CheckParseEq("\\c", "'\\c'");
312 CheckParseEq("\\u0034", "'\x34'");
313 CheckParseEq("\\u003z", "'u003z'");
314 CheckParseEq("foo[z]*", "(: 'foo' (# 0 - g [z]))");
Steve Blocka7e24c12009-10-30 11:49:00 +0000315
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000316 // Unicode regexps
317 CheckParseEq("\\u{12345}", "'\\ud808\\udf45'", true);
318 CheckParseEq("\\u{12345}\\u{23456}", "(! '\\ud808\\udf45' '\\ud84d\\udc56')",
319 true);
320 CheckParseEq("\\u{12345}|\\u{23456}", "(| '\\ud808\\udf45' '\\ud84d\\udc56')",
321 true);
322 CheckParseEq("\\u{12345}{3}", "(# 3 3 g '\\ud808\\udf45')", true);
323 CheckParseEq("\\u{12345}*", "(# 0 - g '\\ud808\\udf45')", true);
324
Ben Murdoch097c5b22016-05-18 11:27:45 +0100325 CheckParseEq("\\ud808\\udf45*", "(# 0 - g '\\ud808\\udf45')", true);
326 CheckParseEq("[\\ud808\\udf45-\\ud809\\udccc]", "[\\u{012345}-\\u{0124cc}]",
327 true);
328
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000329 CHECK_SIMPLE("", false);
Steve Blocka7e24c12009-10-30 11:49:00 +0000330 CHECK_SIMPLE("a", true);
331 CHECK_SIMPLE("a|b", false);
332 CHECK_SIMPLE("a\\n", false);
333 CHECK_SIMPLE("^a", false);
334 CHECK_SIMPLE("a$", false);
335 CHECK_SIMPLE("a\\b!", false);
336 CHECK_SIMPLE("a\\Bb", false);
337 CHECK_SIMPLE("a*", false);
338 CHECK_SIMPLE("a*?", false);
339 CHECK_SIMPLE("a?", false);
340 CHECK_SIMPLE("a??", false);
341 CHECK_SIMPLE("a{0,1}?", false);
342 CHECK_SIMPLE("a{1,1}?", false);
343 CHECK_SIMPLE("a{1,2}?", false);
344 CHECK_SIMPLE("a+?", false);
345 CHECK_SIMPLE("(a)", false);
346 CHECK_SIMPLE("(a)\\1", false);
347 CHECK_SIMPLE("(\\1a)", false);
348 CHECK_SIMPLE("\\1(a)", false);
349 CHECK_SIMPLE("a\\s", false);
350 CHECK_SIMPLE("a\\S", false);
351 CHECK_SIMPLE("a\\d", false);
352 CHECK_SIMPLE("a\\D", false);
353 CHECK_SIMPLE("a\\w", false);
354 CHECK_SIMPLE("a\\W", false);
355 CHECK_SIMPLE("a.", false);
356 CHECK_SIMPLE("a\\q", false);
357 CHECK_SIMPLE("a[a]", false);
358 CHECK_SIMPLE("a[^a]", false);
359 CHECK_SIMPLE("a[a-z]", false);
360 CHECK_SIMPLE("a[\\q]", false);
361 CHECK_SIMPLE("a(?:b)", false);
362 CHECK_SIMPLE("a(?=b)", false);
363 CHECK_SIMPLE("a(?!b)", false);
364 CHECK_SIMPLE("\\x60", false);
365 CHECK_SIMPLE("\\u0060", false);
366 CHECK_SIMPLE("\\cA", false);
367 CHECK_SIMPLE("\\q", false);
368 CHECK_SIMPLE("\\1112", false);
369 CHECK_SIMPLE("\\0", false);
370 CHECK_SIMPLE("(a)\\1", false);
371 CHECK_SIMPLE("(?=a)?a", false);
372 CHECK_SIMPLE("(?!a)?a\\1", false);
373 CHECK_SIMPLE("(?:(?=a))a\\1", false);
374
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000375 CheckParseEq("a{}", "'a{}'");
376 CheckParseEq("a{,}", "'a{,}'");
377 CheckParseEq("a{", "'a{'");
378 CheckParseEq("a{z}", "'a{z}'");
379 CheckParseEq("a{1z}", "'a{1z}'");
380 CheckParseEq("a{12z}", "'a{12z}'");
381 CheckParseEq("a{12,", "'a{12,'");
382 CheckParseEq("a{12,3b", "'a{12,3b'");
383 CheckParseEq("{}", "'{}'");
384 CheckParseEq("{,}", "'{,}'");
385 CheckParseEq("{", "'{'");
386 CheckParseEq("{z}", "'{z}'");
387 CheckParseEq("{1z}", "'{1z}'");
388 CheckParseEq("{12z}", "'{12z}'");
389 CheckParseEq("{12,", "'{12,'");
390 CheckParseEq("{12,3b", "'{12,3b'");
Steve Blocka7e24c12009-10-30 11:49:00 +0000391
392 CHECK_MIN_MAX("a", 1, 1);
393 CHECK_MIN_MAX("abc", 3, 3);
394 CHECK_MIN_MAX("a[bc]d", 3, 3);
395 CHECK_MIN_MAX("a|bc", 1, 2);
396 CHECK_MIN_MAX("ab|c", 1, 2);
397 CHECK_MIN_MAX("a||bc", 0, 2);
398 CHECK_MIN_MAX("|", 0, 0);
399 CHECK_MIN_MAX("(?:ab)", 2, 2);
400 CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
401 CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
402 CHECK_MIN_MAX("(ab)", 2, 2);
403 CHECK_MIN_MAX("(ab|cde)", 2, 3);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000404 CHECK_MIN_MAX("(ab)\\1", 2, RegExpTree::kInfinity);
405 CHECK_MIN_MAX("(ab|cde)\\1", 2, RegExpTree::kInfinity);
Steve Blocka7e24c12009-10-30 11:49:00 +0000406 CHECK_MIN_MAX("(?:ab)?", 0, 2);
407 CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
408 CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
409 CHECK_MIN_MAX("a?", 0, 1);
410 CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
411 CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
412 CHECK_MIN_MAX("a??", 0, 1);
413 CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
414 CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
415 CHECK_MIN_MAX("(?:a?)?", 0, 1);
416 CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
417 CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
418 CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
419 CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
420 CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
421 CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
422 CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
423 CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
424 CHECK_MIN_MAX("a{0}", 0, 0);
425 CHECK_MIN_MAX("(?:a+){0}", 0, 0);
426 CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
427 CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
428 CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
429 CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
430 CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
431 CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
432 CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
433 CHECK_MIN_MAX("a\\bc", 2, 2);
434 CHECK_MIN_MAX("a\\Bc", 2, 2);
435 CHECK_MIN_MAX("a\\sc", 3, 3);
436 CHECK_MIN_MAX("a\\Sc", 3, 3);
437 CHECK_MIN_MAX("a(?=b)c", 2, 2);
438 CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
439 CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
Ben Murdoch61f157c2016-09-16 13:49:30 +0100440
441 FLAG_harmony_regexp_named_captures = true;
442 CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<a>",
443 "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))", true);
444 CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<b>",
445 "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))", true);
446 CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<c>",
447 "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))", true);
448 CheckParseEq("(?<a>a)\\k<a>", "(: (^ 'a') (<- 1))", true);
449 CheckParseEq("(?<a>a\\k<a>)", "(^ 'a')", true);
450 CheckParseEq("(?<a>\\k<a>a)", "(^ 'a')", true);
451 CheckParseEq("(?<a>\\k<b>)(?<b>\\k<a>)", "(: (^ (<- 2)) (^ (<- 1)))", true);
452 CheckParseEq("\\k<a>(?<a>a)", "(: (<- 1) (^ 'a'))", true);
453
454 CheckParseEq("(?<\\u{03C0}>a)", "(^ 'a')", true);
455 CheckParseEq("(?<\\u03C0>a)", "(^ 'a')", true);
456 FLAG_harmony_regexp_named_captures = false;
Steve Blocka7e24c12009-10-30 11:49:00 +0000457}
458
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000459
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000460TEST(ParserWithLookbehind) {
461 TestRegExpParser(true); // Lookbehind enabled.
462}
463
464
465TEST(ParserWithoutLookbehind) {
466 TestRegExpParser(true); // Lookbehind enabled.
467}
468
Steve Blocka7e24c12009-10-30 11:49:00 +0000469TEST(ParserRegression) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000470 CheckParseEq("[A-Z$-][x]", "(! [A-Z $ -] [x])");
471 CheckParseEq("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
472 CheckParseEq("{", "'{'");
473 CheckParseEq("a|", "(| 'a' %)");
Steve Blocka7e24c12009-10-30 11:49:00 +0000474}
475
Ben Murdoch61f157c2016-09-16 13:49:30 +0100476static void ExpectError(const char* input, const char* expected,
477 bool unicode = false) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000478 v8::HandleScope scope(CcTest::isolate());
Ben Murdochda12d292016-06-02 14:46:10 +0100479 Zone zone(CcTest::i_isolate()->allocator());
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000480 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
Steve Blocka7e24c12009-10-30 11:49:00 +0000481 RegExpCompileData result;
Ben Murdoch61f157c2016-09-16 13:49:30 +0100482 JSRegExp::Flags flags = JSRegExp::kNone;
483 if (unicode) flags |= JSRegExp::kUnicode;
484 CHECK(!v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), &zone,
485 &reader, flags, &result));
Steve Blocka7e24c12009-10-30 11:49:00 +0000486 CHECK(result.tree == NULL);
487 CHECK(!result.error.is_null());
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000488 v8::base::SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS);
489 CHECK_EQ(0, strcmp(expected, str.get()));
Steve Blocka7e24c12009-10-30 11:49:00 +0000490}
491
492
493TEST(Errors) {
Steve Blocka7e24c12009-10-30 11:49:00 +0000494 const char* kEndBackslash = "\\ at end of pattern";
495 ExpectError("\\", kEndBackslash);
496 const char* kUnterminatedGroup = "Unterminated group";
497 ExpectError("(foo", kUnterminatedGroup);
498 const char* kInvalidGroup = "Invalid group";
499 ExpectError("(?", kInvalidGroup);
500 const char* kUnterminatedCharacterClass = "Unterminated character class";
501 ExpectError("[", kUnterminatedCharacterClass);
502 ExpectError("[a-", kUnterminatedCharacterClass);
503 const char* kNothingToRepeat = "Nothing to repeat";
504 ExpectError("*", kNothingToRepeat);
505 ExpectError("?", kNothingToRepeat);
506 ExpectError("+", kNothingToRepeat);
507 ExpectError("{1}", kNothingToRepeat);
508 ExpectError("{1,2}", kNothingToRepeat);
509 ExpectError("{1,}", kNothingToRepeat);
510
511 // Check that we don't allow more than kMaxCapture captures
512 const int kMaxCaptures = 1 << 16; // Must match RegExpParser::kMaxCaptures.
513 const char* kTooManyCaptures = "Too many captures";
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400514 std::ostringstream os;
Steve Blocka7e24c12009-10-30 11:49:00 +0000515 for (int i = 0; i <= kMaxCaptures; i++) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000516 os << "()";
Steve Blocka7e24c12009-10-30 11:49:00 +0000517 }
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400518 ExpectError(os.str().c_str(), kTooManyCaptures);
Ben Murdoch61f157c2016-09-16 13:49:30 +0100519
520 FLAG_harmony_regexp_named_captures = true;
521 const char* kInvalidCaptureName = "Invalid capture group name";
522 ExpectError("(?<>.)", kInvalidCaptureName, true);
523 ExpectError("(?<1>.)", kInvalidCaptureName, true);
524 ExpectError("(?<_%>.)", kInvalidCaptureName, true);
525 ExpectError("\\k<a", kInvalidCaptureName, true);
526 const char* kDuplicateCaptureName = "Duplicate capture group name";
527 ExpectError("(?<a>.)(?<a>.)", kDuplicateCaptureName, true);
528 const char* kInvalidUnicodeEscape = "Invalid Unicode escape sequence";
529 ExpectError("(?<\\u{FISK}", kInvalidUnicodeEscape, true);
530 const char* kInvalidCaptureReferenced = "Invalid named capture referenced";
531 ExpectError("\\k<a>", kInvalidCaptureReferenced, true);
532 ExpectError("(?<b>)\\k<a>", kInvalidCaptureReferenced, true);
533 const char* kInvalidNamedReference = "Invalid named reference";
534 ExpectError("\\ka", kInvalidNamedReference, true);
535 FLAG_harmony_regexp_named_captures = false;
Steve Blocka7e24c12009-10-30 11:49:00 +0000536}
537
538
539static bool IsDigit(uc16 c) {
540 return ('0' <= c && c <= '9');
541}
542
543
544static bool NotDigit(uc16 c) {
545 return !IsDigit(c);
546}
547
548
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000549static bool IsWhiteSpaceOrLineTerminator(uc16 c) {
550 // According to ECMA 5.1, 15.10.2.12 the CharacterClassEscape \s includes
551 // WhiteSpace (7.2) and LineTerminator (7.3) values.
552 return v8::internal::WhiteSpaceOrLineTerminator::Is(c);
Steve Blocka7e24c12009-10-30 11:49:00 +0000553}
554
555
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000556static bool NotWhiteSpaceNorLineTermiantor(uc16 c) {
557 return !IsWhiteSpaceOrLineTerminator(c);
Steve Blocka7e24c12009-10-30 11:49:00 +0000558}
559
560
561static bool NotWord(uc16 c) {
562 return !IsRegExpWord(c);
563}
564
565
566static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
Ben Murdochda12d292016-06-02 14:46:10 +0100567 Zone zone(CcTest::i_isolate()->allocator());
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000568 ZoneList<CharacterRange>* ranges =
569 new(&zone) ZoneList<CharacterRange>(2, &zone);
570 CharacterRange::AddClassEscape(c, ranges, &zone);
Ben Murdoch097c5b22016-05-18 11:27:45 +0100571 for (uc32 i = 0; i < (1 << 16); i++) {
Steve Blocka7e24c12009-10-30 11:49:00 +0000572 bool in_class = false;
573 for (int j = 0; !in_class && j < ranges->length(); j++) {
574 CharacterRange& range = ranges->at(j);
575 in_class = (range.from() <= i && i <= range.to());
576 }
577 CHECK_EQ(pred(i), in_class);
578 }
579}
580
581
582TEST(CharacterClassEscapes) {
583 TestCharacterClassEscapes('.', IsRegExpNewline);
584 TestCharacterClassEscapes('d', IsDigit);
585 TestCharacterClassEscapes('D', NotDigit);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000586 TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator);
587 TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTermiantor);
Steve Blocka7e24c12009-10-30 11:49:00 +0000588 TestCharacterClassEscapes('w', IsRegExpWord);
589 TestCharacterClassEscapes('W', NotWord);
590}
591
592
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000593static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
594 bool is_one_byte, Zone* zone) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000595 Isolate* isolate = CcTest::i_isolate();
Ben Murdoch8b112d22011-06-08 16:22:53 +0100596 FlatStringReader reader(isolate, CStrVector(input));
Steve Blocka7e24c12009-10-30 11:49:00 +0000597 RegExpCompileData compile_data;
Ben Murdoch097c5b22016-05-18 11:27:45 +0100598 JSRegExp::Flags flags = JSRegExp::kNone;
599 if (multiline) flags = JSRegExp::kMultiline;
600 if (unicode) flags = JSRegExp::kUnicode;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000601 if (!v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), zone,
Ben Murdoch097c5b22016-05-18 11:27:45 +0100602 &reader, flags, &compile_data))
Steve Blocka7e24c12009-10-30 11:49:00 +0000603 return NULL;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000604 Handle<String> pattern = isolate->factory()
605 ->NewStringFromUtf8(CStrVector(input))
606 .ToHandleChecked();
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000607 Handle<String> sample_subject =
608 isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
Ben Murdoch097c5b22016-05-18 11:27:45 +0100609 RegExpEngine::Compile(isolate, zone, &compile_data, flags, pattern,
610 sample_subject, is_one_byte);
Steve Blocka7e24c12009-10-30 11:49:00 +0000611 return compile_data.node;
612}
613
614
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000615static void Execute(const char* input, bool multiline, bool unicode,
616 bool is_one_byte, bool dot_output = false) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000617 v8::HandleScope scope(CcTest::isolate());
Ben Murdochda12d292016-06-02 14:46:10 +0100618 Zone zone(CcTest::i_isolate()->allocator());
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000619 RegExpNode* node = Compile(input, multiline, unicode, is_one_byte, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000620 USE(node);
621#ifdef DEBUG
622 if (dot_output) {
623 RegExpEngine::DotPrint(input, node, false);
Steve Blocka7e24c12009-10-30 11:49:00 +0000624 }
625#endif // DEBUG
626}
627
628
// Configuration type used to instantiate ZoneSplayTree in the splay tree
// test: int keys and int values, ordered by a three-way comparison.
class TestConfig {
 public:
  typedef int Key;
  typedef int Value;
  static const int kNoKey;
  static int NoValue() { return 0; }
  // Returns -1, 0 or 1 when |a| is less than, equal to or greater than |b|.
  static inline int Compare(int a, int b) {
    if (a < b) return -1;
    if (a > b) return 1;
    return 0;
  }
};


const int TestConfig::kNoKey = 0;
Steve Blocka7e24c12009-10-30 11:49:00 +0000647
648
// Deterministic stand-in for a random number source: scales each input by a
// fixed constant and XORs the two products together.
static unsigned PseudoRandom(int i, int j) {
  const int scaled_i = i * 781;
  const int scaled_j = j * 329;
  const int mixed = scaled_i ^ scaled_j;
  return static_cast<unsigned>(mixed);
}
652
653
654TEST(SplayTreeSimple) {
655 static const unsigned kLimit = 1000;
Ben Murdochda12d292016-06-02 14:46:10 +0100656 Zone zone(CcTest::i_isolate()->allocator());
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000657 ZoneSplayTree<TestConfig> tree(&zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000658 bool seen[kLimit];
659 for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
660#define CHECK_MAPS_EQUAL() do { \
661 for (unsigned k = 0; k < kLimit; k++) \
662 CHECK_EQ(seen[k], tree.Find(k, &loc)); \
663 } while (false)
664 for (int i = 0; i < 50; i++) {
665 for (int j = 0; j < 50; j++) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000666 int next = PseudoRandom(i, j) % kLimit;
Steve Blocka7e24c12009-10-30 11:49:00 +0000667 if (seen[next]) {
668 // We've already seen this one. Check the value and remove
669 // it.
670 ZoneSplayTree<TestConfig>::Locator loc;
671 CHECK(tree.Find(next, &loc));
672 CHECK_EQ(next, loc.key());
673 CHECK_EQ(3 * next, loc.value());
674 tree.Remove(next);
675 seen[next] = false;
676 CHECK_MAPS_EQUAL();
677 } else {
678 // Check that it wasn't there already and then add it.
679 ZoneSplayTree<TestConfig>::Locator loc;
680 CHECK(!tree.Find(next, &loc));
681 CHECK(tree.Insert(next, &loc));
682 CHECK_EQ(next, loc.key());
683 loc.set_value(3 * next);
684 seen[next] = true;
685 CHECK_MAPS_EQUAL();
686 }
687 int val = PseudoRandom(j, i) % kLimit;
688 if (seen[val]) {
689 ZoneSplayTree<TestConfig>::Locator loc;
690 CHECK(tree.FindGreatestLessThan(val, &loc));
691 CHECK_EQ(loc.key(), val);
692 break;
693 }
694 val = PseudoRandom(i + j, i - j) % kLimit;
695 if (seen[val]) {
696 ZoneSplayTree<TestConfig>::Locator loc;
697 CHECK(tree.FindLeastGreaterThan(val, &loc));
698 CHECK_EQ(loc.key(), val);
699 break;
700 }
701 }
702 }
703}
704
705
706TEST(DispatchTableConstruction) {
707 // Initialize test data.
708 static const int kLimit = 1000;
709 static const int kRangeCount = 8;
710 static const int kRangeSize = 16;
711 uc16 ranges[kRangeCount][2 * kRangeSize];
712 for (int i = 0; i < kRangeCount; i++) {
713 Vector<uc16> range(ranges[i], 2 * kRangeSize);
714 for (int j = 0; j < 2 * kRangeSize; j++) {
715 range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
716 }
717 range.Sort();
718 for (int j = 1; j < 2 * kRangeSize; j++) {
719 CHECK(range[j-1] <= range[j]);
720 }
721 }
722 // Enter test data into dispatch table.
Ben Murdochda12d292016-06-02 14:46:10 +0100723 Zone zone(CcTest::i_isolate()->allocator());
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000724 DispatchTable table(&zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000725 for (int i = 0; i < kRangeCount; i++) {
726 uc16* range = ranges[i];
727 for (int j = 0; j < 2 * kRangeSize; j += 2)
Ben Murdoch097c5b22016-05-18 11:27:45 +0100728 table.AddRange(CharacterRange::Range(range[j], range[j + 1]), i, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000729 }
730 // Check that the table looks as we would expect
731 for (int p = 0; p < kLimit; p++) {
732 OutSet* outs = table.Get(p);
733 for (int j = 0; j < kRangeCount; j++) {
734 uc16* range = ranges[j];
735 bool is_on = false;
736 for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
737 is_on = (range[k] <= p && p <= range[k + 1]);
738 CHECK_EQ(is_on, outs->Get(j));
739 }
740 }
741}
742
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000743
Leon Clarkee46be812010-01-19 14:06:41 +0000744// Test of debug-only syntax.
745#ifdef DEBUG
746
747TEST(ParsePossessiveRepetition) {
748 bool old_flag_value = FLAG_regexp_possessive_quantifier;
749
750 // Enable possessive quantifier syntax.
751 FLAG_regexp_possessive_quantifier = true;
752
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000753 CheckParseEq("a*+", "(# 0 - p 'a')");
754 CheckParseEq("a++", "(# 1 - p 'a')");
755 CheckParseEq("a?+", "(# 0 1 p 'a')");
756 CheckParseEq("a{10,20}+", "(# 10 20 p 'a')");
757 CheckParseEq("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')");
Leon Clarkee46be812010-01-19 14:06:41 +0000758
759 // Disable possessive quantifier syntax.
760 FLAG_regexp_possessive_quantifier = false;
761
762 CHECK_PARSE_ERROR("a*+");
763 CHECK_PARSE_ERROR("a++");
764 CHECK_PARSE_ERROR("a?+");
765 CHECK_PARSE_ERROR("a{10,20}+");
766 CHECK_PARSE_ERROR("a{10,20}+b");
767
768 FLAG_regexp_possessive_quantifier = old_flag_value;
769}
770
771#endif
Steve Blocka7e24c12009-10-30 11:49:00 +0000772
773// Tests of interpreter.
774
775
Steve Block6ded16b2010-05-10 14:33:55 +0100776#ifndef V8_INTERPRETED_REGEXP
Steve Blocka7e24c12009-10-30 11:49:00 +0000777
// ArchRegExpMacroAssembler names the native regexp macro assembler of the
// architecture this build targets, so the tests below can be written once.
// Note that MIPS and MIPS64 both map to RegExpMacroAssemblerMIPS.
#if V8_TARGET_ARCH_IA32
using ArchRegExpMacroAssembler = RegExpMacroAssemblerIA32;
#elif V8_TARGET_ARCH_X64
using ArchRegExpMacroAssembler = RegExpMacroAssemblerX64;
#elif V8_TARGET_ARCH_ARM
using ArchRegExpMacroAssembler = RegExpMacroAssemblerARM;
#elif V8_TARGET_ARCH_ARM64
using ArchRegExpMacroAssembler = RegExpMacroAssemblerARM64;
#elif V8_TARGET_ARCH_S390
using ArchRegExpMacroAssembler = RegExpMacroAssemblerS390;
#elif V8_TARGET_ARCH_PPC
using ArchRegExpMacroAssembler = RegExpMacroAssemblerPPC;
#elif V8_TARGET_ARCH_MIPS
using ArchRegExpMacroAssembler = RegExpMacroAssemblerMIPS;
#elif V8_TARGET_ARCH_MIPS64
using ArchRegExpMacroAssembler = RegExpMacroAssemblerMIPS;
#elif V8_TARGET_ARCH_X87
using ArchRegExpMacroAssembler = RegExpMacroAssemblerX87;
#endif
797
798class ContextInitializer {
799 public:
800 ContextInitializer()
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000801 : scope_(CcTest::isolate()),
802 env_(v8::Context::New(CcTest::isolate())) {
Steve Blocka7e24c12009-10-30 11:49:00 +0000803 env_->Enter();
804 }
805 ~ContextInitializer() {
806 env_->Exit();
Steve Blocka7e24c12009-10-30 11:49:00 +0000807 }
808 private:
Steve Blocka7e24c12009-10-30 11:49:00 +0000809 v8::HandleScope scope_;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000810 v8::Local<v8::Context> env_;
Steve Blocka7e24c12009-10-30 11:49:00 +0000811};
812
813
814static ArchRegExpMacroAssembler::Result Execute(Code* code,
815 String* input,
816 int start_offset,
817 const byte* input_start,
818 const byte* input_end,
Leon Clarked91b9f72010-01-27 17:25:45 +0000819 int* captures) {
Steve Blocka7e24c12009-10-30 11:49:00 +0000820 return NativeRegExpMacroAssembler::Execute(
821 code,
822 input,
823 start_offset,
824 input_start,
825 input_end,
Steve Block44f0eee2011-05-26 01:26:41 +0100826 captures,
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000827 0,
828 CcTest::i_isolate());
Steve Blocka7e24c12009-10-30 11:49:00 +0000829}
830
831
832TEST(MacroAssemblerNativeSuccess) {
833 v8::V8::Initialize();
834 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000835 Isolate* isolate = CcTest::i_isolate();
836 Factory* factory = isolate->factory();
Ben Murdochda12d292016-06-02 14:46:10 +0100837 Zone zone(CcTest::i_isolate()->allocator());
Steve Blocka7e24c12009-10-30 11:49:00 +0000838
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000839 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
840 4);
Steve Blocka7e24c12009-10-30 11:49:00 +0000841
842 m.Succeed();
843
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000844 Handle<String> source = factory->NewStringFromStaticChars("");
Steve Blocka7e24c12009-10-30 11:49:00 +0000845 Handle<Object> code_object = m.GetCode(source);
846 Handle<Code> code = Handle<Code>::cast(code_object);
847
848 int captures[4] = {42, 37, 87, 117};
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000849 Handle<String> input = factory->NewStringFromStaticChars("foofoo");
850 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +0000851 const byte* start_adr =
852 reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
853
854 NativeRegExpMacroAssembler::Result result =
855 Execute(*code,
856 *input,
857 0,
858 start_adr,
859 start_adr + seq_input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +0000860 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +0000861
862 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
863 CHECK_EQ(-1, captures[0]);
864 CHECK_EQ(-1, captures[1]);
865 CHECK_EQ(-1, captures[2]);
866 CHECK_EQ(-1, captures[3]);
867}
868
869
870TEST(MacroAssemblerNativeSimple) {
871 v8::V8::Initialize();
872 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000873 Isolate* isolate = CcTest::i_isolate();
874 Factory* factory = isolate->factory();
Ben Murdochda12d292016-06-02 14:46:10 +0100875 Zone zone(CcTest::i_isolate()->allocator());
Steve Blocka7e24c12009-10-30 11:49:00 +0000876
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000877 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
878 4);
Steve Blocka7e24c12009-10-30 11:49:00 +0000879
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000880 Label fail, backtrack;
881 m.PushBacktrack(&fail);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000882 m.CheckNotAtStart(0, NULL);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000883 m.LoadCurrentCharacter(2, NULL);
884 m.CheckNotCharacter('o', NULL);
885 m.LoadCurrentCharacter(1, NULL, false);
886 m.CheckNotCharacter('o', NULL);
887 m.LoadCurrentCharacter(0, NULL, false);
888 m.CheckNotCharacter('f', NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +0000889 m.WriteCurrentPositionToRegister(0, 0);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000890 m.WriteCurrentPositionToRegister(1, 3);
Steve Blocka7e24c12009-10-30 11:49:00 +0000891 m.AdvanceCurrentPosition(3);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000892 m.PushBacktrack(&backtrack);
Steve Blocka7e24c12009-10-30 11:49:00 +0000893 m.Succeed();
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000894 m.Bind(&backtrack);
895 m.Backtrack();
Steve Blocka7e24c12009-10-30 11:49:00 +0000896 m.Bind(&fail);
897 m.Fail();
898
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000899 Handle<String> source = factory->NewStringFromStaticChars("^foo");
Steve Blocka7e24c12009-10-30 11:49:00 +0000900 Handle<Object> code_object = m.GetCode(source);
901 Handle<Code> code = Handle<Code>::cast(code_object);
902
903 int captures[4] = {42, 37, 87, 117};
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000904 Handle<String> input = factory->NewStringFromStaticChars("foofoo");
905 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +0000906 Address start_adr = seq_input->GetCharsAddress();
907
908 NativeRegExpMacroAssembler::Result result =
909 Execute(*code,
910 *input,
911 0,
912 start_adr,
913 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +0000914 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +0000915
916 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
917 CHECK_EQ(0, captures[0]);
918 CHECK_EQ(3, captures[1]);
919 CHECK_EQ(-1, captures[2]);
920 CHECK_EQ(-1, captures[3]);
921
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000922 input = factory->NewStringFromStaticChars("barbarbar");
923 seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +0000924 start_adr = seq_input->GetCharsAddress();
925
926 result = Execute(*code,
927 *input,
928 0,
929 start_adr,
930 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +0000931 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +0000932
933 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
934}
935
936
937TEST(MacroAssemblerNativeSimpleUC16) {
938 v8::V8::Initialize();
939 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000940 Isolate* isolate = CcTest::i_isolate();
941 Factory* factory = isolate->factory();
Ben Murdochda12d292016-06-02 14:46:10 +0100942 Zone zone(CcTest::i_isolate()->allocator());
Steve Blocka7e24c12009-10-30 11:49:00 +0000943
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000944 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
945 4);
Steve Blocka7e24c12009-10-30 11:49:00 +0000946
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000947 Label fail, backtrack;
948 m.PushBacktrack(&fail);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000949 m.CheckNotAtStart(0, NULL);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000950 m.LoadCurrentCharacter(2, NULL);
951 m.CheckNotCharacter('o', NULL);
952 m.LoadCurrentCharacter(1, NULL, false);
953 m.CheckNotCharacter('o', NULL);
954 m.LoadCurrentCharacter(0, NULL, false);
955 m.CheckNotCharacter('f', NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +0000956 m.WriteCurrentPositionToRegister(0, 0);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000957 m.WriteCurrentPositionToRegister(1, 3);
Steve Blocka7e24c12009-10-30 11:49:00 +0000958 m.AdvanceCurrentPosition(3);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000959 m.PushBacktrack(&backtrack);
Steve Blocka7e24c12009-10-30 11:49:00 +0000960 m.Succeed();
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000961 m.Bind(&backtrack);
962 m.Backtrack();
Steve Blocka7e24c12009-10-30 11:49:00 +0000963 m.Bind(&fail);
964 m.Fail();
965
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000966 Handle<String> source = factory->NewStringFromStaticChars("^foo");
Steve Blocka7e24c12009-10-30 11:49:00 +0000967 Handle<Object> code_object = m.GetCode(source);
968 Handle<Code> code = Handle<Code>::cast(code_object);
969
970 int captures[4] = {42, 37, 87, 117};
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100971 const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o',
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000972 static_cast<uc16>(0x2603)};
973 Handle<String> input = factory->NewStringFromTwoByte(
974 Vector<const uc16>(input_data, 6)).ToHandleChecked();
Steve Blocka7e24c12009-10-30 11:49:00 +0000975 Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
976 Address start_adr = seq_input->GetCharsAddress();
977
978 NativeRegExpMacroAssembler::Result result =
979 Execute(*code,
980 *input,
981 0,
982 start_adr,
983 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +0000984 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +0000985
986 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
987 CHECK_EQ(0, captures[0]);
988 CHECK_EQ(3, captures[1]);
989 CHECK_EQ(-1, captures[2]);
990 CHECK_EQ(-1, captures[3]);
991
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100992 const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a',
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000993 static_cast<uc16>(0x2603)};
994 input = factory->NewStringFromTwoByte(
995 Vector<const uc16>(input_data2, 9)).ToHandleChecked();
Steve Blocka7e24c12009-10-30 11:49:00 +0000996 seq_input = Handle<SeqTwoByteString>::cast(input);
997 start_adr = seq_input->GetCharsAddress();
998
999 result = Execute(*code,
1000 *input,
1001 0,
1002 start_adr,
1003 start_adr + input->length() * 2,
Leon Clarked91b9f72010-01-27 17:25:45 +00001004 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +00001005
1006 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
1007}
1008
1009
1010TEST(MacroAssemblerNativeBacktrack) {
1011 v8::V8::Initialize();
1012 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001013 Isolate* isolate = CcTest::i_isolate();
1014 Factory* factory = isolate->factory();
Ben Murdochda12d292016-06-02 14:46:10 +01001015 Zone zone(CcTest::i_isolate()->allocator());
Steve Blocka7e24c12009-10-30 11:49:00 +00001016
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001017 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1018 0);
Steve Blocka7e24c12009-10-30 11:49:00 +00001019
1020 Label fail;
1021 Label backtrack;
1022 m.LoadCurrentCharacter(10, &fail);
1023 m.Succeed();
1024 m.Bind(&fail);
1025 m.PushBacktrack(&backtrack);
1026 m.LoadCurrentCharacter(10, NULL);
1027 m.Succeed();
1028 m.Bind(&backtrack);
1029 m.Fail();
1030
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001031 Handle<String> source = factory->NewStringFromStaticChars("..........");
Steve Blocka7e24c12009-10-30 11:49:00 +00001032 Handle<Object> code_object = m.GetCode(source);
1033 Handle<Code> code = Handle<Code>::cast(code_object);
1034
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001035 Handle<String> input = factory->NewStringFromStaticChars("foofoo");
1036 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001037 Address start_adr = seq_input->GetCharsAddress();
1038
1039 NativeRegExpMacroAssembler::Result result =
1040 Execute(*code,
1041 *input,
1042 0,
1043 start_adr,
1044 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001045 NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +00001046
1047 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
1048}
1049
1050
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001051TEST(MacroAssemblerNativeBackReferenceLATIN1) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001052 v8::V8::Initialize();
1053 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001054 Isolate* isolate = CcTest::i_isolate();
1055 Factory* factory = isolate->factory();
Ben Murdochda12d292016-06-02 14:46:10 +01001056 Zone zone(CcTest::i_isolate()->allocator());
Steve Blocka7e24c12009-10-30 11:49:00 +00001057
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001058 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1059 4);
Steve Blocka7e24c12009-10-30 11:49:00 +00001060
1061 m.WriteCurrentPositionToRegister(0, 0);
1062 m.AdvanceCurrentPosition(2);
1063 m.WriteCurrentPositionToRegister(1, 0);
1064 Label nomatch;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001065 m.CheckNotBackReference(0, false, &nomatch);
Steve Blocka7e24c12009-10-30 11:49:00 +00001066 m.Fail();
1067 m.Bind(&nomatch);
1068 m.AdvanceCurrentPosition(2);
1069 Label missing_match;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001070 m.CheckNotBackReference(0, false, &missing_match);
Steve Blocka7e24c12009-10-30 11:49:00 +00001071 m.WriteCurrentPositionToRegister(2, 0);
1072 m.Succeed();
1073 m.Bind(&missing_match);
1074 m.Fail();
1075
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001076 Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
Steve Blocka7e24c12009-10-30 11:49:00 +00001077 Handle<Object> code_object = m.GetCode(source);
1078 Handle<Code> code = Handle<Code>::cast(code_object);
1079
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001080 Handle<String> input = factory->NewStringFromStaticChars("fooofo");
1081 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001082 Address start_adr = seq_input->GetCharsAddress();
1083
1084 int output[4];
1085 NativeRegExpMacroAssembler::Result result =
1086 Execute(*code,
1087 *input,
1088 0,
1089 start_adr,
1090 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001091 output);
Steve Blocka7e24c12009-10-30 11:49:00 +00001092
1093 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1094 CHECK_EQ(0, output[0]);
1095 CHECK_EQ(2, output[1]);
1096 CHECK_EQ(6, output[2]);
1097 CHECK_EQ(-1, output[3]);
1098}
1099
1100
1101TEST(MacroAssemblerNativeBackReferenceUC16) {
1102 v8::V8::Initialize();
1103 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001104 Isolate* isolate = CcTest::i_isolate();
1105 Factory* factory = isolate->factory();
Ben Murdochda12d292016-06-02 14:46:10 +01001106 Zone zone(CcTest::i_isolate()->allocator());
Steve Blocka7e24c12009-10-30 11:49:00 +00001107
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001108 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
1109 4);
Steve Blocka7e24c12009-10-30 11:49:00 +00001110
1111 m.WriteCurrentPositionToRegister(0, 0);
1112 m.AdvanceCurrentPosition(2);
1113 m.WriteCurrentPositionToRegister(1, 0);
1114 Label nomatch;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001115 m.CheckNotBackReference(0, false, &nomatch);
Steve Blocka7e24c12009-10-30 11:49:00 +00001116 m.Fail();
1117 m.Bind(&nomatch);
1118 m.AdvanceCurrentPosition(2);
1119 Label missing_match;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001120 m.CheckNotBackReference(0, false, &missing_match);
Steve Blocka7e24c12009-10-30 11:49:00 +00001121 m.WriteCurrentPositionToRegister(2, 0);
1122 m.Succeed();
1123 m.Bind(&missing_match);
1124 m.Fail();
1125
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001126 Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
Steve Blocka7e24c12009-10-30 11:49:00 +00001127 Handle<Object> code_object = m.GetCode(source);
1128 Handle<Code> code = Handle<Code>::cast(code_object);
1129
1130 const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001131 Handle<String> input = factory->NewStringFromTwoByte(
1132 Vector<const uc16>(input_data, 6)).ToHandleChecked();
Steve Blocka7e24c12009-10-30 11:49:00 +00001133 Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
1134 Address start_adr = seq_input->GetCharsAddress();
1135
1136 int output[4];
1137 NativeRegExpMacroAssembler::Result result =
1138 Execute(*code,
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001139 *input,
1140 0,
1141 start_adr,
1142 start_adr + input->length() * 2,
1143 output);
Steve Blocka7e24c12009-10-30 11:49:00 +00001144
1145 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1146 CHECK_EQ(0, output[0]);
1147 CHECK_EQ(2, output[1]);
1148 CHECK_EQ(6, output[2]);
1149 CHECK_EQ(-1, output[3]);
1150}
1151
1152
1153
1154TEST(MacroAssemblernativeAtStart) {
1155 v8::V8::Initialize();
1156 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001157 Isolate* isolate = CcTest::i_isolate();
1158 Factory* factory = isolate->factory();
Ben Murdochda12d292016-06-02 14:46:10 +01001159 Zone zone(CcTest::i_isolate()->allocator());
Steve Blocka7e24c12009-10-30 11:49:00 +00001160
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001161 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1162 0);
Steve Blocka7e24c12009-10-30 11:49:00 +00001163
1164 Label not_at_start, newline, fail;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001165 m.CheckNotAtStart(0, &not_at_start);
Steve Blocka7e24c12009-10-30 11:49:00 +00001166 // Check that prevchar = '\n' and current = 'f'.
1167 m.CheckCharacter('\n', &newline);
1168 m.Bind(&fail);
1169 m.Fail();
1170 m.Bind(&newline);
1171 m.LoadCurrentCharacter(0, &fail);
1172 m.CheckNotCharacter('f', &fail);
1173 m.Succeed();
1174
1175 m.Bind(&not_at_start);
1176 // Check that prevchar = 'o' and current = 'b'.
1177 Label prevo;
1178 m.CheckCharacter('o', &prevo);
1179 m.Fail();
1180 m.Bind(&prevo);
1181 m.LoadCurrentCharacter(0, &fail);
1182 m.CheckNotCharacter('b', &fail);
1183 m.Succeed();
1184
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001185 Handle<String> source = factory->NewStringFromStaticChars("(^f|ob)");
Steve Blocka7e24c12009-10-30 11:49:00 +00001186 Handle<Object> code_object = m.GetCode(source);
1187 Handle<Code> code = Handle<Code>::cast(code_object);
1188
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001189 Handle<String> input = factory->NewStringFromStaticChars("foobar");
1190 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001191 Address start_adr = seq_input->GetCharsAddress();
1192
1193 NativeRegExpMacroAssembler::Result result =
1194 Execute(*code,
1195 *input,
1196 0,
1197 start_adr,
1198 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001199 NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +00001200
1201 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1202
1203 result = Execute(*code,
1204 *input,
1205 3,
1206 start_adr + 3,
1207 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001208 NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +00001209
1210 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1211}
1212
1213
1214TEST(MacroAssemblerNativeBackRefNoCase) {
1215 v8::V8::Initialize();
1216 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001217 Isolate* isolate = CcTest::i_isolate();
1218 Factory* factory = isolate->factory();
Ben Murdochda12d292016-06-02 14:46:10 +01001219 Zone zone(CcTest::i_isolate()->allocator());
Steve Blocka7e24c12009-10-30 11:49:00 +00001220
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001221 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1222 4);
Steve Blocka7e24c12009-10-30 11:49:00 +00001223
1224 Label fail, succ;
1225
1226 m.WriteCurrentPositionToRegister(0, 0);
1227 m.WriteCurrentPositionToRegister(2, 0);
1228 m.AdvanceCurrentPosition(3);
1229 m.WriteCurrentPositionToRegister(3, 0);
Ben Murdoch097c5b22016-05-18 11:27:45 +01001230 m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail); // Match "AbC".
1231 m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail); // Match "ABC".
Steve Blocka7e24c12009-10-30 11:49:00 +00001232 Label expected_fail;
Ben Murdoch097c5b22016-05-18 11:27:45 +01001233 m.CheckNotBackReferenceIgnoreCase(2, false, false, &expected_fail);
Steve Blocka7e24c12009-10-30 11:49:00 +00001234 m.Bind(&fail);
1235 m.Fail();
1236
1237 m.Bind(&expected_fail);
1238 m.AdvanceCurrentPosition(3); // Skip "xYz"
Ben Murdoch097c5b22016-05-18 11:27:45 +01001239 m.CheckNotBackReferenceIgnoreCase(2, false, false, &succ);
Steve Blocka7e24c12009-10-30 11:49:00 +00001240 m.Fail();
1241
1242 m.Bind(&succ);
1243 m.WriteCurrentPositionToRegister(1, 0);
1244 m.Succeed();
1245
1246 Handle<String> source =
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001247 factory->NewStringFromStaticChars("^(abc)\1\1(?!\1)...(?!\1)");
Steve Blocka7e24c12009-10-30 11:49:00 +00001248 Handle<Object> code_object = m.GetCode(source);
1249 Handle<Code> code = Handle<Code>::cast(code_object);
1250
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001251 Handle<String> input = factory->NewStringFromStaticChars("aBcAbCABCxYzab");
1252 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001253 Address start_adr = seq_input->GetCharsAddress();
1254
1255 int output[4];
1256 NativeRegExpMacroAssembler::Result result =
1257 Execute(*code,
1258 *input,
1259 0,
1260 start_adr,
1261 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001262 output);
Steve Blocka7e24c12009-10-30 11:49:00 +00001263
1264 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1265 CHECK_EQ(0, output[0]);
1266 CHECK_EQ(12, output[1]);
1267 CHECK_EQ(0, output[2]);
1268 CHECK_EQ(3, output[3]);
1269}
1270
1271
1272
1273TEST(MacroAssemblerNativeRegisters) {
1274 v8::V8::Initialize();
1275 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001276 Isolate* isolate = CcTest::i_isolate();
1277 Factory* factory = isolate->factory();
Ben Murdochda12d292016-06-02 14:46:10 +01001278 Zone zone(CcTest::i_isolate()->allocator());
Steve Blocka7e24c12009-10-30 11:49:00 +00001279
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001280 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1281 6);
Steve Blocka7e24c12009-10-30 11:49:00 +00001282
1283 uc16 foo_chars[3] = {'f', 'o', 'o'};
1284 Vector<const uc16> foo(foo_chars, 3);
1285
1286 enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
1287 Label fail;
1288 Label backtrack;
1289 m.WriteCurrentPositionToRegister(out1, 0); // Output: [0]
1290 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1291 m.PushBacktrack(&backtrack);
1292 m.WriteStackPointerToRegister(sp);
1293 // Fill stack and registers
1294 m.AdvanceCurrentPosition(2);
1295 m.WriteCurrentPositionToRegister(out1, 0);
1296 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1297 m.PushBacktrack(&fail);
1298 // Drop backtrack stack frames.
1299 m.ReadStackPointerFromRegister(sp);
1300 // And take the first backtrack (to &backtrack)
1301 m.Backtrack();
1302
1303 m.PushCurrentPosition();
1304 m.AdvanceCurrentPosition(2);
1305 m.PopCurrentPosition();
1306
1307 m.Bind(&backtrack);
1308 m.PopRegister(out1);
1309 m.ReadCurrentPositionFromRegister(out1);
1310 m.AdvanceCurrentPosition(3);
1311 m.WriteCurrentPositionToRegister(out2, 0); // [0,3]
1312
1313 Label loop;
1314 m.SetRegister(loop_cnt, 0); // loop counter
1315 m.Bind(&loop);
1316 m.AdvanceRegister(loop_cnt, 1);
1317 m.AdvanceCurrentPosition(1);
1318 m.IfRegisterLT(loop_cnt, 3, &loop);
1319 m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6]
1320
1321 Label loop2;
1322 m.SetRegister(loop_cnt, 2); // loop counter
1323 m.Bind(&loop2);
1324 m.AdvanceRegister(loop_cnt, -1);
1325 m.AdvanceCurrentPosition(1);
1326 m.IfRegisterGE(loop_cnt, 0, &loop2);
1327 m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9]
1328
1329 Label loop3;
1330 Label exit_loop3;
1331 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1332 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1333 m.ReadCurrentPositionFromRegister(out3);
1334 m.Bind(&loop3);
1335 m.AdvanceCurrentPosition(1);
1336 m.CheckGreedyLoop(&exit_loop3);
1337 m.GoTo(&loop3);
1338 m.Bind(&exit_loop3);
1339 m.PopCurrentPosition();
1340 m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9,-1]
1341
1342 m.Succeed();
1343
1344 m.Bind(&fail);
1345 m.Fail();
1346
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001347 Handle<String> source = factory->NewStringFromStaticChars("<loop test>");
Steve Blocka7e24c12009-10-30 11:49:00 +00001348 Handle<Object> code_object = m.GetCode(source);
1349 Handle<Code> code = Handle<Code>::cast(code_object);
1350
1351 // String long enough for test (content doesn't matter).
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001352 Handle<String> input = factory->NewStringFromStaticChars("foofoofoofoofoo");
1353 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001354 Address start_adr = seq_input->GetCharsAddress();
1355
1356 int output[6];
1357 NativeRegExpMacroAssembler::Result result =
1358 Execute(*code,
1359 *input,
1360 0,
1361 start_adr,
1362 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001363 output);
Steve Blocka7e24c12009-10-30 11:49:00 +00001364
1365 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1366 CHECK_EQ(0, output[0]);
1367 CHECK_EQ(3, output[1]);
1368 CHECK_EQ(6, output[2]);
1369 CHECK_EQ(9, output[3]);
1370 CHECK_EQ(9, output[4]);
1371 CHECK_EQ(-1, output[5]);
1372}
1373
1374
1375TEST(MacroAssemblerStackOverflow) {
1376 v8::V8::Initialize();
1377 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001378 Isolate* isolate = CcTest::i_isolate();
Ben Murdoch8b112d22011-06-08 16:22:53 +01001379 Factory* factory = isolate->factory();
Ben Murdochda12d292016-06-02 14:46:10 +01001380 Zone zone(CcTest::i_isolate()->allocator());
Steve Blocka7e24c12009-10-30 11:49:00 +00001381
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001382 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1383 0);
Steve Blocka7e24c12009-10-30 11:49:00 +00001384
1385 Label loop;
1386 m.Bind(&loop);
1387 m.PushBacktrack(&loop);
1388 m.GoTo(&loop);
1389
1390 Handle<String> source =
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001391 factory->NewStringFromStaticChars("<stack overflow test>");
Steve Blocka7e24c12009-10-30 11:49:00 +00001392 Handle<Object> code_object = m.GetCode(source);
1393 Handle<Code> code = Handle<Code>::cast(code_object);
1394
1395 // String long enough for test (content doesn't matter).
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001396 Handle<String> input = factory->NewStringFromStaticChars("dummy");
1397 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001398 Address start_adr = seq_input->GetCharsAddress();
1399
1400 NativeRegExpMacroAssembler::Result result =
1401 Execute(*code,
1402 *input,
1403 0,
1404 start_adr,
1405 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001406 NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +00001407
1408 CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result);
Ben Murdoch8b112d22011-06-08 16:22:53 +01001409 CHECK(isolate->has_pending_exception());
1410 isolate->clear_pending_exception();
Steve Blocka7e24c12009-10-30 11:49:00 +00001411}
1412
1413
1414TEST(MacroAssemblerNativeLotsOfRegisters) {
1415 v8::V8::Initialize();
1416 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001417 Isolate* isolate = CcTest::i_isolate();
Ben Murdoch8b112d22011-06-08 16:22:53 +01001418 Factory* factory = isolate->factory();
Ben Murdochda12d292016-06-02 14:46:10 +01001419 Zone zone(CcTest::i_isolate()->allocator());
Steve Blocka7e24c12009-10-30 11:49:00 +00001420
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001421 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1422 2);
Steve Blocka7e24c12009-10-30 11:49:00 +00001423
1424 // At least 2048, to ensure the allocated space for registers
1425 // span one full page.
1426 const int large_number = 8000;
1427 m.WriteCurrentPositionToRegister(large_number, 42);
1428 m.WriteCurrentPositionToRegister(0, 0);
1429 m.WriteCurrentPositionToRegister(1, 1);
1430 Label done;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001431 m.CheckNotBackReference(0, false, &done); // Performs a system-stack push.
Steve Blocka7e24c12009-10-30 11:49:00 +00001432 m.Bind(&done);
1433 m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck);
1434 m.PopRegister(1);
1435 m.Succeed();
1436
1437 Handle<String> source =
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001438 factory->NewStringFromStaticChars("<huge register space test>");
Steve Blocka7e24c12009-10-30 11:49:00 +00001439 Handle<Object> code_object = m.GetCode(source);
1440 Handle<Code> code = Handle<Code>::cast(code_object);
1441
1442 // String long enough for test (content doesn't matter).
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001443 Handle<String> input = factory->NewStringFromStaticChars("sample text");
1444 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001445 Address start_adr = seq_input->GetCharsAddress();
1446
1447 int captures[2];
1448 NativeRegExpMacroAssembler::Result result =
1449 Execute(*code,
1450 *input,
1451 0,
1452 start_adr,
1453 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001454 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +00001455
1456 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1457 CHECK_EQ(0, captures[0]);
1458 CHECK_EQ(42, captures[1]);
1459
Ben Murdoch8b112d22011-06-08 16:22:53 +01001460 isolate->clear_pending_exception();
Steve Blocka7e24c12009-10-30 11:49:00 +00001461}
1462
Steve Block6ded16b2010-05-10 14:33:55 +01001463#else // V8_INTERPRETED_REGEXP
Steve Blocka7e24c12009-10-30 11:49:00 +00001464
1465TEST(MacroAssembler) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001466 byte codes[1024];
Ben Murdochda12d292016-06-02 14:46:10 +01001467 Zone zone(CcTest::i_isolate()->allocator());
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001468 RegExpMacroAssemblerIrregexp m(CcTest::i_isolate(), Vector<byte>(codes, 1024),
1469 &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001470 // ^f(o)o.
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001471 Label start, fail, backtrack;
1472
Steve Blocka7e24c12009-10-30 11:49:00 +00001473 m.SetRegister(4, 42);
1474 m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
1475 m.AdvanceRegister(4, 42);
1476 m.GoTo(&start);
1477 m.Fail();
1478 m.Bind(&start);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001479 m.PushBacktrack(&fail);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001480 m.CheckNotAtStart(0, NULL);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001481 m.LoadCurrentCharacter(0, NULL);
1482 m.CheckNotCharacter('f', NULL);
1483 m.LoadCurrentCharacter(1, NULL);
1484 m.CheckNotCharacter('o', NULL);
1485 m.LoadCurrentCharacter(2, NULL);
1486 m.CheckNotCharacter('o', NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +00001487 m.WriteCurrentPositionToRegister(0, 0);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001488 m.WriteCurrentPositionToRegister(1, 3);
1489 m.WriteCurrentPositionToRegister(2, 1);
1490 m.WriteCurrentPositionToRegister(3, 2);
Steve Blocka7e24c12009-10-30 11:49:00 +00001491 m.AdvanceCurrentPosition(3);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001492 m.PushBacktrack(&backtrack);
Steve Blocka7e24c12009-10-30 11:49:00 +00001493 m.Succeed();
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001494 m.Bind(&backtrack);
1495 m.ClearRegisters(2, 3);
Steve Blocka7e24c12009-10-30 11:49:00 +00001496 m.Backtrack();
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001497 m.Bind(&fail);
Steve Blocka7e24c12009-10-30 11:49:00 +00001498 m.PopRegister(0);
1499 m.Fail();
1500
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001501 Isolate* isolate = CcTest::i_isolate();
Ben Murdoch8b112d22011-06-08 16:22:53 +01001502 Factory* factory = isolate->factory();
1503 HandleScope scope(isolate);
Steve Blocka7e24c12009-10-30 11:49:00 +00001504
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001505 Handle<String> source = factory->NewStringFromStaticChars("^f(o)o");
Steve Blocka7e24c12009-10-30 11:49:00 +00001506 Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
1507 int captures[5];
1508
1509 const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001510 Handle<String> f1_16 = factory->NewStringFromTwoByte(
1511 Vector<const uc16>(str1, 6)).ToHandleChecked();
Steve Blocka7e24c12009-10-30 11:49:00 +00001512
Ben Murdoch8b112d22011-06-08 16:22:53 +01001513 CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0));
Steve Blocka7e24c12009-10-30 11:49:00 +00001514 CHECK_EQ(0, captures[0]);
1515 CHECK_EQ(3, captures[1]);
1516 CHECK_EQ(1, captures[2]);
1517 CHECK_EQ(2, captures[3]);
1518 CHECK_EQ(84, captures[4]);
1519
1520 const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001521 Handle<String> f2_16 = factory->NewStringFromTwoByte(
1522 Vector<const uc16>(str2, 6)).ToHandleChecked();
Steve Blocka7e24c12009-10-30 11:49:00 +00001523
Ben Murdoch8b112d22011-06-08 16:22:53 +01001524 CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0));
Steve Blocka7e24c12009-10-30 11:49:00 +00001525 CHECK_EQ(42, captures[0]);
1526}
1527
Steve Block6ded16b2010-05-10 14:33:55 +01001528#endif // V8_INTERPRETED_REGEXP
Steve Blocka7e24c12009-10-30 11:49:00 +00001529
1530
1531TEST(AddInverseToTable) {
1532 static const int kLimit = 1000;
1533 static const int kRangeCount = 16;
1534 for (int t = 0; t < 10; t++) {
Ben Murdochda12d292016-06-02 14:46:10 +01001535 Zone zone(CcTest::i_isolate()->allocator());
Steve Blocka7e24c12009-10-30 11:49:00 +00001536 ZoneList<CharacterRange>* ranges =
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001537 new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001538 for (int i = 0; i < kRangeCount; i++) {
1539 int from = PseudoRandom(t + 87, i + 25) % kLimit;
1540 int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));
1541 if (to > kLimit) to = kLimit;
Ben Murdoch097c5b22016-05-18 11:27:45 +01001542 ranges->Add(CharacterRange::Range(from, to), &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001543 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001544 DispatchTable table(&zone);
1545 DispatchTableConstructor cons(&table, false, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001546 cons.set_choice_index(0);
1547 cons.AddInverse(ranges);
1548 for (int i = 0; i < kLimit; i++) {
1549 bool is_on = false;
1550 for (int j = 0; !is_on && j < kRangeCount; j++)
1551 is_on = ranges->at(j).Contains(i);
1552 OutSet* set = table.Get(i);
1553 CHECK_EQ(is_on, set->Get(0) == false);
1554 }
1555 }
Ben Murdochda12d292016-06-02 14:46:10 +01001556 Zone zone(CcTest::i_isolate()->allocator());
Steve Blocka7e24c12009-10-30 11:49:00 +00001557 ZoneList<CharacterRange>* ranges =
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001558 new(&zone) ZoneList<CharacterRange>(1, &zone);
Ben Murdoch097c5b22016-05-18 11:27:45 +01001559 ranges->Add(CharacterRange::Range(0xFFF0, 0xFFFE), &zone);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001560 DispatchTable table(&zone);
1561 DispatchTableConstructor cons(&table, false, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001562 cons.set_choice_index(0);
1563 cons.AddInverse(ranges);
1564 CHECK(!table.Get(0xFFFE)->Get(0));
1565 CHECK(table.Get(0xFFFF)->Get(0));
1566}
1567
1568
1569static uc32 canonicalize(uc32 c) {
1570 unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
1571 int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL);
1572 if (count == 0) {
1573 return c;
1574 } else {
1575 CHECK_EQ(1, count);
1576 return canon[0];
1577 }
1578}
1579
1580
1581TEST(LatinCanonicalize) {
1582 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001583 for (unibrow::uchar lower = 'a'; lower <= 'z'; lower++) {
1584 unibrow::uchar upper = lower + ('A' - 'a');
Steve Blocka7e24c12009-10-30 11:49:00 +00001585 CHECK_EQ(canonicalize(lower), canonicalize(upper));
1586 unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1587 int length = un_canonicalize.get(lower, '\0', uncanon);
1588 CHECK_EQ(2, length);
1589 CHECK_EQ(upper, uncanon[0]);
1590 CHECK_EQ(lower, uncanon[1]);
1591 }
1592 for (uc32 c = 128; c < (1 << 21); c++)
1593 CHECK_GE(canonicalize(c), 128);
1594 unibrow::Mapping<unibrow::ToUppercase> to_upper;
Ben Murdochbb769b22010-08-11 14:56:33 +01001595 // Canonicalization is only defined for the Basic Multilingual Plane.
1596 for (uc32 c = 0; c < (1 << 16); c++) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001597 unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
1598 int length = to_upper.get(c, '\0', upper);
1599 if (length == 0) {
1600 length = 1;
1601 upper[0] = c;
1602 }
1603 uc32 u = upper[0];
1604 if (length > 1 || (c >= 128 && u < 128))
1605 u = c;
1606 CHECK_EQ(u, canonicalize(c));
1607 }
1608}
1609
1610
Ben Murdochbb769b22010-08-11 14:56:33 +01001611static uc32 CanonRangeEnd(uc32 c) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001612 unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
1613 int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);
1614 if (count == 0) {
1615 return c;
1616 } else {
1617 CHECK_EQ(1, count);
1618 return canon[0];
1619 }
1620}
1621
1622
1623TEST(RangeCanonicalization) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001624 // Check that we arrive at the same result when using the basic
1625 // range canonicalization primitives as when using immediate
1626 // canonicalization.
1627 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
Ben Murdochbb769b22010-08-11 14:56:33 +01001628 int block_start = 0;
1629 while (block_start <= 0xFFFF) {
1630 uc32 block_end = CanonRangeEnd(block_start);
1631 unsigned block_length = block_end - block_start + 1;
1632 if (block_length > 1) {
1633 unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1634 int first_length = un_canonicalize.get(block_start, '\0', first);
1635 for (unsigned i = 1; i < block_length; i++) {
1636 unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1637 int succ_length = un_canonicalize.get(block_start + i, '\0', succ);
1638 CHECK_EQ(first_length, succ_length);
1639 for (int j = 0; j < succ_length; j++) {
1640 int calc = first[j] + i;
1641 int found = succ[j];
1642 CHECK_EQ(calc, found);
1643 }
Steve Blocka7e24c12009-10-30 11:49:00 +00001644 }
1645 }
Ben Murdochbb769b22010-08-11 14:56:33 +01001646 block_start = block_start + block_length;
Steve Blocka7e24c12009-10-30 11:49:00 +00001647 }
1648}
1649
1650
1651TEST(UncanonicalizeEquivalence) {
1652 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1653 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1654 for (int i = 0; i < (1 << 16); i++) {
1655 int length = un_canonicalize.get(i, '\0', chars);
1656 for (int j = 0; j < length; j++) {
1657 unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1658 int length2 = un_canonicalize.get(chars[j], '\0', chars2);
1659 CHECK_EQ(length, length2);
1660 for (int k = 0; k < length; k++)
1661 CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k]));
1662 }
1663 }
1664}
1665
1666
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001667static void TestRangeCaseIndependence(Isolate* isolate, CharacterRange input,
Steve Blocka7e24c12009-10-30 11:49:00 +00001668 Vector<CharacterRange> expected) {
Ben Murdochda12d292016-06-02 14:46:10 +01001669 Zone zone(CcTest::i_isolate()->allocator());
Steve Blocka7e24c12009-10-30 11:49:00 +00001670 int count = expected.length();
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001671 ZoneList<CharacterRange>* list =
1672 new(&zone) ZoneList<CharacterRange>(count, &zone);
Ben Murdoch097c5b22016-05-18 11:27:45 +01001673 list->Add(input, &zone);
1674 CharacterRange::AddCaseEquivalents(isolate, &zone, list, false);
1675 list->Remove(0); // Remove the input before checking results.
Steve Blocka7e24c12009-10-30 11:49:00 +00001676 CHECK_EQ(count, list->length());
1677 for (int i = 0; i < list->length(); i++) {
1678 CHECK_EQ(expected[i].from(), list->at(i).from());
1679 CHECK_EQ(expected[i].to(), list->at(i).to());
1680 }
1681}
1682
1683
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001684static void TestSimpleRangeCaseIndependence(Isolate* isolate,
1685 CharacterRange input,
Steve Blocka7e24c12009-10-30 11:49:00 +00001686 CharacterRange expected) {
1687 EmbeddedVector<CharacterRange, 1> vector;
1688 vector[0] = expected;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001689 TestRangeCaseIndependence(isolate, input, vector);
Steve Blocka7e24c12009-10-30 11:49:00 +00001690}
1691
1692
1693TEST(CharacterRangeCaseIndependence) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001694 Isolate* isolate = CcTest::i_isolate();
1695 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('a'),
Steve Blocka7e24c12009-10-30 11:49:00 +00001696 CharacterRange::Singleton('A'));
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001697 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('z'),
Steve Blocka7e24c12009-10-30 11:49:00 +00001698 CharacterRange::Singleton('Z'));
Ben Murdoch097c5b22016-05-18 11:27:45 +01001699 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'z'),
1700 CharacterRange::Range('A', 'Z'));
1701 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('c', 'f'),
1702 CharacterRange::Range('C', 'F'));
1703 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'b'),
1704 CharacterRange::Range('A', 'B'));
1705 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('y', 'z'),
1706 CharacterRange::Range('Y', 'Z'));
1707 TestSimpleRangeCaseIndependence(isolate,
1708 CharacterRange::Range('a' - 1, 'z' + 1),
1709 CharacterRange::Range('A', 'Z'));
1710 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'Z'),
1711 CharacterRange::Range('a', 'z'));
1712 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('C', 'F'),
1713 CharacterRange::Range('c', 'f'));
1714 TestSimpleRangeCaseIndependence(isolate,
1715 CharacterRange::Range('A' - 1, 'Z' + 1),
1716 CharacterRange::Range('a', 'z'));
Steve Blocka7e24c12009-10-30 11:49:00 +00001717 // Here we need to add [l-z] to complete the case independence of
1718 // [A-Za-z] but we expect [a-z] to be added since we always add a
1719 // whole block at a time.
Ben Murdoch097c5b22016-05-18 11:27:45 +01001720 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'k'),
1721 CharacterRange::Range('a', 'z'));
Steve Blocka7e24c12009-10-30 11:49:00 +00001722}
1723
1724
Ben Murdoch097c5b22016-05-18 11:27:45 +01001725static bool InClass(uc32 c, ZoneList<CharacterRange>* ranges) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001726 if (ranges == NULL)
1727 return false;
1728 for (int i = 0; i < ranges->length(); i++) {
1729 CharacterRange range = ranges->at(i);
1730 if (range.from() <= c && c <= range.to())
1731 return true;
1732 }
1733 return false;
1734}
1735
1736
Ben Murdoch097c5b22016-05-18 11:27:45 +01001737TEST(UnicodeRangeSplitter) {
Ben Murdochda12d292016-06-02 14:46:10 +01001738 Zone zone(CcTest::i_isolate()->allocator());
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001739 ZoneList<CharacterRange>* base =
1740 new(&zone) ZoneList<CharacterRange>(1, &zone);
1741 base->Add(CharacterRange::Everything(), &zone);
Ben Murdoch097c5b22016-05-18 11:27:45 +01001742 UnicodeRangeSplitter splitter(&zone, base);
1743 // BMP
1744 for (uc32 c = 0; c < 0xd800; c++) {
1745 CHECK(InClass(c, splitter.bmp()));
1746 CHECK(!InClass(c, splitter.lead_surrogates()));
1747 CHECK(!InClass(c, splitter.trail_surrogates()));
1748 CHECK(!InClass(c, splitter.non_bmp()));
1749 }
1750 // Lead surrogates
1751 for (uc32 c = 0xd800; c < 0xdbff; c++) {
1752 CHECK(!InClass(c, splitter.bmp()));
1753 CHECK(InClass(c, splitter.lead_surrogates()));
1754 CHECK(!InClass(c, splitter.trail_surrogates()));
1755 CHECK(!InClass(c, splitter.non_bmp()));
1756 }
1757 // Trail surrogates
1758 for (uc32 c = 0xdc00; c < 0xdfff; c++) {
1759 CHECK(!InClass(c, splitter.bmp()));
1760 CHECK(!InClass(c, splitter.lead_surrogates()));
1761 CHECK(InClass(c, splitter.trail_surrogates()));
1762 CHECK(!InClass(c, splitter.non_bmp()));
1763 }
1764 // BMP
1765 for (uc32 c = 0xe000; c < 0xffff; c++) {
1766 CHECK(InClass(c, splitter.bmp()));
1767 CHECK(!InClass(c, splitter.lead_surrogates()));
1768 CHECK(!InClass(c, splitter.trail_surrogates()));
1769 CHECK(!InClass(c, splitter.non_bmp()));
1770 }
1771 // Non-BMP
1772 for (uc32 c = 0x10000; c < 0x10ffff; c++) {
1773 CHECK(!InClass(c, splitter.bmp()));
1774 CHECK(!InClass(c, splitter.lead_surrogates()));
1775 CHECK(!InClass(c, splitter.trail_surrogates()));
1776 CHECK(InClass(c, splitter.non_bmp()));
Steve Blocka7e24c12009-10-30 11:49:00 +00001777 }
1778}
1779
1780
Leon Clarkee46be812010-01-19 14:06:41 +00001781TEST(CanonicalizeCharacterSets) {
Ben Murdochda12d292016-06-02 14:46:10 +01001782 Zone zone(CcTest::i_isolate()->allocator());
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001783 ZoneList<CharacterRange>* list =
1784 new(&zone) ZoneList<CharacterRange>(4, &zone);
Leon Clarkee46be812010-01-19 14:06:41 +00001785 CharacterSet set(list);
1786
Ben Murdoch097c5b22016-05-18 11:27:45 +01001787 list->Add(CharacterRange::Range(10, 20), &zone);
1788 list->Add(CharacterRange::Range(30, 40), &zone);
1789 list->Add(CharacterRange::Range(50, 60), &zone);
Leon Clarkee46be812010-01-19 14:06:41 +00001790 set.Canonicalize();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001791 CHECK_EQ(3, list->length());
1792 CHECK_EQ(10, list->at(0).from());
1793 CHECK_EQ(20, list->at(0).to());
1794 CHECK_EQ(30, list->at(1).from());
1795 CHECK_EQ(40, list->at(1).to());
1796 CHECK_EQ(50, list->at(2).from());
1797 CHECK_EQ(60, list->at(2).to());
Leon Clarkee46be812010-01-19 14:06:41 +00001798
1799 list->Rewind(0);
Ben Murdoch097c5b22016-05-18 11:27:45 +01001800 list->Add(CharacterRange::Range(10, 20), &zone);
1801 list->Add(CharacterRange::Range(50, 60), &zone);
1802 list->Add(CharacterRange::Range(30, 40), &zone);
Leon Clarkee46be812010-01-19 14:06:41 +00001803 set.Canonicalize();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001804 CHECK_EQ(3, list->length());
1805 CHECK_EQ(10, list->at(0).from());
1806 CHECK_EQ(20, list->at(0).to());
1807 CHECK_EQ(30, list->at(1).from());
1808 CHECK_EQ(40, list->at(1).to());
1809 CHECK_EQ(50, list->at(2).from());
1810 CHECK_EQ(60, list->at(2).to());
Leon Clarkee46be812010-01-19 14:06:41 +00001811
1812 list->Rewind(0);
Ben Murdoch097c5b22016-05-18 11:27:45 +01001813 list->Add(CharacterRange::Range(30, 40), &zone);
1814 list->Add(CharacterRange::Range(10, 20), &zone);
1815 list->Add(CharacterRange::Range(25, 25), &zone);
1816 list->Add(CharacterRange::Range(100, 100), &zone);
1817 list->Add(CharacterRange::Range(1, 1), &zone);
Leon Clarkee46be812010-01-19 14:06:41 +00001818 set.Canonicalize();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001819 CHECK_EQ(5, list->length());
1820 CHECK_EQ(1, list->at(0).from());
1821 CHECK_EQ(1, list->at(0).to());
1822 CHECK_EQ(10, list->at(1).from());
1823 CHECK_EQ(20, list->at(1).to());
1824 CHECK_EQ(25, list->at(2).from());
1825 CHECK_EQ(25, list->at(2).to());
1826 CHECK_EQ(30, list->at(3).from());
1827 CHECK_EQ(40, list->at(3).to());
1828 CHECK_EQ(100, list->at(4).from());
1829 CHECK_EQ(100, list->at(4).to());
Leon Clarkee46be812010-01-19 14:06:41 +00001830
1831 list->Rewind(0);
Ben Murdoch097c5b22016-05-18 11:27:45 +01001832 list->Add(CharacterRange::Range(10, 19), &zone);
1833 list->Add(CharacterRange::Range(21, 30), &zone);
1834 list->Add(CharacterRange::Range(20, 20), &zone);
Leon Clarkee46be812010-01-19 14:06:41 +00001835 set.Canonicalize();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001836 CHECK_EQ(1, list->length());
1837 CHECK_EQ(10, list->at(0).from());
1838 CHECK_EQ(30, list->at(0).to());
Leon Clarkee46be812010-01-19 14:06:41 +00001839}
1840
Leon Clarked91b9f72010-01-27 17:25:45 +00001841
1842TEST(CharacterRangeMerge) {
Ben Murdochda12d292016-06-02 14:46:10 +01001843 Zone zone(CcTest::i_isolate()->allocator());
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001844 ZoneList<CharacterRange> l1(4, &zone);
1845 ZoneList<CharacterRange> l2(4, &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001846 // Create all combinations of intersections of ranges, both singletons and
1847 // longer.
1848
1849 int offset = 0;
1850
1851 // The five kinds of singleton intersections:
1852 // X
1853 // Y - outside before
1854 // Y - outside touching start
1855 // Y - overlap
1856 // Y - outside touching end
1857 // Y - outside after
1858
1859 for (int i = 0; i < 5; i++) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001860 l1.Add(CharacterRange::Singleton(offset + 2), &zone);
1861 l2.Add(CharacterRange::Singleton(offset + i), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001862 offset += 6;
1863 }
1864
1865 // The seven kinds of singleton/non-singleton intersections:
1866 // XXX
1867 // Y - outside before
1868 // Y - outside touching start
1869 // Y - inside touching start
1870 // Y - entirely inside
1871 // Y - inside touching end
1872 // Y - outside touching end
1873 // Y - disjoint after
1874
1875 for (int i = 0; i < 7; i++) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001876 l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone);
1877 l2.Add(CharacterRange::Singleton(offset + i), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001878 offset += 8;
1879 }
1880
1881 // The eleven kinds of non-singleton intersections:
1882 //
1883 // XXXXXXXX
1884 // YYYY - outside before.
1885 // YYYY - outside touching start.
1886 // YYYY - overlapping start
1887 // YYYY - inside touching start
1888 // YYYY - entirely inside
1889 // YYYY - inside touching end
1890 // YYYY - overlapping end
1891 // YYYY - outside touching end
1892 // YYYY - outside after
1893 // YYYYYYYY - identical
1894 // YYYYYYYYYYYY - containing entirely.
1895
1896 for (int i = 0; i < 9; i++) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001897 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); // Length 8.
1898 l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001899 offset += 22;
1900 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001901 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1902 l2.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001903 offset += 22;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001904 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1905 l2.Add(CharacterRange::Range(offset + 4, offset + 17), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001906 offset += 22;
1907
1908 // Different kinds of multi-range overlap:
1909 // XXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXX
1910 // YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y
1911
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001912 l1.Add(CharacterRange::Range(offset, offset + 21), &zone);
1913 l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001914 for (int i = 0; i < 6; i++) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001915 l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone);
1916 l2.Add(CharacterRange::Singleton(offset + 8), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001917 offset += 9;
1918 }
1919
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001920 CHECK(CharacterRange::IsCanonical(&l1));
1921 CHECK(CharacterRange::IsCanonical(&l2));
Leon Clarked91b9f72010-01-27 17:25:45 +00001922
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001923 ZoneList<CharacterRange> first_only(4, &zone);
1924 ZoneList<CharacterRange> second_only(4, &zone);
1925 ZoneList<CharacterRange> both(4, &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001926}
Leon Clarkee46be812010-01-19 14:06:41 +00001927
1928
Steve Blocka7e24c12009-10-30 11:49:00 +00001929TEST(Graph) {
Leon Clarkee46be812010-01-19 14:06:41 +00001930 Execute("\\b\\w+\\b", false, true, true);
Steve Blocka7e24c12009-10-30 11:49:00 +00001931}
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001932
1933
1934namespace {
1935
1936int* global_use_counts = NULL;
1937
1938void MockUseCounterCallback(v8::Isolate* isolate,
1939 v8::Isolate::UseCounterFeature feature) {
1940 ++global_use_counts[feature];
1941}
1942}
1943
1944
1945// Test that ES2015 RegExp compatibility fixes are in place, that they
1946// are not overly broad, and the appropriate UseCounters are incremented
1947TEST(UseCountRegExp) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001948 v8::Isolate* isolate = CcTest::isolate();
1949 v8::HandleScope scope(isolate);
1950 LocalContext env;
1951 int use_counts[v8::Isolate::kUseCounterFeatureCount] = {};
1952 global_use_counts = use_counts;
1953 CcTest::isolate()->SetUseCounterCallback(MockUseCounterCallback);
1954
1955 // Compat fix: RegExp.prototype.sticky == undefined; UseCounter tracks it
1956 v8::Local<v8::Value> resultSticky = CompileRun("RegExp.prototype.sticky");
1957 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1958 CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1959 CHECK(resultSticky->IsUndefined());
1960
1961 // re.sticky has approriate value and doesn't touch UseCounter
1962 v8::Local<v8::Value> resultReSticky = CompileRun("/a/.sticky");
1963 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1964 CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1965 CHECK(resultReSticky->IsFalse());
1966
1967 // When the getter is caleld on another object, throw an exception
1968 // and don't increment the UseCounter
1969 v8::Local<v8::Value> resultStickyError = CompileRun(
1970 "var exception;"
1971 "try { "
1972 " Object.getOwnPropertyDescriptor(RegExp.prototype, 'sticky')"
1973 " .get.call(null);"
1974 "} catch (e) {"
1975 " exception = e;"
1976 "}"
1977 "exception");
1978 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1979 CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1980 CHECK(resultStickyError->IsObject());
1981
1982 // RegExp.prototype.toString() returns '/(?:)/' as a compatibility fix;
1983 // a UseCounter is incremented to track it.
1984 v8::Local<v8::Value> resultToString =
1985 CompileRun("RegExp.prototype.toString().length");
Ben Murdochda12d292016-06-02 14:46:10 +01001986 CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001987 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1988 CHECK(resultToString->IsInt32());
1989 CHECK_EQ(6,
1990 resultToString->Int32Value(isolate->GetCurrentContext()).FromJust());
1991
1992 // .toString() works on normal RegExps
1993 v8::Local<v8::Value> resultReToString = CompileRun("/a/.toString().length");
Ben Murdochda12d292016-06-02 14:46:10 +01001994 CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001995 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1996 CHECK(resultReToString->IsInt32());
1997 CHECK_EQ(
1998 3, resultReToString->Int32Value(isolate->GetCurrentContext()).FromJust());
1999
2000 // .toString() throws on non-RegExps that aren't RegExp.prototype
2001 v8::Local<v8::Value> resultToStringError = CompileRun(
2002 "var exception;"
2003 "try { RegExp.prototype.toString.call(null) }"
2004 "catch (e) { exception = e; }"
2005 "exception");
Ben Murdochda12d292016-06-02 14:46:10 +01002006 CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00002007 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
2008 CHECK(resultToStringError->IsObject());
2009}
Ben Murdoch097c5b22016-05-18 11:27:45 +01002010
2011class UncachedExternalString
2012 : public v8::String::ExternalOneByteStringResource {
2013 public:
2014 const char* data() const override { return "abcdefghijklmnopqrstuvwxyz"; }
2015 size_t length() const override { return 26; }
2016 bool IsCompressible() const override { return true; }
2017};
2018
2019TEST(UncachedExternalString) {
2020 v8::Isolate* isolate = CcTest::isolate();
2021 v8::HandleScope scope(isolate);
2022 LocalContext env;
2023 v8::Local<v8::String> external =
2024 v8::String::NewExternalOneByte(isolate, new UncachedExternalString())
2025 .ToLocalChecked();
2026 CHECK(v8::Utils::OpenHandle(*external)->map() ==
2027 CcTest::i_isolate()->heap()->short_external_one_byte_string_map());
2028 v8::Local<v8::Object> global = env->Global();
2029 global->Set(env.local(), v8_str("external"), external).FromJust();
2030 CompileRun("var re = /y(.)/; re.test('ab');");
2031 ExpectString("external.substring(1).match(re)[1]", "z");
2032}