blob: a91058cc248501101ef921b34f900c10bc0ca993 [file] [log] [blame]
// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
Emily Bernierd0a1eb72015-03-24 16:35:39 -040028#include <cstdlib>
29#include <sstream>
Steve Blocka7e24c12009-10-30 11:49:00 +000030
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000031#include "include/v8.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000032#include "src/v8.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000033
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000034#include "src/ast/ast.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000035#include "src/char-predicates-inl.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000036#include "src/ostreams.h"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000037#include "src/regexp/jsregexp.h"
38#include "src/regexp/regexp-macro-assembler.h"
39#include "src/regexp/regexp-macro-assembler-irregexp.h"
40#include "src/regexp/regexp-parser.h"
41#include "src/splay-tree-inl.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000042#include "src/string-stream.h"
Steve Block6ded16b2010-05-10 14:33:55 +010043#ifdef V8_INTERPRETED_REGEXP
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000044#include "src/regexp/interpreter-irregexp.h"
Steve Block6ded16b2010-05-10 14:33:55 +010045#else // V8_INTERPRETED_REGEXP
Ben Murdochb8a8cc12014-11-26 15:28:44 +000046#include "src/macro-assembler.h"
47#if V8_TARGET_ARCH_ARM
48#include "src/arm/assembler-arm.h" // NOLINT
49#include "src/arm/macro-assembler-arm.h"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000050#include "src/regexp/arm/regexp-macro-assembler-arm.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000051#endif
Ben Murdochb8a8cc12014-11-26 15:28:44 +000052#if V8_TARGET_ARCH_ARM64
53#include "src/arm64/assembler-arm64.h"
54#include "src/arm64/macro-assembler-arm64.h"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000055#include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
56#endif
57#if V8_TARGET_ARCH_PPC
58#include "src/ppc/assembler-ppc.h"
59#include "src/ppc/macro-assembler-ppc.h"
60#include "src/regexp/ppc/regexp-macro-assembler-ppc.h"
Steve Block44f0eee2011-05-26 01:26:41 +010061#endif
Ben Murdochb8a8cc12014-11-26 15:28:44 +000062#if V8_TARGET_ARCH_MIPS
63#include "src/mips/assembler-mips.h"
64#include "src/mips/macro-assembler-mips.h"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000065#include "src/regexp/mips/regexp-macro-assembler-mips.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000066#endif
Ben Murdochb8a8cc12014-11-26 15:28:44 +000067#if V8_TARGET_ARCH_MIPS64
68#include "src/mips64/assembler-mips64.h"
69#include "src/mips64/macro-assembler-mips64.h"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000070#include "src/regexp/mips64/regexp-macro-assembler-mips64.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000071#endif
72#if V8_TARGET_ARCH_X64
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000073#include "src/regexp/x64/regexp-macro-assembler-x64.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000074#include "src/x64/assembler-x64.h"
75#include "src/x64/macro-assembler-x64.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000076#endif
77#if V8_TARGET_ARCH_IA32
78#include "src/ia32/assembler-ia32.h"
79#include "src/ia32/macro-assembler-ia32.h"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000080#include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000081#endif
82#if V8_TARGET_ARCH_X87
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000083#include "src/regexp/x87/regexp-macro-assembler-x87.h"
Ben Murdochb8a8cc12014-11-26 15:28:44 +000084#include "src/x87/assembler-x87.h"
85#include "src/x87/macro-assembler-x87.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000086#endif
Steve Block6ded16b2010-05-10 14:33:55 +010087#endif // V8_INTERPRETED_REGEXP
Ben Murdochb8a8cc12014-11-26 15:28:44 +000088#include "test/cctest/cctest.h"
Steve Blocka7e24c12009-10-30 11:49:00 +000089
90using namespace v8::internal;
91
92
Leon Clarkee46be812010-01-19 14:06:41 +000093static bool CheckParse(const char* input) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +000094 v8::HandleScope scope(CcTest::isolate());
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000095 Zone zone;
Ben Murdochb8a8cc12014-11-26 15:28:44 +000096 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
Leon Clarkee46be812010-01-19 14:06:41 +000097 RegExpCompileData result;
Ben Murdochb8a8cc12014-11-26 15:28:44 +000098 return v8::internal::RegExpParser::ParseRegExp(
Ben Murdoch4a90d5f2016-03-22 12:00:34 +000099 CcTest::i_isolate(), &zone, &reader, false, false, &result);
Leon Clarkee46be812010-01-19 14:06:41 +0000100}
101
102
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000103static void CheckParseEq(const char* input, const char* expected,
104 bool unicode = false) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000105 v8::HandleScope scope(CcTest::isolate());
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000106 Zone zone;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000107 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
Steve Blocka7e24c12009-10-30 11:49:00 +0000108 RegExpCompileData result;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000109 CHECK(v8::internal::RegExpParser::ParseRegExp(
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000110 CcTest::i_isolate(), &zone, &reader, false, unicode, &result));
Steve Blocka7e24c12009-10-30 11:49:00 +0000111 CHECK(result.tree != NULL);
112 CHECK(result.error.is_null());
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400113 std::ostringstream os;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000114 result.tree->Print(os, &zone);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000115 if (strcmp(expected, os.str().c_str()) != 0) {
116 printf("%s | %s\n", expected, os.str().c_str());
117 }
118 CHECK_EQ(0, strcmp(expected, os.str().c_str()));
Steve Blocka7e24c12009-10-30 11:49:00 +0000119}
120
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000121
Steve Blocka7e24c12009-10-30 11:49:00 +0000122static bool CheckSimple(const char* input) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000123 v8::HandleScope scope(CcTest::isolate());
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000124 Zone zone;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000125 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
Steve Blocka7e24c12009-10-30 11:49:00 +0000126 RegExpCompileData result;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000127 CHECK(v8::internal::RegExpParser::ParseRegExp(
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000128 CcTest::i_isolate(), &zone, &reader, false, false, &result));
Steve Blocka7e24c12009-10-30 11:49:00 +0000129 CHECK(result.tree != NULL);
130 CHECK(result.error.is_null());
131 return result.simple;
132}
133
// Pair of match-length bounds returned by CheckMinMaxMatch().
struct MinMaxPair {
  int min_match;  // Value of RegExpTree::min_match() for the parsed tree.
  int max_match;  // Value of RegExpTree::max_match() for the parsed tree.
};
138
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000139
Steve Blocka7e24c12009-10-30 11:49:00 +0000140static MinMaxPair CheckMinMaxMatch(const char* input) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000141 v8::HandleScope scope(CcTest::isolate());
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000142 Zone zone;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000143 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
Steve Blocka7e24c12009-10-30 11:49:00 +0000144 RegExpCompileData result;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000145 CHECK(v8::internal::RegExpParser::ParseRegExp(
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000146 CcTest::i_isolate(), &zone, &reader, false, false, &result));
Steve Blocka7e24c12009-10-30 11:49:00 +0000147 CHECK(result.tree != NULL);
148 CHECK(result.error.is_null());
149 int min_match = result.tree->min_match();
150 int max_match = result.tree->max_match();
151 MinMaxPair pair = { min_match, max_match };
152 return pair;
153}
154
155
// Shorthands over the helpers above.
// CHECK_PARSE_ERROR: the parser must reject |input|.
#define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))
// CHECK_SIMPLE: the parser's |simple| flag for |input| must equal |simple|.
// Note that the expansion already ends in a semicolon.
#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input));
// CHECK_MIN_MAX: the parsed tree's min/max match lengths must equal
// |min| and |max|.
#define CHECK_MIN_MAX(input, min, max)                                         \
  { MinMaxPair min_max = CheckMinMaxMatch(input);                              \
    CHECK_EQ(min, min_max.min_match);                                          \
    CHECK_EQ(max, min_max.max_match);                                          \
  }
163
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000164
165void TestRegExpParser(bool lookbehind) {
166 FLAG_harmony_regexp_lookbehind = lookbehind;
167 FLAG_harmony_unicode_regexps = true;
168
Leon Clarkee46be812010-01-19 14:06:41 +0000169 CHECK_PARSE_ERROR("?");
170
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000171 CheckParseEq("abc", "'abc'");
172 CheckParseEq("", "%");
173 CheckParseEq("abc|def", "(| 'abc' 'def')");
174 CheckParseEq("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
175 CheckParseEq("^xxx$", "(: @^i 'xxx' @$i)");
176 CheckParseEq("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
177 CheckParseEq("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
178 CheckParseEq("a*", "(# 0 - g 'a')");
179 CheckParseEq("a*?", "(# 0 - n 'a')");
180 CheckParseEq("abc+", "(: 'ab' (# 1 - g 'c'))");
181 CheckParseEq("abc+?", "(: 'ab' (# 1 - n 'c'))");
182 CheckParseEq("xyz?", "(: 'xy' (# 0 1 g 'z'))");
183 CheckParseEq("xyz??", "(: 'xy' (# 0 1 n 'z'))");
184 CheckParseEq("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
185 CheckParseEq("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
186 CheckParseEq("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
187 CheckParseEq("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
188 CheckParseEq("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
189 CheckParseEq("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
190 CheckParseEq("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
191 CheckParseEq("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
192 CheckParseEq("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
193 CheckParseEq("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
194 CheckParseEq("(?:foo)", "'foo'");
195 CheckParseEq("(?: foo )", "' foo '");
196 CheckParseEq("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
197 CheckParseEq("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
198 CheckParseEq("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
199 CheckParseEq("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000200 if (lookbehind) {
201 CheckParseEq("foo(?<=bar)baz", "(: 'foo' (<- + 'bar') 'baz')");
202 CheckParseEq("foo(?<!bar)baz", "(: 'foo' (<- - 'bar') 'baz')");
203 } else {
204 CHECK_PARSE_ERROR("foo(?<=bar)baz");
205 CHECK_PARSE_ERROR("foo(?<!bar)baz");
206 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000207 CheckParseEq("()", "(^ %)");
208 CheckParseEq("(?=)", "(-> + %)");
209 CheckParseEq("[]", "^[\\x00-\\uffff]"); // Doesn't compile on windows
210 CheckParseEq("[^]", "[\\x00-\\uffff]"); // \uffff isn't in codepage 1252
211 CheckParseEq("[x]", "[x]");
212 CheckParseEq("[xyz]", "[x y z]");
213 CheckParseEq("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
214 CheckParseEq("[-123]", "[- 1 2 3]");
215 CheckParseEq("[^123]", "^[1 2 3]");
216 CheckParseEq("]", "']'");
217 CheckParseEq("}", "'}'");
218 CheckParseEq("[a-b-c]", "[a-b - c]");
219 CheckParseEq("[\\d]", "[0-9]");
220 CheckParseEq("[x\\dz]", "[x 0-9 z]");
221 CheckParseEq("[\\d-z]", "[0-9 - z]");
222 CheckParseEq("[\\d-\\d]", "[0-9 - 0-9]");
223 CheckParseEq("[z-\\d]", "[z - 0-9]");
Ben Murdoch086aeea2011-05-13 15:57:08 +0100224 // Control character outside character class.
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000225 CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
226 CheckParseEq("\\c!", "'\\c!'");
227 CheckParseEq("\\c_", "'\\c_'");
228 CheckParseEq("\\c~", "'\\c~'");
229 CheckParseEq("\\c1", "'\\c1'");
Ben Murdoch086aeea2011-05-13 15:57:08 +0100230 // Control character inside character class.
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000231 CheckParseEq("[\\c!]", "[\\ c !]");
232 CheckParseEq("[\\c_]", "[\\x1f]");
233 CheckParseEq("[\\c~]", "[\\ c ~]");
234 CheckParseEq("[\\ca]", "[\\x01]");
235 CheckParseEq("[\\cz]", "[\\x1a]");
236 CheckParseEq("[\\cA]", "[\\x01]");
237 CheckParseEq("[\\cZ]", "[\\x1a]");
238 CheckParseEq("[\\c1]", "[\\x11]");
Ben Murdoch086aeea2011-05-13 15:57:08 +0100239
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000240 CheckParseEq("[a\\]c]", "[a ] c]");
241 CheckParseEq("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
242 CheckParseEq("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");
243 CheckParseEq("\\0", "'\\x00'");
244 CheckParseEq("\\8", "'8'");
245 CheckParseEq("\\9", "'9'");
246 CheckParseEq("\\11", "'\\x09'");
247 CheckParseEq("\\11a", "'\\x09a'");
248 CheckParseEq("\\011", "'\\x09'");
249 CheckParseEq("\\00011", "'\\x0011'");
250 CheckParseEq("\\118", "'\\x098'");
251 CheckParseEq("\\111", "'I'");
252 CheckParseEq("\\1111", "'I1'");
253 CheckParseEq("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
254 CheckParseEq("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
255 CheckParseEq("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
256 CheckParseEq("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
257 CheckParseEq("(x)(x)(x)\\1*",
258 "(: (^ 'x') (^ 'x') (^ 'x')"
259 " (# 0 - g (<- 1)))");
260 CheckParseEq("(x)(x)(x)\\2*",
261 "(: (^ 'x') (^ 'x') (^ 'x')"
262 " (# 0 - g (<- 2)))");
263 CheckParseEq("(x)(x)(x)\\3*",
264 "(: (^ 'x') (^ 'x') (^ 'x')"
265 " (# 0 - g (<- 3)))");
266 CheckParseEq("(x)(x)(x)\\4*",
267 "(: (^ 'x') (^ 'x') (^ 'x')"
268 " (# 0 - g '\\x04'))");
269 CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
270 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
271 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
272 CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
273 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
274 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
275 CheckParseEq("(a)\\1", "(: (^ 'a') (<- 1))");
276 CheckParseEq("(a\\1)", "(^ 'a')");
277 CheckParseEq("(\\1a)", "(^ 'a')");
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000278 CheckParseEq("(\\2)(\\1)", "(: (^ (<- 2)) (^ (<- 1)))");
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000279 CheckParseEq("(?=a)?a", "'a'");
280 CheckParseEq("(?=a){0,10}a", "'a'");
281 CheckParseEq("(?=a){1,10}a", "(: (-> + 'a') 'a')");
282 CheckParseEq("(?=a){9,10}a", "(: (-> + 'a') 'a')");
283 CheckParseEq("(?!a)?a", "'a'");
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000284 CheckParseEq("\\1(a)", "(: (<- 1) (^ 'a'))");
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000285 CheckParseEq("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000286 CheckParseEq("(?!\\1(a\\1)\\1)\\1",
287 "(: (-> - (: (<- 1) (^ 'a') (<- 1))) (<- 1))");
288 CheckParseEq("\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1",
289 "(: (<- 1) (<- 2) (^ (: 'a' (^ 'b') (<- 2))) (<- 1))");
290 if (lookbehind) {
291 CheckParseEq("\\1\\2(a(?<=\\1(b\\1\\2))\\2)\\1",
292 "(: (<- 1) (<- 2) (^ (: 'a' (<- + (^ 'b')) (<- 2))) (<- 1))");
293 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000294 CheckParseEq("[\\0]", "[\\x00]");
295 CheckParseEq("[\\11]", "[\\x09]");
296 CheckParseEq("[\\11a]", "[\\x09 a]");
297 CheckParseEq("[\\011]", "[\\x09]");
298 CheckParseEq("[\\00011]", "[\\x00 1 1]");
299 CheckParseEq("[\\118]", "[\\x09 8]");
300 CheckParseEq("[\\111]", "[I]");
301 CheckParseEq("[\\1111]", "[I 1]");
302 CheckParseEq("\\x34", "'\x34'");
303 CheckParseEq("\\x60", "'\x60'");
304 CheckParseEq("\\x3z", "'x3z'");
305 CheckParseEq("\\c", "'\\c'");
306 CheckParseEq("\\u0034", "'\x34'");
307 CheckParseEq("\\u003z", "'u003z'");
308 CheckParseEq("foo[z]*", "(: 'foo' (# 0 - g [z]))");
Steve Blocka7e24c12009-10-30 11:49:00 +0000309
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000310 // Unicode regexps
311 CheckParseEq("\\u{12345}", "'\\ud808\\udf45'", true);
312 CheckParseEq("\\u{12345}\\u{23456}", "(! '\\ud808\\udf45' '\\ud84d\\udc56')",
313 true);
314 CheckParseEq("\\u{12345}|\\u{23456}", "(| '\\ud808\\udf45' '\\ud84d\\udc56')",
315 true);
316 CheckParseEq("\\u{12345}{3}", "(# 3 3 g '\\ud808\\udf45')", true);
317 CheckParseEq("\\u{12345}*", "(# 0 - g '\\ud808\\udf45')", true);
318
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000319 CHECK_SIMPLE("", false);
Steve Blocka7e24c12009-10-30 11:49:00 +0000320 CHECK_SIMPLE("a", true);
321 CHECK_SIMPLE("a|b", false);
322 CHECK_SIMPLE("a\\n", false);
323 CHECK_SIMPLE("^a", false);
324 CHECK_SIMPLE("a$", false);
325 CHECK_SIMPLE("a\\b!", false);
326 CHECK_SIMPLE("a\\Bb", false);
327 CHECK_SIMPLE("a*", false);
328 CHECK_SIMPLE("a*?", false);
329 CHECK_SIMPLE("a?", false);
330 CHECK_SIMPLE("a??", false);
331 CHECK_SIMPLE("a{0,1}?", false);
332 CHECK_SIMPLE("a{1,1}?", false);
333 CHECK_SIMPLE("a{1,2}?", false);
334 CHECK_SIMPLE("a+?", false);
335 CHECK_SIMPLE("(a)", false);
336 CHECK_SIMPLE("(a)\\1", false);
337 CHECK_SIMPLE("(\\1a)", false);
338 CHECK_SIMPLE("\\1(a)", false);
339 CHECK_SIMPLE("a\\s", false);
340 CHECK_SIMPLE("a\\S", false);
341 CHECK_SIMPLE("a\\d", false);
342 CHECK_SIMPLE("a\\D", false);
343 CHECK_SIMPLE("a\\w", false);
344 CHECK_SIMPLE("a\\W", false);
345 CHECK_SIMPLE("a.", false);
346 CHECK_SIMPLE("a\\q", false);
347 CHECK_SIMPLE("a[a]", false);
348 CHECK_SIMPLE("a[^a]", false);
349 CHECK_SIMPLE("a[a-z]", false);
350 CHECK_SIMPLE("a[\\q]", false);
351 CHECK_SIMPLE("a(?:b)", false);
352 CHECK_SIMPLE("a(?=b)", false);
353 CHECK_SIMPLE("a(?!b)", false);
354 CHECK_SIMPLE("\\x60", false);
355 CHECK_SIMPLE("\\u0060", false);
356 CHECK_SIMPLE("\\cA", false);
357 CHECK_SIMPLE("\\q", false);
358 CHECK_SIMPLE("\\1112", false);
359 CHECK_SIMPLE("\\0", false);
360 CHECK_SIMPLE("(a)\\1", false);
361 CHECK_SIMPLE("(?=a)?a", false);
362 CHECK_SIMPLE("(?!a)?a\\1", false);
363 CHECK_SIMPLE("(?:(?=a))a\\1", false);
364
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000365 CheckParseEq("a{}", "'a{}'");
366 CheckParseEq("a{,}", "'a{,}'");
367 CheckParseEq("a{", "'a{'");
368 CheckParseEq("a{z}", "'a{z}'");
369 CheckParseEq("a{1z}", "'a{1z}'");
370 CheckParseEq("a{12z}", "'a{12z}'");
371 CheckParseEq("a{12,", "'a{12,'");
372 CheckParseEq("a{12,3b", "'a{12,3b'");
373 CheckParseEq("{}", "'{}'");
374 CheckParseEq("{,}", "'{,}'");
375 CheckParseEq("{", "'{'");
376 CheckParseEq("{z}", "'{z}'");
377 CheckParseEq("{1z}", "'{1z}'");
378 CheckParseEq("{12z}", "'{12z}'");
379 CheckParseEq("{12,", "'{12,'");
380 CheckParseEq("{12,3b", "'{12,3b'");
Steve Blocka7e24c12009-10-30 11:49:00 +0000381
382 CHECK_MIN_MAX("a", 1, 1);
383 CHECK_MIN_MAX("abc", 3, 3);
384 CHECK_MIN_MAX("a[bc]d", 3, 3);
385 CHECK_MIN_MAX("a|bc", 1, 2);
386 CHECK_MIN_MAX("ab|c", 1, 2);
387 CHECK_MIN_MAX("a||bc", 0, 2);
388 CHECK_MIN_MAX("|", 0, 0);
389 CHECK_MIN_MAX("(?:ab)", 2, 2);
390 CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
391 CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
392 CHECK_MIN_MAX("(ab)", 2, 2);
393 CHECK_MIN_MAX("(ab|cde)", 2, 3);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000394 CHECK_MIN_MAX("(ab)\\1", 2, RegExpTree::kInfinity);
395 CHECK_MIN_MAX("(ab|cde)\\1", 2, RegExpTree::kInfinity);
Steve Blocka7e24c12009-10-30 11:49:00 +0000396 CHECK_MIN_MAX("(?:ab)?", 0, 2);
397 CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
398 CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
399 CHECK_MIN_MAX("a?", 0, 1);
400 CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
401 CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
402 CHECK_MIN_MAX("a??", 0, 1);
403 CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
404 CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
405 CHECK_MIN_MAX("(?:a?)?", 0, 1);
406 CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
407 CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
408 CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
409 CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
410 CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
411 CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
412 CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
413 CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
414 CHECK_MIN_MAX("a{0}", 0, 0);
415 CHECK_MIN_MAX("(?:a+){0}", 0, 0);
416 CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
417 CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
418 CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
419 CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
420 CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
421 CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
422 CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
423 CHECK_MIN_MAX("a\\bc", 2, 2);
424 CHECK_MIN_MAX("a\\Bc", 2, 2);
425 CHECK_MIN_MAX("a\\sc", 3, 3);
426 CHECK_MIN_MAX("a\\Sc", 3, 3);
427 CHECK_MIN_MAX("a(?=b)c", 2, 2);
428 CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
429 CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
430}
431
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000432
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000433TEST(ParserWithLookbehind) {
434 TestRegExpParser(true); // Lookbehind enabled.
435}
436
437
438TEST(ParserWithoutLookbehind) {
439 TestRegExpParser(true); // Lookbehind enabled.
440}
441
442
Steve Blocka7e24c12009-10-30 11:49:00 +0000443TEST(ParserRegression) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000444 CheckParseEq("[A-Z$-][x]", "(! [A-Z $ -] [x])");
445 CheckParseEq("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
446 CheckParseEq("{", "'{'");
447 CheckParseEq("a|", "(| 'a' %)");
Steve Blocka7e24c12009-10-30 11:49:00 +0000448}
449
450static void ExpectError(const char* input,
451 const char* expected) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000452 v8::HandleScope scope(CcTest::isolate());
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000453 Zone zone;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000454 FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
Steve Blocka7e24c12009-10-30 11:49:00 +0000455 RegExpCompileData result;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000456 CHECK(!v8::internal::RegExpParser::ParseRegExp(
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000457 CcTest::i_isolate(), &zone, &reader, false, false, &result));
Steve Blocka7e24c12009-10-30 11:49:00 +0000458 CHECK(result.tree == NULL);
459 CHECK(!result.error.is_null());
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000460 v8::base::SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS);
461 CHECK_EQ(0, strcmp(expected, str.get()));
Steve Blocka7e24c12009-10-30 11:49:00 +0000462}
463
464
465TEST(Errors) {
Steve Blocka7e24c12009-10-30 11:49:00 +0000466 const char* kEndBackslash = "\\ at end of pattern";
467 ExpectError("\\", kEndBackslash);
468 const char* kUnterminatedGroup = "Unterminated group";
469 ExpectError("(foo", kUnterminatedGroup);
470 const char* kInvalidGroup = "Invalid group";
471 ExpectError("(?", kInvalidGroup);
472 const char* kUnterminatedCharacterClass = "Unterminated character class";
473 ExpectError("[", kUnterminatedCharacterClass);
474 ExpectError("[a-", kUnterminatedCharacterClass);
475 const char* kNothingToRepeat = "Nothing to repeat";
476 ExpectError("*", kNothingToRepeat);
477 ExpectError("?", kNothingToRepeat);
478 ExpectError("+", kNothingToRepeat);
479 ExpectError("{1}", kNothingToRepeat);
480 ExpectError("{1,2}", kNothingToRepeat);
481 ExpectError("{1,}", kNothingToRepeat);
482
483 // Check that we don't allow more than kMaxCapture captures
484 const int kMaxCaptures = 1 << 16; // Must match RegExpParser::kMaxCaptures.
485 const char* kTooManyCaptures = "Too many captures";
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400486 std::ostringstream os;
Steve Blocka7e24c12009-10-30 11:49:00 +0000487 for (int i = 0; i <= kMaxCaptures; i++) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000488 os << "()";
Steve Blocka7e24c12009-10-30 11:49:00 +0000489 }
Emily Bernierd0a1eb72015-03-24 16:35:39 -0400490 ExpectError(os.str().c_str(), kTooManyCaptures);
Steve Blocka7e24c12009-10-30 11:49:00 +0000491}
492
493
494static bool IsDigit(uc16 c) {
495 return ('0' <= c && c <= '9');
496}
497
498
499static bool NotDigit(uc16 c) {
500 return !IsDigit(c);
501}
502
503
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000504static bool IsWhiteSpaceOrLineTerminator(uc16 c) {
505 // According to ECMA 5.1, 15.10.2.12 the CharacterClassEscape \s includes
506 // WhiteSpace (7.2) and LineTerminator (7.3) values.
507 return v8::internal::WhiteSpaceOrLineTerminator::Is(c);
Steve Blocka7e24c12009-10-30 11:49:00 +0000508}
509
510
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000511static bool NotWhiteSpaceNorLineTermiantor(uc16 c) {
512 return !IsWhiteSpaceOrLineTerminator(c);
Steve Blocka7e24c12009-10-30 11:49:00 +0000513}
514
515
516static bool NotWord(uc16 c) {
517 return !IsRegExpWord(c);
518}
519
520
521static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000522 Zone zone;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000523 ZoneList<CharacterRange>* ranges =
524 new(&zone) ZoneList<CharacterRange>(2, &zone);
525 CharacterRange::AddClassEscape(c, ranges, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000526 for (unsigned i = 0; i < (1 << 16); i++) {
527 bool in_class = false;
528 for (int j = 0; !in_class && j < ranges->length(); j++) {
529 CharacterRange& range = ranges->at(j);
530 in_class = (range.from() <= i && i <= range.to());
531 }
532 CHECK_EQ(pred(i), in_class);
533 }
534}
535
536
537TEST(CharacterClassEscapes) {
538 TestCharacterClassEscapes('.', IsRegExpNewline);
539 TestCharacterClassEscapes('d', IsDigit);
540 TestCharacterClassEscapes('D', NotDigit);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000541 TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator);
542 TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTermiantor);
Steve Blocka7e24c12009-10-30 11:49:00 +0000543 TestCharacterClassEscapes('w', IsRegExpWord);
544 TestCharacterClassEscapes('W', NotWord);
545}
546
547
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000548static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
549 bool is_one_byte, Zone* zone) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000550 Isolate* isolate = CcTest::i_isolate();
Ben Murdoch8b112d22011-06-08 16:22:53 +0100551 FlatStringReader reader(isolate, CStrVector(input));
Steve Blocka7e24c12009-10-30 11:49:00 +0000552 RegExpCompileData compile_data;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000553 if (!v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), zone,
554 &reader, multiline, unicode,
555 &compile_data))
Steve Blocka7e24c12009-10-30 11:49:00 +0000556 return NULL;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000557 Handle<String> pattern = isolate->factory()
558 ->NewStringFromUtf8(CStrVector(input))
559 .ToHandleChecked();
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000560 Handle<String> sample_subject =
561 isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000562 RegExpEngine::Compile(isolate, zone, &compile_data, false, false, multiline,
563 false, pattern, sample_subject, is_one_byte);
Steve Blocka7e24c12009-10-30 11:49:00 +0000564 return compile_data.node;
565}
566
567
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000568static void Execute(const char* input, bool multiline, bool unicode,
569 bool is_one_byte, bool dot_output = false) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000570 v8::HandleScope scope(CcTest::isolate());
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000571 Zone zone;
572 RegExpNode* node = Compile(input, multiline, unicode, is_one_byte, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000573 USE(node);
574#ifdef DEBUG
575 if (dot_output) {
576 RegExpEngine::DotPrint(input, node, false);
Steve Blocka7e24c12009-10-30 11:49:00 +0000577 }
578#endif // DEBUG
579}
580
581
// Configuration type used to instantiate ZoneSplayTree in the tests below:
// int keys, int values, and a three-way integer comparison.
class TestConfig {
 public:
  typedef int Key;
  typedef int Value;
  static const int kNoKey;
  static int NoValue() { return 0; }
  // Returns -1, 0 or 1 when |a| is less than, equal to or greater than |b|.
  static inline int Compare(int a, int b) {
    if (a < b)
      return -1;
    else if (a > b)
      return 1;
    else
      return 0;
  }
};


const int TestConfig::kNoKey = 0;
Steve Blocka7e24c12009-10-30 11:49:00 +0000600
601
// Deterministic mixing function used to generate test data from two indices.
// The double bitwise negation cancels out, so the result is simply
// (i * 781) ^ (j * 329) converted to unsigned.
static unsigned PseudoRandom(int i, int j) {
  return ~(~((i * 781) ^ (j * 329)));
}
605
606
607TEST(SplayTreeSimple) {
608 static const unsigned kLimit = 1000;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000609 Zone zone;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000610 ZoneSplayTree<TestConfig> tree(&zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000611 bool seen[kLimit];
612 for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
613#define CHECK_MAPS_EQUAL() do { \
614 for (unsigned k = 0; k < kLimit; k++) \
615 CHECK_EQ(seen[k], tree.Find(k, &loc)); \
616 } while (false)
617 for (int i = 0; i < 50; i++) {
618 for (int j = 0; j < 50; j++) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000619 int next = PseudoRandom(i, j) % kLimit;
Steve Blocka7e24c12009-10-30 11:49:00 +0000620 if (seen[next]) {
621 // We've already seen this one. Check the value and remove
622 // it.
623 ZoneSplayTree<TestConfig>::Locator loc;
624 CHECK(tree.Find(next, &loc));
625 CHECK_EQ(next, loc.key());
626 CHECK_EQ(3 * next, loc.value());
627 tree.Remove(next);
628 seen[next] = false;
629 CHECK_MAPS_EQUAL();
630 } else {
631 // Check that it wasn't there already and then add it.
632 ZoneSplayTree<TestConfig>::Locator loc;
633 CHECK(!tree.Find(next, &loc));
634 CHECK(tree.Insert(next, &loc));
635 CHECK_EQ(next, loc.key());
636 loc.set_value(3 * next);
637 seen[next] = true;
638 CHECK_MAPS_EQUAL();
639 }
640 int val = PseudoRandom(j, i) % kLimit;
641 if (seen[val]) {
642 ZoneSplayTree<TestConfig>::Locator loc;
643 CHECK(tree.FindGreatestLessThan(val, &loc));
644 CHECK_EQ(loc.key(), val);
645 break;
646 }
647 val = PseudoRandom(i + j, i - j) % kLimit;
648 if (seen[val]) {
649 ZoneSplayTree<TestConfig>::Locator loc;
650 CHECK(tree.FindLeastGreaterThan(val, &loc));
651 CHECK_EQ(loc.key(), val);
652 break;
653 }
654 }
655 }
656}
657
658
659TEST(DispatchTableConstruction) {
660 // Initialize test data.
661 static const int kLimit = 1000;
662 static const int kRangeCount = 8;
663 static const int kRangeSize = 16;
664 uc16 ranges[kRangeCount][2 * kRangeSize];
665 for (int i = 0; i < kRangeCount; i++) {
666 Vector<uc16> range(ranges[i], 2 * kRangeSize);
667 for (int j = 0; j < 2 * kRangeSize; j++) {
668 range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
669 }
670 range.Sort();
671 for (int j = 1; j < 2 * kRangeSize; j++) {
672 CHECK(range[j-1] <= range[j]);
673 }
674 }
675 // Enter test data into dispatch table.
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000676 Zone zone;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000677 DispatchTable table(&zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000678 for (int i = 0; i < kRangeCount; i++) {
679 uc16* range = ranges[i];
680 for (int j = 0; j < 2 * kRangeSize; j += 2)
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000681 table.AddRange(CharacterRange(range[j], range[j + 1]), i, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +0000682 }
683 // Check that the table looks as we would expect
684 for (int p = 0; p < kLimit; p++) {
685 OutSet* outs = table.Get(p);
686 for (int j = 0; j < kRangeCount; j++) {
687 uc16* range = ranges[j];
688 bool is_on = false;
689 for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
690 is_on = (range[k] <= p && p <= range[k + 1]);
691 CHECK_EQ(is_on, outs->Get(j));
692 }
693 }
694}
695
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000696
Leon Clarkee46be812010-01-19 14:06:41 +0000697// Test of debug-only syntax.
698#ifdef DEBUG
699
700TEST(ParsePossessiveRepetition) {
701 bool old_flag_value = FLAG_regexp_possessive_quantifier;
702
703 // Enable possessive quantifier syntax.
704 FLAG_regexp_possessive_quantifier = true;
705
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000706 CheckParseEq("a*+", "(# 0 - p 'a')");
707 CheckParseEq("a++", "(# 1 - p 'a')");
708 CheckParseEq("a?+", "(# 0 1 p 'a')");
709 CheckParseEq("a{10,20}+", "(# 10 20 p 'a')");
710 CheckParseEq("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')");
Leon Clarkee46be812010-01-19 14:06:41 +0000711
712 // Disable possessive quantifier syntax.
713 FLAG_regexp_possessive_quantifier = false;
714
715 CHECK_PARSE_ERROR("a*+");
716 CHECK_PARSE_ERROR("a++");
717 CHECK_PARSE_ERROR("a?+");
718 CHECK_PARSE_ERROR("a{10,20}+");
719 CHECK_PARSE_ERROR("a{10,20}+b");
720
721 FLAG_regexp_possessive_quantifier = old_flag_value;
722}
723
724#endif
Steve Blocka7e24c12009-10-30 11:49:00 +0000725
726// Tests of interpreter.
727
728
Steve Block6ded16b2010-05-10 14:33:55 +0100729#ifndef V8_INTERPRETED_REGEXP
Steve Blocka7e24c12009-10-30 11:49:00 +0000730
// Pick the native regexp macro assembler class matching the target
// architecture; the tests below refer to it as ArchRegExpMacroAssembler.
#if V8_TARGET_ARCH_IA32
typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_X64
typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_ARM
typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_ARM64
typedef RegExpMacroAssemblerARM64 ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_PPC
typedef RegExpMacroAssemblerPPC ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
// Both MIPS targets map to the same class name.
typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
#elif V8_TARGET_ARCH_X87
typedef RegExpMacroAssemblerX87 ArchRegExpMacroAssembler;
#endif
748
749class ContextInitializer {
750 public:
751 ContextInitializer()
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000752 : scope_(CcTest::isolate()),
753 env_(v8::Context::New(CcTest::isolate())) {
Steve Blocka7e24c12009-10-30 11:49:00 +0000754 env_->Enter();
755 }
756 ~ContextInitializer() {
757 env_->Exit();
Steve Blocka7e24c12009-10-30 11:49:00 +0000758 }
759 private:
Steve Blocka7e24c12009-10-30 11:49:00 +0000760 v8::HandleScope scope_;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000761 v8::Local<v8::Context> env_;
Steve Blocka7e24c12009-10-30 11:49:00 +0000762};
763
764
765static ArchRegExpMacroAssembler::Result Execute(Code* code,
766 String* input,
767 int start_offset,
768 const byte* input_start,
769 const byte* input_end,
Leon Clarked91b9f72010-01-27 17:25:45 +0000770 int* captures) {
Steve Blocka7e24c12009-10-30 11:49:00 +0000771 return NativeRegExpMacroAssembler::Execute(
772 code,
773 input,
774 start_offset,
775 input_start,
776 input_end,
Steve Block44f0eee2011-05-26 01:26:41 +0100777 captures,
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000778 0,
779 CcTest::i_isolate());
Steve Blocka7e24c12009-10-30 11:49:00 +0000780}
781
782
783TEST(MacroAssemblerNativeSuccess) {
784 v8::V8::Initialize();
785 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000786 Isolate* isolate = CcTest::i_isolate();
787 Factory* factory = isolate->factory();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000788 Zone zone;
Steve Blocka7e24c12009-10-30 11:49:00 +0000789
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000790 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
791 4);
Steve Blocka7e24c12009-10-30 11:49:00 +0000792
793 m.Succeed();
794
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000795 Handle<String> source = factory->NewStringFromStaticChars("");
Steve Blocka7e24c12009-10-30 11:49:00 +0000796 Handle<Object> code_object = m.GetCode(source);
797 Handle<Code> code = Handle<Code>::cast(code_object);
798
799 int captures[4] = {42, 37, 87, 117};
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000800 Handle<String> input = factory->NewStringFromStaticChars("foofoo");
801 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +0000802 const byte* start_adr =
803 reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
804
805 NativeRegExpMacroAssembler::Result result =
806 Execute(*code,
807 *input,
808 0,
809 start_adr,
810 start_adr + seq_input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +0000811 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +0000812
813 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
814 CHECK_EQ(-1, captures[0]);
815 CHECK_EQ(-1, captures[1]);
816 CHECK_EQ(-1, captures[2]);
817 CHECK_EQ(-1, captures[3]);
818}
819
820
821TEST(MacroAssemblerNativeSimple) {
822 v8::V8::Initialize();
823 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000824 Isolate* isolate = CcTest::i_isolate();
825 Factory* factory = isolate->factory();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000826 Zone zone;
Steve Blocka7e24c12009-10-30 11:49:00 +0000827
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000828 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
829 4);
Steve Blocka7e24c12009-10-30 11:49:00 +0000830
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000831 Label fail, backtrack;
832 m.PushBacktrack(&fail);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000833 m.CheckNotAtStart(0, NULL);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000834 m.LoadCurrentCharacter(2, NULL);
835 m.CheckNotCharacter('o', NULL);
836 m.LoadCurrentCharacter(1, NULL, false);
837 m.CheckNotCharacter('o', NULL);
838 m.LoadCurrentCharacter(0, NULL, false);
839 m.CheckNotCharacter('f', NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +0000840 m.WriteCurrentPositionToRegister(0, 0);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000841 m.WriteCurrentPositionToRegister(1, 3);
Steve Blocka7e24c12009-10-30 11:49:00 +0000842 m.AdvanceCurrentPosition(3);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000843 m.PushBacktrack(&backtrack);
Steve Blocka7e24c12009-10-30 11:49:00 +0000844 m.Succeed();
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000845 m.Bind(&backtrack);
846 m.Backtrack();
Steve Blocka7e24c12009-10-30 11:49:00 +0000847 m.Bind(&fail);
848 m.Fail();
849
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000850 Handle<String> source = factory->NewStringFromStaticChars("^foo");
Steve Blocka7e24c12009-10-30 11:49:00 +0000851 Handle<Object> code_object = m.GetCode(source);
852 Handle<Code> code = Handle<Code>::cast(code_object);
853
854 int captures[4] = {42, 37, 87, 117};
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000855 Handle<String> input = factory->NewStringFromStaticChars("foofoo");
856 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +0000857 Address start_adr = seq_input->GetCharsAddress();
858
859 NativeRegExpMacroAssembler::Result result =
860 Execute(*code,
861 *input,
862 0,
863 start_adr,
864 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +0000865 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +0000866
867 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
868 CHECK_EQ(0, captures[0]);
869 CHECK_EQ(3, captures[1]);
870 CHECK_EQ(-1, captures[2]);
871 CHECK_EQ(-1, captures[3]);
872
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000873 input = factory->NewStringFromStaticChars("barbarbar");
874 seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +0000875 start_adr = seq_input->GetCharsAddress();
876
877 result = Execute(*code,
878 *input,
879 0,
880 start_adr,
881 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +0000882 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +0000883
884 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
885}
886
887
888TEST(MacroAssemblerNativeSimpleUC16) {
889 v8::V8::Initialize();
890 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000891 Isolate* isolate = CcTest::i_isolate();
892 Factory* factory = isolate->factory();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000893 Zone zone;
Steve Blocka7e24c12009-10-30 11:49:00 +0000894
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000895 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
896 4);
Steve Blocka7e24c12009-10-30 11:49:00 +0000897
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000898 Label fail, backtrack;
899 m.PushBacktrack(&fail);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000900 m.CheckNotAtStart(0, NULL);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000901 m.LoadCurrentCharacter(2, NULL);
902 m.CheckNotCharacter('o', NULL);
903 m.LoadCurrentCharacter(1, NULL, false);
904 m.CheckNotCharacter('o', NULL);
905 m.LoadCurrentCharacter(0, NULL, false);
906 m.CheckNotCharacter('f', NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +0000907 m.WriteCurrentPositionToRegister(0, 0);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000908 m.WriteCurrentPositionToRegister(1, 3);
Steve Blocka7e24c12009-10-30 11:49:00 +0000909 m.AdvanceCurrentPosition(3);
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000910 m.PushBacktrack(&backtrack);
Steve Blocka7e24c12009-10-30 11:49:00 +0000911 m.Succeed();
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000912 m.Bind(&backtrack);
913 m.Backtrack();
Steve Blocka7e24c12009-10-30 11:49:00 +0000914 m.Bind(&fail);
915 m.Fail();
916
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000917 Handle<String> source = factory->NewStringFromStaticChars("^foo");
Steve Blocka7e24c12009-10-30 11:49:00 +0000918 Handle<Object> code_object = m.GetCode(source);
919 Handle<Code> code = Handle<Code>::cast(code_object);
920
921 int captures[4] = {42, 37, 87, 117};
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100922 const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o',
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000923 static_cast<uc16>(0x2603)};
924 Handle<String> input = factory->NewStringFromTwoByte(
925 Vector<const uc16>(input_data, 6)).ToHandleChecked();
Steve Blocka7e24c12009-10-30 11:49:00 +0000926 Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
927 Address start_adr = seq_input->GetCharsAddress();
928
929 NativeRegExpMacroAssembler::Result result =
930 Execute(*code,
931 *input,
932 0,
933 start_adr,
934 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +0000935 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +0000936
937 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
938 CHECK_EQ(0, captures[0]);
939 CHECK_EQ(3, captures[1]);
940 CHECK_EQ(-1, captures[2]);
941 CHECK_EQ(-1, captures[3]);
942
Ben Murdoch3ef787d2012-04-12 10:51:47 +0100943 const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a',
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000944 static_cast<uc16>(0x2603)};
945 input = factory->NewStringFromTwoByte(
946 Vector<const uc16>(input_data2, 9)).ToHandleChecked();
Steve Blocka7e24c12009-10-30 11:49:00 +0000947 seq_input = Handle<SeqTwoByteString>::cast(input);
948 start_adr = seq_input->GetCharsAddress();
949
950 result = Execute(*code,
951 *input,
952 0,
953 start_adr,
954 start_adr + input->length() * 2,
Leon Clarked91b9f72010-01-27 17:25:45 +0000955 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +0000956
957 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
958}
959
960
961TEST(MacroAssemblerNativeBacktrack) {
962 v8::V8::Initialize();
963 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000964 Isolate* isolate = CcTest::i_isolate();
965 Factory* factory = isolate->factory();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000966 Zone zone;
Steve Blocka7e24c12009-10-30 11:49:00 +0000967
Ben Murdoch4a90d5f2016-03-22 12:00:34 +0000968 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
969 0);
Steve Blocka7e24c12009-10-30 11:49:00 +0000970
971 Label fail;
972 Label backtrack;
973 m.LoadCurrentCharacter(10, &fail);
974 m.Succeed();
975 m.Bind(&fail);
976 m.PushBacktrack(&backtrack);
977 m.LoadCurrentCharacter(10, NULL);
978 m.Succeed();
979 m.Bind(&backtrack);
980 m.Fail();
981
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000982 Handle<String> source = factory->NewStringFromStaticChars("..........");
Steve Blocka7e24c12009-10-30 11:49:00 +0000983 Handle<Object> code_object = m.GetCode(source);
984 Handle<Code> code = Handle<Code>::cast(code_object);
985
Ben Murdochb8a8cc12014-11-26 15:28:44 +0000986 Handle<String> input = factory->NewStringFromStaticChars("foofoo");
987 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +0000988 Address start_adr = seq_input->GetCharsAddress();
989
990 NativeRegExpMacroAssembler::Result result =
991 Execute(*code,
992 *input,
993 0,
994 start_adr,
995 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +0000996 NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +0000997
998 CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
999}
1000
1001
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001002TEST(MacroAssemblerNativeBackReferenceLATIN1) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001003 v8::V8::Initialize();
1004 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001005 Isolate* isolate = CcTest::i_isolate();
1006 Factory* factory = isolate->factory();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001007 Zone zone;
Steve Blocka7e24c12009-10-30 11:49:00 +00001008
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001009 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1010 4);
Steve Blocka7e24c12009-10-30 11:49:00 +00001011
1012 m.WriteCurrentPositionToRegister(0, 0);
1013 m.AdvanceCurrentPosition(2);
1014 m.WriteCurrentPositionToRegister(1, 0);
1015 Label nomatch;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001016 m.CheckNotBackReference(0, false, &nomatch);
Steve Blocka7e24c12009-10-30 11:49:00 +00001017 m.Fail();
1018 m.Bind(&nomatch);
1019 m.AdvanceCurrentPosition(2);
1020 Label missing_match;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001021 m.CheckNotBackReference(0, false, &missing_match);
Steve Blocka7e24c12009-10-30 11:49:00 +00001022 m.WriteCurrentPositionToRegister(2, 0);
1023 m.Succeed();
1024 m.Bind(&missing_match);
1025 m.Fail();
1026
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001027 Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
Steve Blocka7e24c12009-10-30 11:49:00 +00001028 Handle<Object> code_object = m.GetCode(source);
1029 Handle<Code> code = Handle<Code>::cast(code_object);
1030
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001031 Handle<String> input = factory->NewStringFromStaticChars("fooofo");
1032 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001033 Address start_adr = seq_input->GetCharsAddress();
1034
1035 int output[4];
1036 NativeRegExpMacroAssembler::Result result =
1037 Execute(*code,
1038 *input,
1039 0,
1040 start_adr,
1041 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001042 output);
Steve Blocka7e24c12009-10-30 11:49:00 +00001043
1044 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1045 CHECK_EQ(0, output[0]);
1046 CHECK_EQ(2, output[1]);
1047 CHECK_EQ(6, output[2]);
1048 CHECK_EQ(-1, output[3]);
1049}
1050
1051
1052TEST(MacroAssemblerNativeBackReferenceUC16) {
1053 v8::V8::Initialize();
1054 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001055 Isolate* isolate = CcTest::i_isolate();
1056 Factory* factory = isolate->factory();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001057 Zone zone;
Steve Blocka7e24c12009-10-30 11:49:00 +00001058
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001059 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
1060 4);
Steve Blocka7e24c12009-10-30 11:49:00 +00001061
1062 m.WriteCurrentPositionToRegister(0, 0);
1063 m.AdvanceCurrentPosition(2);
1064 m.WriteCurrentPositionToRegister(1, 0);
1065 Label nomatch;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001066 m.CheckNotBackReference(0, false, &nomatch);
Steve Blocka7e24c12009-10-30 11:49:00 +00001067 m.Fail();
1068 m.Bind(&nomatch);
1069 m.AdvanceCurrentPosition(2);
1070 Label missing_match;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001071 m.CheckNotBackReference(0, false, &missing_match);
Steve Blocka7e24c12009-10-30 11:49:00 +00001072 m.WriteCurrentPositionToRegister(2, 0);
1073 m.Succeed();
1074 m.Bind(&missing_match);
1075 m.Fail();
1076
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001077 Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
Steve Blocka7e24c12009-10-30 11:49:00 +00001078 Handle<Object> code_object = m.GetCode(source);
1079 Handle<Code> code = Handle<Code>::cast(code_object);
1080
1081 const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001082 Handle<String> input = factory->NewStringFromTwoByte(
1083 Vector<const uc16>(input_data, 6)).ToHandleChecked();
Steve Blocka7e24c12009-10-30 11:49:00 +00001084 Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
1085 Address start_adr = seq_input->GetCharsAddress();
1086
1087 int output[4];
1088 NativeRegExpMacroAssembler::Result result =
1089 Execute(*code,
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001090 *input,
1091 0,
1092 start_adr,
1093 start_adr + input->length() * 2,
1094 output);
Steve Blocka7e24c12009-10-30 11:49:00 +00001095
1096 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1097 CHECK_EQ(0, output[0]);
1098 CHECK_EQ(2, output[1]);
1099 CHECK_EQ(6, output[2]);
1100 CHECK_EQ(-1, output[3]);
1101}
1102
1103
1104
1105TEST(MacroAssemblernativeAtStart) {
1106 v8::V8::Initialize();
1107 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001108 Isolate* isolate = CcTest::i_isolate();
1109 Factory* factory = isolate->factory();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001110 Zone zone;
Steve Blocka7e24c12009-10-30 11:49:00 +00001111
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001112 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1113 0);
Steve Blocka7e24c12009-10-30 11:49:00 +00001114
1115 Label not_at_start, newline, fail;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001116 m.CheckNotAtStart(0, &not_at_start);
Steve Blocka7e24c12009-10-30 11:49:00 +00001117 // Check that prevchar = '\n' and current = 'f'.
1118 m.CheckCharacter('\n', &newline);
1119 m.Bind(&fail);
1120 m.Fail();
1121 m.Bind(&newline);
1122 m.LoadCurrentCharacter(0, &fail);
1123 m.CheckNotCharacter('f', &fail);
1124 m.Succeed();
1125
1126 m.Bind(&not_at_start);
1127 // Check that prevchar = 'o' and current = 'b'.
1128 Label prevo;
1129 m.CheckCharacter('o', &prevo);
1130 m.Fail();
1131 m.Bind(&prevo);
1132 m.LoadCurrentCharacter(0, &fail);
1133 m.CheckNotCharacter('b', &fail);
1134 m.Succeed();
1135
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001136 Handle<String> source = factory->NewStringFromStaticChars("(^f|ob)");
Steve Blocka7e24c12009-10-30 11:49:00 +00001137 Handle<Object> code_object = m.GetCode(source);
1138 Handle<Code> code = Handle<Code>::cast(code_object);
1139
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001140 Handle<String> input = factory->NewStringFromStaticChars("foobar");
1141 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001142 Address start_adr = seq_input->GetCharsAddress();
1143
1144 NativeRegExpMacroAssembler::Result result =
1145 Execute(*code,
1146 *input,
1147 0,
1148 start_adr,
1149 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001150 NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +00001151
1152 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1153
1154 result = Execute(*code,
1155 *input,
1156 3,
1157 start_adr + 3,
1158 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001159 NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +00001160
1161 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1162}
1163
1164
1165TEST(MacroAssemblerNativeBackRefNoCase) {
1166 v8::V8::Initialize();
1167 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001168 Isolate* isolate = CcTest::i_isolate();
1169 Factory* factory = isolate->factory();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001170 Zone zone;
Steve Blocka7e24c12009-10-30 11:49:00 +00001171
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001172 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1173 4);
Steve Blocka7e24c12009-10-30 11:49:00 +00001174
1175 Label fail, succ;
1176
1177 m.WriteCurrentPositionToRegister(0, 0);
1178 m.WriteCurrentPositionToRegister(2, 0);
1179 m.AdvanceCurrentPosition(3);
1180 m.WriteCurrentPositionToRegister(3, 0);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001181 m.CheckNotBackReferenceIgnoreCase(2, false, &fail); // Match "AbC".
1182 m.CheckNotBackReferenceIgnoreCase(2, false, &fail); // Match "ABC".
Steve Blocka7e24c12009-10-30 11:49:00 +00001183 Label expected_fail;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001184 m.CheckNotBackReferenceIgnoreCase(2, false, &expected_fail);
Steve Blocka7e24c12009-10-30 11:49:00 +00001185 m.Bind(&fail);
1186 m.Fail();
1187
1188 m.Bind(&expected_fail);
1189 m.AdvanceCurrentPosition(3); // Skip "xYz"
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001190 m.CheckNotBackReferenceIgnoreCase(2, false, &succ);
Steve Blocka7e24c12009-10-30 11:49:00 +00001191 m.Fail();
1192
1193 m.Bind(&succ);
1194 m.WriteCurrentPositionToRegister(1, 0);
1195 m.Succeed();
1196
1197 Handle<String> source =
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001198 factory->NewStringFromStaticChars("^(abc)\1\1(?!\1)...(?!\1)");
Steve Blocka7e24c12009-10-30 11:49:00 +00001199 Handle<Object> code_object = m.GetCode(source);
1200 Handle<Code> code = Handle<Code>::cast(code_object);
1201
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001202 Handle<String> input = factory->NewStringFromStaticChars("aBcAbCABCxYzab");
1203 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001204 Address start_adr = seq_input->GetCharsAddress();
1205
1206 int output[4];
1207 NativeRegExpMacroAssembler::Result result =
1208 Execute(*code,
1209 *input,
1210 0,
1211 start_adr,
1212 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001213 output);
Steve Blocka7e24c12009-10-30 11:49:00 +00001214
1215 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1216 CHECK_EQ(0, output[0]);
1217 CHECK_EQ(12, output[1]);
1218 CHECK_EQ(0, output[2]);
1219 CHECK_EQ(3, output[3]);
1220}
1221
1222
1223
1224TEST(MacroAssemblerNativeRegisters) {
1225 v8::V8::Initialize();
1226 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001227 Isolate* isolate = CcTest::i_isolate();
1228 Factory* factory = isolate->factory();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001229 Zone zone;
Steve Blocka7e24c12009-10-30 11:49:00 +00001230
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001231 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1232 6);
Steve Blocka7e24c12009-10-30 11:49:00 +00001233
1234 uc16 foo_chars[3] = {'f', 'o', 'o'};
1235 Vector<const uc16> foo(foo_chars, 3);
1236
1237 enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
1238 Label fail;
1239 Label backtrack;
1240 m.WriteCurrentPositionToRegister(out1, 0); // Output: [0]
1241 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1242 m.PushBacktrack(&backtrack);
1243 m.WriteStackPointerToRegister(sp);
1244 // Fill stack and registers
1245 m.AdvanceCurrentPosition(2);
1246 m.WriteCurrentPositionToRegister(out1, 0);
1247 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1248 m.PushBacktrack(&fail);
1249 // Drop backtrack stack frames.
1250 m.ReadStackPointerFromRegister(sp);
1251 // And take the first backtrack (to &backtrack)
1252 m.Backtrack();
1253
1254 m.PushCurrentPosition();
1255 m.AdvanceCurrentPosition(2);
1256 m.PopCurrentPosition();
1257
1258 m.Bind(&backtrack);
1259 m.PopRegister(out1);
1260 m.ReadCurrentPositionFromRegister(out1);
1261 m.AdvanceCurrentPosition(3);
1262 m.WriteCurrentPositionToRegister(out2, 0); // [0,3]
1263
1264 Label loop;
1265 m.SetRegister(loop_cnt, 0); // loop counter
1266 m.Bind(&loop);
1267 m.AdvanceRegister(loop_cnt, 1);
1268 m.AdvanceCurrentPosition(1);
1269 m.IfRegisterLT(loop_cnt, 3, &loop);
1270 m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6]
1271
1272 Label loop2;
1273 m.SetRegister(loop_cnt, 2); // loop counter
1274 m.Bind(&loop2);
1275 m.AdvanceRegister(loop_cnt, -1);
1276 m.AdvanceCurrentPosition(1);
1277 m.IfRegisterGE(loop_cnt, 0, &loop2);
1278 m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9]
1279
1280 Label loop3;
1281 Label exit_loop3;
1282 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1283 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1284 m.ReadCurrentPositionFromRegister(out3);
1285 m.Bind(&loop3);
1286 m.AdvanceCurrentPosition(1);
1287 m.CheckGreedyLoop(&exit_loop3);
1288 m.GoTo(&loop3);
1289 m.Bind(&exit_loop3);
1290 m.PopCurrentPosition();
1291 m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9,-1]
1292
1293 m.Succeed();
1294
1295 m.Bind(&fail);
1296 m.Fail();
1297
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001298 Handle<String> source = factory->NewStringFromStaticChars("<loop test>");
Steve Blocka7e24c12009-10-30 11:49:00 +00001299 Handle<Object> code_object = m.GetCode(source);
1300 Handle<Code> code = Handle<Code>::cast(code_object);
1301
1302 // String long enough for test (content doesn't matter).
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001303 Handle<String> input = factory->NewStringFromStaticChars("foofoofoofoofoo");
1304 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001305 Address start_adr = seq_input->GetCharsAddress();
1306
1307 int output[6];
1308 NativeRegExpMacroAssembler::Result result =
1309 Execute(*code,
1310 *input,
1311 0,
1312 start_adr,
1313 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001314 output);
Steve Blocka7e24c12009-10-30 11:49:00 +00001315
1316 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1317 CHECK_EQ(0, output[0]);
1318 CHECK_EQ(3, output[1]);
1319 CHECK_EQ(6, output[2]);
1320 CHECK_EQ(9, output[3]);
1321 CHECK_EQ(9, output[4]);
1322 CHECK_EQ(-1, output[5]);
1323}
1324
1325
1326TEST(MacroAssemblerStackOverflow) {
1327 v8::V8::Initialize();
1328 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001329 Isolate* isolate = CcTest::i_isolate();
Ben Murdoch8b112d22011-06-08 16:22:53 +01001330 Factory* factory = isolate->factory();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001331 Zone zone;
Steve Blocka7e24c12009-10-30 11:49:00 +00001332
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001333 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1334 0);
Steve Blocka7e24c12009-10-30 11:49:00 +00001335
1336 Label loop;
1337 m.Bind(&loop);
1338 m.PushBacktrack(&loop);
1339 m.GoTo(&loop);
1340
1341 Handle<String> source =
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001342 factory->NewStringFromStaticChars("<stack overflow test>");
Steve Blocka7e24c12009-10-30 11:49:00 +00001343 Handle<Object> code_object = m.GetCode(source);
1344 Handle<Code> code = Handle<Code>::cast(code_object);
1345
1346 // String long enough for test (content doesn't matter).
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001347 Handle<String> input = factory->NewStringFromStaticChars("dummy");
1348 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001349 Address start_adr = seq_input->GetCharsAddress();
1350
1351 NativeRegExpMacroAssembler::Result result =
1352 Execute(*code,
1353 *input,
1354 0,
1355 start_adr,
1356 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001357 NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +00001358
1359 CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result);
Ben Murdoch8b112d22011-06-08 16:22:53 +01001360 CHECK(isolate->has_pending_exception());
1361 isolate->clear_pending_exception();
Steve Blocka7e24c12009-10-30 11:49:00 +00001362}
1363
1364
1365TEST(MacroAssemblerNativeLotsOfRegisters) {
1366 v8::V8::Initialize();
1367 ContextInitializer initializer;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001368 Isolate* isolate = CcTest::i_isolate();
Ben Murdoch8b112d22011-06-08 16:22:53 +01001369 Factory* factory = isolate->factory();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001370 Zone zone;
Steve Blocka7e24c12009-10-30 11:49:00 +00001371
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001372 ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1373 2);
Steve Blocka7e24c12009-10-30 11:49:00 +00001374
1375 // At least 2048, to ensure the allocated space for registers
1376 // span one full page.
1377 const int large_number = 8000;
1378 m.WriteCurrentPositionToRegister(large_number, 42);
1379 m.WriteCurrentPositionToRegister(0, 0);
1380 m.WriteCurrentPositionToRegister(1, 1);
1381 Label done;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001382 m.CheckNotBackReference(0, false, &done); // Performs a system-stack push.
Steve Blocka7e24c12009-10-30 11:49:00 +00001383 m.Bind(&done);
1384 m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck);
1385 m.PopRegister(1);
1386 m.Succeed();
1387
1388 Handle<String> source =
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001389 factory->NewStringFromStaticChars("<huge register space test>");
Steve Blocka7e24c12009-10-30 11:49:00 +00001390 Handle<Object> code_object = m.GetCode(source);
1391 Handle<Code> code = Handle<Code>::cast(code_object);
1392
1393 // String long enough for test (content doesn't matter).
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001394 Handle<String> input = factory->NewStringFromStaticChars("sample text");
1395 Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
Steve Blocka7e24c12009-10-30 11:49:00 +00001396 Address start_adr = seq_input->GetCharsAddress();
1397
1398 int captures[2];
1399 NativeRegExpMacroAssembler::Result result =
1400 Execute(*code,
1401 *input,
1402 0,
1403 start_adr,
1404 start_adr + input->length(),
Leon Clarked91b9f72010-01-27 17:25:45 +00001405 captures);
Steve Blocka7e24c12009-10-30 11:49:00 +00001406
1407 CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1408 CHECK_EQ(0, captures[0]);
1409 CHECK_EQ(42, captures[1]);
1410
Ben Murdoch8b112d22011-06-08 16:22:53 +01001411 isolate->clear_pending_exception();
Steve Blocka7e24c12009-10-30 11:49:00 +00001412}
1413
Steve Block6ded16b2010-05-10 14:33:55 +01001414#else // V8_INTERPRETED_REGEXP
Steve Blocka7e24c12009-10-30 11:49:00 +00001415
1416TEST(MacroAssembler) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001417 byte codes[1024];
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001418 Zone zone;
1419 RegExpMacroAssemblerIrregexp m(CcTest::i_isolate(), Vector<byte>(codes, 1024),
1420 &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001421 // ^f(o)o.
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001422 Label start, fail, backtrack;
1423
Steve Blocka7e24c12009-10-30 11:49:00 +00001424 m.SetRegister(4, 42);
1425 m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
1426 m.AdvanceRegister(4, 42);
1427 m.GoTo(&start);
1428 m.Fail();
1429 m.Bind(&start);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001430 m.PushBacktrack(&fail);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001431 m.CheckNotAtStart(0, NULL);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001432 m.LoadCurrentCharacter(0, NULL);
1433 m.CheckNotCharacter('f', NULL);
1434 m.LoadCurrentCharacter(1, NULL);
1435 m.CheckNotCharacter('o', NULL);
1436 m.LoadCurrentCharacter(2, NULL);
1437 m.CheckNotCharacter('o', NULL);
Steve Blocka7e24c12009-10-30 11:49:00 +00001438 m.WriteCurrentPositionToRegister(0, 0);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001439 m.WriteCurrentPositionToRegister(1, 3);
1440 m.WriteCurrentPositionToRegister(2, 1);
1441 m.WriteCurrentPositionToRegister(3, 2);
Steve Blocka7e24c12009-10-30 11:49:00 +00001442 m.AdvanceCurrentPosition(3);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001443 m.PushBacktrack(&backtrack);
Steve Blocka7e24c12009-10-30 11:49:00 +00001444 m.Succeed();
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001445 m.Bind(&backtrack);
1446 m.ClearRegisters(2, 3);
Steve Blocka7e24c12009-10-30 11:49:00 +00001447 m.Backtrack();
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001448 m.Bind(&fail);
Steve Blocka7e24c12009-10-30 11:49:00 +00001449 m.PopRegister(0);
1450 m.Fail();
1451
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001452 Isolate* isolate = CcTest::i_isolate();
Ben Murdoch8b112d22011-06-08 16:22:53 +01001453 Factory* factory = isolate->factory();
1454 HandleScope scope(isolate);
Steve Blocka7e24c12009-10-30 11:49:00 +00001455
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001456 Handle<String> source = factory->NewStringFromStaticChars("^f(o)o");
Steve Blocka7e24c12009-10-30 11:49:00 +00001457 Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
1458 int captures[5];
1459
1460 const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001461 Handle<String> f1_16 = factory->NewStringFromTwoByte(
1462 Vector<const uc16>(str1, 6)).ToHandleChecked();
Steve Blocka7e24c12009-10-30 11:49:00 +00001463
Ben Murdoch8b112d22011-06-08 16:22:53 +01001464 CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0));
Steve Blocka7e24c12009-10-30 11:49:00 +00001465 CHECK_EQ(0, captures[0]);
1466 CHECK_EQ(3, captures[1]);
1467 CHECK_EQ(1, captures[2]);
1468 CHECK_EQ(2, captures[3]);
1469 CHECK_EQ(84, captures[4]);
1470
1471 const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001472 Handle<String> f2_16 = factory->NewStringFromTwoByte(
1473 Vector<const uc16>(str2, 6)).ToHandleChecked();
Steve Blocka7e24c12009-10-30 11:49:00 +00001474
Ben Murdoch8b112d22011-06-08 16:22:53 +01001475 CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0));
Steve Blocka7e24c12009-10-30 11:49:00 +00001476 CHECK_EQ(42, captures[0]);
1477}
1478
Steve Block6ded16b2010-05-10 14:33:55 +01001479#endif // V8_INTERPRETED_REGEXP
Steve Blocka7e24c12009-10-30 11:49:00 +00001480
1481
1482TEST(AddInverseToTable) {
1483 static const int kLimit = 1000;
1484 static const int kRangeCount = 16;
1485 for (int t = 0; t < 10; t++) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001486 Zone zone;
Steve Blocka7e24c12009-10-30 11:49:00 +00001487 ZoneList<CharacterRange>* ranges =
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001488 new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001489 for (int i = 0; i < kRangeCount; i++) {
1490 int from = PseudoRandom(t + 87, i + 25) % kLimit;
1491 int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));
1492 if (to > kLimit) to = kLimit;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001493 ranges->Add(CharacterRange(from, to), &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001494 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001495 DispatchTable table(&zone);
1496 DispatchTableConstructor cons(&table, false, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001497 cons.set_choice_index(0);
1498 cons.AddInverse(ranges);
1499 for (int i = 0; i < kLimit; i++) {
1500 bool is_on = false;
1501 for (int j = 0; !is_on && j < kRangeCount; j++)
1502 is_on = ranges->at(j).Contains(i);
1503 OutSet* set = table.Get(i);
1504 CHECK_EQ(is_on, set->Get(0) == false);
1505 }
1506 }
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001507 Zone zone;
Steve Blocka7e24c12009-10-30 11:49:00 +00001508 ZoneList<CharacterRange>* ranges =
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001509 new(&zone) ZoneList<CharacterRange>(1, &zone);
1510 ranges->Add(CharacterRange(0xFFF0, 0xFFFE), &zone);
1511 DispatchTable table(&zone);
1512 DispatchTableConstructor cons(&table, false, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001513 cons.set_choice_index(0);
1514 cons.AddInverse(ranges);
1515 CHECK(!table.Get(0xFFFE)->Get(0));
1516 CHECK(table.Get(0xFFFF)->Get(0));
1517}
1518
1519
1520static uc32 canonicalize(uc32 c) {
1521 unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
1522 int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL);
1523 if (count == 0) {
1524 return c;
1525 } else {
1526 CHECK_EQ(1, count);
1527 return canon[0];
1528 }
1529}
1530
1531
1532TEST(LatinCanonicalize) {
1533 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001534 for (unibrow::uchar lower = 'a'; lower <= 'z'; lower++) {
1535 unibrow::uchar upper = lower + ('A' - 'a');
Steve Blocka7e24c12009-10-30 11:49:00 +00001536 CHECK_EQ(canonicalize(lower), canonicalize(upper));
1537 unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1538 int length = un_canonicalize.get(lower, '\0', uncanon);
1539 CHECK_EQ(2, length);
1540 CHECK_EQ(upper, uncanon[0]);
1541 CHECK_EQ(lower, uncanon[1]);
1542 }
1543 for (uc32 c = 128; c < (1 << 21); c++)
1544 CHECK_GE(canonicalize(c), 128);
1545 unibrow::Mapping<unibrow::ToUppercase> to_upper;
Ben Murdochbb769b22010-08-11 14:56:33 +01001546 // Canonicalization is only defined for the Basic Multilingual Plane.
1547 for (uc32 c = 0; c < (1 << 16); c++) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001548 unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
1549 int length = to_upper.get(c, '\0', upper);
1550 if (length == 0) {
1551 length = 1;
1552 upper[0] = c;
1553 }
1554 uc32 u = upper[0];
1555 if (length > 1 || (c >= 128 && u < 128))
1556 u = c;
1557 CHECK_EQ(u, canonicalize(c));
1558 }
1559}
1560
1561
Ben Murdochbb769b22010-08-11 14:56:33 +01001562static uc32 CanonRangeEnd(uc32 c) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001563 unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
1564 int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);
1565 if (count == 0) {
1566 return c;
1567 } else {
1568 CHECK_EQ(1, count);
1569 return canon[0];
1570 }
1571}
1572
1573
1574TEST(RangeCanonicalization) {
Steve Blocka7e24c12009-10-30 11:49:00 +00001575 // Check that we arrive at the same result when using the basic
1576 // range canonicalization primitives as when using immediate
1577 // canonicalization.
1578 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
Ben Murdochbb769b22010-08-11 14:56:33 +01001579 int block_start = 0;
1580 while (block_start <= 0xFFFF) {
1581 uc32 block_end = CanonRangeEnd(block_start);
1582 unsigned block_length = block_end - block_start + 1;
1583 if (block_length > 1) {
1584 unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1585 int first_length = un_canonicalize.get(block_start, '\0', first);
1586 for (unsigned i = 1; i < block_length; i++) {
1587 unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1588 int succ_length = un_canonicalize.get(block_start + i, '\0', succ);
1589 CHECK_EQ(first_length, succ_length);
1590 for (int j = 0; j < succ_length; j++) {
1591 int calc = first[j] + i;
1592 int found = succ[j];
1593 CHECK_EQ(calc, found);
1594 }
Steve Blocka7e24c12009-10-30 11:49:00 +00001595 }
1596 }
Ben Murdochbb769b22010-08-11 14:56:33 +01001597 block_start = block_start + block_length;
Steve Blocka7e24c12009-10-30 11:49:00 +00001598 }
1599}
1600
1601
1602TEST(UncanonicalizeEquivalence) {
1603 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1604 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1605 for (int i = 0; i < (1 << 16); i++) {
1606 int length = un_canonicalize.get(i, '\0', chars);
1607 for (int j = 0; j < length; j++) {
1608 unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1609 int length2 = un_canonicalize.get(chars[j], '\0', chars2);
1610 CHECK_EQ(length, length2);
1611 for (int k = 0; k < length; k++)
1612 CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k]));
1613 }
1614 }
1615}
1616
1617
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001618static void TestRangeCaseIndependence(Isolate* isolate, CharacterRange input,
Steve Blocka7e24c12009-10-30 11:49:00 +00001619 Vector<CharacterRange> expected) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001620 Zone zone;
Steve Blocka7e24c12009-10-30 11:49:00 +00001621 int count = expected.length();
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001622 ZoneList<CharacterRange>* list =
1623 new(&zone) ZoneList<CharacterRange>(count, &zone);
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001624 input.AddCaseEquivalents(isolate, &zone, list, false);
Steve Blocka7e24c12009-10-30 11:49:00 +00001625 CHECK_EQ(count, list->length());
1626 for (int i = 0; i < list->length(); i++) {
1627 CHECK_EQ(expected[i].from(), list->at(i).from());
1628 CHECK_EQ(expected[i].to(), list->at(i).to());
1629 }
1630}
1631
1632
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001633static void TestSimpleRangeCaseIndependence(Isolate* isolate,
1634 CharacterRange input,
Steve Blocka7e24c12009-10-30 11:49:00 +00001635 CharacterRange expected) {
1636 EmbeddedVector<CharacterRange, 1> vector;
1637 vector[0] = expected;
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001638 TestRangeCaseIndependence(isolate, input, vector);
Steve Blocka7e24c12009-10-30 11:49:00 +00001639}
1640
1641
1642TEST(CharacterRangeCaseIndependence) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001643 Isolate* isolate = CcTest::i_isolate();
1644 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('a'),
Steve Blocka7e24c12009-10-30 11:49:00 +00001645 CharacterRange::Singleton('A'));
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001646 TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('z'),
Steve Blocka7e24c12009-10-30 11:49:00 +00001647 CharacterRange::Singleton('Z'));
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001648 TestSimpleRangeCaseIndependence(isolate, CharacterRange('a', 'z'),
Steve Blocka7e24c12009-10-30 11:49:00 +00001649 CharacterRange('A', 'Z'));
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001650 TestSimpleRangeCaseIndependence(isolate, CharacterRange('c', 'f'),
Steve Blocka7e24c12009-10-30 11:49:00 +00001651 CharacterRange('C', 'F'));
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001652 TestSimpleRangeCaseIndependence(isolate, CharacterRange('a', 'b'),
Steve Blocka7e24c12009-10-30 11:49:00 +00001653 CharacterRange('A', 'B'));
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001654 TestSimpleRangeCaseIndependence(isolate, CharacterRange('y', 'z'),
Steve Blocka7e24c12009-10-30 11:49:00 +00001655 CharacterRange('Y', 'Z'));
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001656 TestSimpleRangeCaseIndependence(isolate, CharacterRange('a' - 1, 'z' + 1),
Steve Blocka7e24c12009-10-30 11:49:00 +00001657 CharacterRange('A', 'Z'));
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001658 TestSimpleRangeCaseIndependence(isolate, CharacterRange('A', 'Z'),
Steve Blocka7e24c12009-10-30 11:49:00 +00001659 CharacterRange('a', 'z'));
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001660 TestSimpleRangeCaseIndependence(isolate, CharacterRange('C', 'F'),
Steve Blocka7e24c12009-10-30 11:49:00 +00001661 CharacterRange('c', 'f'));
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001662 TestSimpleRangeCaseIndependence(isolate, CharacterRange('A' - 1, 'Z' + 1),
Steve Blocka7e24c12009-10-30 11:49:00 +00001663 CharacterRange('a', 'z'));
1664 // Here we need to add [l-z] to complete the case independence of
1665 // [A-Za-z] but we expect [a-z] to be added since we always add a
1666 // whole block at a time.
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001667 TestSimpleRangeCaseIndependence(isolate, CharacterRange('A', 'k'),
Steve Blocka7e24c12009-10-30 11:49:00 +00001668 CharacterRange('a', 'z'));
1669}
1670
1671
1672static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) {
1673 if (ranges == NULL)
1674 return false;
1675 for (int i = 0; i < ranges->length(); i++) {
1676 CharacterRange range = ranges->at(i);
1677 if (range.from() <= c && c <= range.to())
1678 return true;
1679 }
1680 return false;
1681}
1682
1683
1684TEST(CharClassDifference) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001685 Zone zone;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001686 ZoneList<CharacterRange>* base =
1687 new(&zone) ZoneList<CharacterRange>(1, &zone);
1688 base->Add(CharacterRange::Everything(), &zone);
1689 Vector<const int> overlay = CharacterRange::GetWordBounds();
Steve Blocka7e24c12009-10-30 11:49:00 +00001690 ZoneList<CharacterRange>* included = NULL;
1691 ZoneList<CharacterRange>* excluded = NULL;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001692 CharacterRange::Split(base, overlay, &included, &excluded, &zone);
Steve Blocka7e24c12009-10-30 11:49:00 +00001693 for (int i = 0; i < (1 << 16); i++) {
1694 bool in_base = InClass(i, base);
1695 if (in_base) {
1696 bool in_overlay = false;
1697 for (int j = 0; !in_overlay && j < overlay.length(); j += 2) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001698 if (overlay[j] <= i && i < overlay[j+1])
Steve Blocka7e24c12009-10-30 11:49:00 +00001699 in_overlay = true;
1700 }
1701 CHECK_EQ(in_overlay, InClass(i, included));
1702 CHECK_EQ(!in_overlay, InClass(i, excluded));
1703 } else {
1704 CHECK(!InClass(i, included));
1705 CHECK(!InClass(i, excluded));
1706 }
1707 }
1708}
1709
1710
Leon Clarkee46be812010-01-19 14:06:41 +00001711TEST(CanonicalizeCharacterSets) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001712 Zone zone;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001713 ZoneList<CharacterRange>* list =
1714 new(&zone) ZoneList<CharacterRange>(4, &zone);
Leon Clarkee46be812010-01-19 14:06:41 +00001715 CharacterSet set(list);
1716
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001717 list->Add(CharacterRange(10, 20), &zone);
1718 list->Add(CharacterRange(30, 40), &zone);
1719 list->Add(CharacterRange(50, 60), &zone);
Leon Clarkee46be812010-01-19 14:06:41 +00001720 set.Canonicalize();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001721 CHECK_EQ(3, list->length());
1722 CHECK_EQ(10, list->at(0).from());
1723 CHECK_EQ(20, list->at(0).to());
1724 CHECK_EQ(30, list->at(1).from());
1725 CHECK_EQ(40, list->at(1).to());
1726 CHECK_EQ(50, list->at(2).from());
1727 CHECK_EQ(60, list->at(2).to());
Leon Clarkee46be812010-01-19 14:06:41 +00001728
1729 list->Rewind(0);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001730 list->Add(CharacterRange(10, 20), &zone);
1731 list->Add(CharacterRange(50, 60), &zone);
1732 list->Add(CharacterRange(30, 40), &zone);
Leon Clarkee46be812010-01-19 14:06:41 +00001733 set.Canonicalize();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001734 CHECK_EQ(3, list->length());
1735 CHECK_EQ(10, list->at(0).from());
1736 CHECK_EQ(20, list->at(0).to());
1737 CHECK_EQ(30, list->at(1).from());
1738 CHECK_EQ(40, list->at(1).to());
1739 CHECK_EQ(50, list->at(2).from());
1740 CHECK_EQ(60, list->at(2).to());
Leon Clarkee46be812010-01-19 14:06:41 +00001741
1742 list->Rewind(0);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001743 list->Add(CharacterRange(30, 40), &zone);
1744 list->Add(CharacterRange(10, 20), &zone);
1745 list->Add(CharacterRange(25, 25), &zone);
1746 list->Add(CharacterRange(100, 100), &zone);
1747 list->Add(CharacterRange(1, 1), &zone);
Leon Clarkee46be812010-01-19 14:06:41 +00001748 set.Canonicalize();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001749 CHECK_EQ(5, list->length());
1750 CHECK_EQ(1, list->at(0).from());
1751 CHECK_EQ(1, list->at(0).to());
1752 CHECK_EQ(10, list->at(1).from());
1753 CHECK_EQ(20, list->at(1).to());
1754 CHECK_EQ(25, list->at(2).from());
1755 CHECK_EQ(25, list->at(2).to());
1756 CHECK_EQ(30, list->at(3).from());
1757 CHECK_EQ(40, list->at(3).to());
1758 CHECK_EQ(100, list->at(4).from());
1759 CHECK_EQ(100, list->at(4).to());
Leon Clarkee46be812010-01-19 14:06:41 +00001760
1761 list->Rewind(0);
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001762 list->Add(CharacterRange(10, 19), &zone);
1763 list->Add(CharacterRange(21, 30), &zone);
1764 list->Add(CharacterRange(20, 20), &zone);
Leon Clarkee46be812010-01-19 14:06:41 +00001765 set.Canonicalize();
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001766 CHECK_EQ(1, list->length());
1767 CHECK_EQ(10, list->at(0).from());
1768 CHECK_EQ(30, list->at(0).to());
Leon Clarkee46be812010-01-19 14:06:41 +00001769}
1770
Leon Clarked91b9f72010-01-27 17:25:45 +00001771
1772TEST(CharacterRangeMerge) {
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001773 Zone zone;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001774 ZoneList<CharacterRange> l1(4, &zone);
1775 ZoneList<CharacterRange> l2(4, &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001776 // Create all combinations of intersections of ranges, both singletons and
1777 // longer.
1778
1779 int offset = 0;
1780
1781 // The five kinds of singleton intersections:
1782 // X
1783 // Y - outside before
1784 // Y - outside touching start
1785 // Y - overlap
1786 // Y - outside touching end
1787 // Y - outside after
1788
1789 for (int i = 0; i < 5; i++) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001790 l1.Add(CharacterRange::Singleton(offset + 2), &zone);
1791 l2.Add(CharacterRange::Singleton(offset + i), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001792 offset += 6;
1793 }
1794
1795 // The seven kinds of singleton/non-singleton intersections:
1796 // XXX
1797 // Y - outside before
1798 // Y - outside touching start
1799 // Y - inside touching start
1800 // Y - entirely inside
1801 // Y - inside touching end
1802 // Y - outside touching end
1803 // Y - disjoint after
1804
1805 for (int i = 0; i < 7; i++) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001806 l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone);
1807 l2.Add(CharacterRange::Singleton(offset + i), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001808 offset += 8;
1809 }
1810
1811 // The eleven kinds of non-singleton intersections:
1812 //
1813 // XXXXXXXX
1814 // YYYY - outside before.
1815 // YYYY - outside touching start.
1816 // YYYY - overlapping start
1817 // YYYY - inside touching start
1818 // YYYY - entirely inside
1819 // YYYY - inside touching end
1820 // YYYY - overlapping end
1821 // YYYY - outside touching end
1822 // YYYY - outside after
1823 // YYYYYYYY - identical
1824 // YYYYYYYYYYYY - containing entirely.
1825
1826 for (int i = 0; i < 9; i++) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001827 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); // Length 8.
1828 l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001829 offset += 22;
1830 }
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001831 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1832 l2.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001833 offset += 22;
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001834 l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1835 l2.Add(CharacterRange::Range(offset + 4, offset + 17), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001836 offset += 22;
1837
1838 // Different kinds of multi-range overlap:
1839 // XXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXX
1840 // YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y
1841
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001842 l1.Add(CharacterRange::Range(offset, offset + 21), &zone);
1843 l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001844 for (int i = 0; i < 6; i++) {
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001845 l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone);
1846 l2.Add(CharacterRange::Singleton(offset + 8), &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001847 offset += 9;
1848 }
1849
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001850 CHECK(CharacterRange::IsCanonical(&l1));
1851 CHECK(CharacterRange::IsCanonical(&l2));
Leon Clarked91b9f72010-01-27 17:25:45 +00001852
Ben Murdochb8a8cc12014-11-26 15:28:44 +00001853 ZoneList<CharacterRange> first_only(4, &zone);
1854 ZoneList<CharacterRange> second_only(4, &zone);
1855 ZoneList<CharacterRange> both(4, &zone);
Leon Clarked91b9f72010-01-27 17:25:45 +00001856}
Leon Clarkee46be812010-01-19 14:06:41 +00001857
1858
Steve Blocka7e24c12009-10-30 11:49:00 +00001859TEST(Graph) {
Leon Clarkee46be812010-01-19 14:06:41 +00001860 Execute("\\b\\w+\\b", false, true, true);
Steve Blocka7e24c12009-10-30 11:49:00 +00001861}
Ben Murdoch4a90d5f2016-03-22 12:00:34 +00001862
1863
1864namespace {
1865
1866int* global_use_counts = NULL;
1867
1868void MockUseCounterCallback(v8::Isolate* isolate,
1869 v8::Isolate::UseCounterFeature feature) {
1870 ++global_use_counts[feature];
1871}
1872}
1873
1874
1875// Test that ES2015 RegExp compatibility fixes are in place, that they
1876// are not overly broad, and the appropriate UseCounters are incremented
1877TEST(UseCountRegExp) {
1878 i::FLAG_harmony_regexps = true;
1879 v8::Isolate* isolate = CcTest::isolate();
1880 v8::HandleScope scope(isolate);
1881 LocalContext env;
1882 int use_counts[v8::Isolate::kUseCounterFeatureCount] = {};
1883 global_use_counts = use_counts;
1884 CcTest::isolate()->SetUseCounterCallback(MockUseCounterCallback);
1885
1886 // Compat fix: RegExp.prototype.sticky == undefined; UseCounter tracks it
1887 v8::Local<v8::Value> resultSticky = CompileRun("RegExp.prototype.sticky");
1888 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1889 CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1890 CHECK(resultSticky->IsUndefined());
1891
1892 // re.sticky has approriate value and doesn't touch UseCounter
1893 v8::Local<v8::Value> resultReSticky = CompileRun("/a/.sticky");
1894 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1895 CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1896 CHECK(resultReSticky->IsFalse());
1897
1898 // When the getter is caleld on another object, throw an exception
1899 // and don't increment the UseCounter
1900 v8::Local<v8::Value> resultStickyError = CompileRun(
1901 "var exception;"
1902 "try { "
1903 " Object.getOwnPropertyDescriptor(RegExp.prototype, 'sticky')"
1904 " .get.call(null);"
1905 "} catch (e) {"
1906 " exception = e;"
1907 "}"
1908 "exception");
1909 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1910 CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1911 CHECK(resultStickyError->IsObject());
1912
1913 // RegExp.prototype.toString() returns '/(?:)/' as a compatibility fix;
1914 // a UseCounter is incremented to track it.
1915 v8::Local<v8::Value> resultToString =
1916 CompileRun("RegExp.prototype.toString().length");
1917 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1918 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1919 CHECK(resultToString->IsInt32());
1920 CHECK_EQ(6,
1921 resultToString->Int32Value(isolate->GetCurrentContext()).FromJust());
1922
1923 // .toString() works on normal RegExps
1924 v8::Local<v8::Value> resultReToString = CompileRun("/a/.toString().length");
1925 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1926 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1927 CHECK(resultReToString->IsInt32());
1928 CHECK_EQ(
1929 3, resultReToString->Int32Value(isolate->GetCurrentContext()).FromJust());
1930
1931 // .toString() throws on non-RegExps that aren't RegExp.prototype
1932 v8::Local<v8::Value> resultToStringError = CompileRun(
1933 "var exception;"
1934 "try { RegExp.prototype.toString.call(null) }"
1935 "catch (e) { exception = e; }"
1936 "exception");
1937 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1938 CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1939 CHECK(resultToStringError->IsObject());
1940}