// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <stdlib.h>

#include "v8.h"

#include "string-stream.h"
#include "cctest.h"
#include "zone-inl.h"
#include "parser.h"
#include "ast.h"
#include "jsregexp-inl.h"
#include "regexp-macro-assembler.h"
#include "regexp-macro-assembler-irregexp.h"
#ifdef V8_ARCH_ARM
#include "arm/regexp-macro-assembler-arm.h"
#endif
#ifdef V8_ARCH_X64
// No X64-implementation yet.
#endif
#ifdef V8_ARCH_IA32
#include "ia32/macro-assembler-ia32.h"
#include "ia32/regexp-macro-assembler-ia32.h"
#endif
#include "interpreter-irregexp.h"


using namespace v8::internal;
55
56
57static SmartPointer<const char> Parse(const char* input) {
58 V8::Initialize(NULL);
59 v8::HandleScope scope;
60 ZoneScope zone_scope(DELETE_ON_EXIT);
61 FlatStringReader reader(CStrVector(input));
ager@chromium.org8bb60582008-12-11 12:02:20 +000062 RegExpCompileData result;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000063 CHECK(v8::internal::ParseRegExp(&reader, false, &result));
64 CHECK(result.tree != NULL);
65 CHECK(result.error.is_null());
66 SmartPointer<const char> output = result.tree->ToString();
67 return output;
68}
69
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +000070static bool CheckSimple(const char* input) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +000071 V8::Initialize(NULL);
72 v8::HandleScope scope;
73 unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
74 ZoneScope zone_scope(DELETE_ON_EXIT);
75 FlatStringReader reader(CStrVector(input));
ager@chromium.org8bb60582008-12-11 12:02:20 +000076 RegExpCompileData result;
ager@chromium.orga74f0da2008-12-03 16:05:52 +000077 CHECK(v8::internal::ParseRegExp(&reader, false, &result));
78 CHECK(result.tree != NULL);
79 CHECK(result.error.is_null());
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +000080 return result.simple;
81}
82
// Plain aggregate carrying the two match-length bounds that
// CheckMinMaxMatch() reads from a parsed regexp tree. It must stay an
// aggregate: callers initialize it with `{ min, max }`.
struct MinMaxPair {
  int min_match;  // Value of RegExpTree::min_match() for the pattern.
  int max_match;  // Value of RegExpTree::max_match() for the pattern.
};
87
88static MinMaxPair CheckMinMaxMatch(const char* input) {
89 V8::Initialize(NULL);
90 v8::HandleScope scope;
91 unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
92 ZoneScope zone_scope(DELETE_ON_EXIT);
93 FlatStringReader reader(CStrVector(input));
94 RegExpCompileData result;
95 CHECK(v8::internal::ParseRegExp(&reader, false, &result));
96 CHECK(result.tree != NULL);
97 CHECK(result.error.is_null());
98 int min_match = result.tree->min_match();
99 int max_match = result.tree->max_match();
100 MinMaxPair pair = { min_match, max_match };
101 return pair;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000102}
103
104
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000105
// Assertion helpers for the parser tests. Each one expands to a single
// statement that needs a terminating ';' at the call site, so they can be
// used safely inside unbraced if/else bodies.

// Checks that the printed syntax tree for |input| equals |expected|.
#define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input))

// Checks that the parser's `simple` flag for |input| equals |simple|.
#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input))

// Checks both match-length bounds computed for |input|.
#define CHECK_MIN_MAX(input, min, max)                                    \
  do {                                                                    \
    MinMaxPair min_max = CheckMinMaxMatch(input);                         \
    CHECK_EQ(min, min_max.min_match);                                     \
    CHECK_EQ(max, min_max.max_match);                                     \
  } while (false)
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000113
114TEST(Parser) {
115 V8::Initialize(NULL);
116 CHECK_PARSE_EQ("abc", "'abc'");
117 CHECK_PARSE_EQ("", "%");
118 CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')");
119 CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
120 CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)");
121 CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
122 CHECK_PARSE_EQ("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
123 CHECK_PARSE_EQ("a*", "(# 0 - g 'a')");
124 CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')");
125 CHECK_PARSE_EQ("abc+", "(: 'ab' (# 1 - g 'c'))");
126 CHECK_PARSE_EQ("abc+?", "(: 'ab' (# 1 - n 'c'))");
127 CHECK_PARSE_EQ("xyz?", "(: 'xy' (# 0 1 g 'z'))");
128 CHECK_PARSE_EQ("xyz??", "(: 'xy' (# 0 1 n 'z'))");
129 CHECK_PARSE_EQ("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
130 CHECK_PARSE_EQ("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
131 CHECK_PARSE_EQ("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
132 CHECK_PARSE_EQ("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
133 CHECK_PARSE_EQ("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
134 CHECK_PARSE_EQ("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
135 CHECK_PARSE_EQ("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
136 CHECK_PARSE_EQ("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
137 CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
138 CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
139 CHECK_PARSE_EQ("(?:foo)", "'foo'");
140 CHECK_PARSE_EQ("(?: foo )", "' foo '");
141 CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
142 CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
143 CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
144 CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
145 CHECK_PARSE_EQ("()", "(^ %)");
146 CHECK_PARSE_EQ("(?=)", "(-> + %)");
147 CHECK_PARSE_EQ("[]", "^[\\x00-\\uffff]"); // Doesn't compile on windows
148 CHECK_PARSE_EQ("[^]", "[\\x00-\\uffff]"); // \uffff isn't in codepage 1252
149 CHECK_PARSE_EQ("[x]", "[x]");
150 CHECK_PARSE_EQ("[xyz]", "[x y z]");
151 CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
152 CHECK_PARSE_EQ("[-123]", "[- 1 2 3]");
153 CHECK_PARSE_EQ("[^123]", "^[1 2 3]");
154 CHECK_PARSE_EQ("]", "']'");
155 CHECK_PARSE_EQ("}", "'}'");
156 CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]");
157 CHECK_PARSE_EQ("[\\d]", "[0-9]");
158 CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]");
159 CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]");
160 CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]");
161 CHECK_PARSE_EQ("[z-\\d]", "[z - 0-9]");
162 CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK",
163 "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
164 CHECK_PARSE_EQ("\\c!", "'c!'");
165 CHECK_PARSE_EQ("\\c_", "'c_'");
166 CHECK_PARSE_EQ("\\c~", "'c~'");
167 CHECK_PARSE_EQ("[a\\]c]", "[a ] c]");
168 CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
169 CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");
170 CHECK_PARSE_EQ("\\0", "'\\x00'");
171 CHECK_PARSE_EQ("\\8", "'8'");
172 CHECK_PARSE_EQ("\\9", "'9'");
173 CHECK_PARSE_EQ("\\11", "'\\x09'");
174 CHECK_PARSE_EQ("\\11a", "'\\x09a'");
175 CHECK_PARSE_EQ("\\011", "'\\x09'");
176 CHECK_PARSE_EQ("\\00011", "'\\x0011'");
177 CHECK_PARSE_EQ("\\118", "'\\x098'");
178 CHECK_PARSE_EQ("\\111", "'I'");
179 CHECK_PARSE_EQ("\\1111", "'I1'");
180 CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
181 CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
182 CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
183 CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
184 CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')"
185 " (# 0 - g (<- 1)))");
186 CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')"
187 " (# 0 - g (<- 2)))");
188 CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')"
189 " (# 0 - g (<- 3)))");
190 CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')"
191 " (# 0 - g '\\x04'))");
192 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
193 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
194 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
195 CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
196 "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
197 " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
198 CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))");
199 CHECK_PARSE_EQ("(a\\1)", "(^ 'a')");
200 CHECK_PARSE_EQ("(\\1a)", "(^ 'a')");
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000201 CHECK_PARSE_EQ("(?=a)?a", "'a'");
202 CHECK_PARSE_EQ("(?=a){0,10}a", "'a'");
203 CHECK_PARSE_EQ("(?=a){1,10}a", "(: (-> + 'a') 'a')");
204 CHECK_PARSE_EQ("(?=a){9,10}a", "(: (-> + 'a') 'a')");
205 CHECK_PARSE_EQ("(?!a)?a", "'a'");
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000206 CHECK_PARSE_EQ("\\1(a)", "(^ 'a')");
207 CHECK_PARSE_EQ("(?!(a))\\1", "(-> - (^ 'a'))");
208 CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(-> - (: (^ 'a') (<- 1)))");
209 CHECK_PARSE_EQ("[\\0]", "[\\x00]");
210 CHECK_PARSE_EQ("[\\11]", "[\\x09]");
211 CHECK_PARSE_EQ("[\\11a]", "[\\x09 a]");
212 CHECK_PARSE_EQ("[\\011]", "[\\x09]");
213 CHECK_PARSE_EQ("[\\00011]", "[\\x00 1 1]");
214 CHECK_PARSE_EQ("[\\118]", "[\\x09 8]");
215 CHECK_PARSE_EQ("[\\111]", "[I]");
216 CHECK_PARSE_EQ("[\\1111]", "[I 1]");
217 CHECK_PARSE_EQ("\\x34", "'\x34'");
218 CHECK_PARSE_EQ("\\x60", "'\x60'");
219 CHECK_PARSE_EQ("\\x3z", "'x3z'");
kasperl@chromium.org7be3c992009-03-12 07:19:55 +0000220 CHECK_PARSE_EQ("\\c", "'c'");
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000221 CHECK_PARSE_EQ("\\u0034", "'\x34'");
222 CHECK_PARSE_EQ("\\u003z", "'u003z'");
223 CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))");
224
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000225 CHECK_SIMPLE("a", true);
226 CHECK_SIMPLE("a|b", false);
227 CHECK_SIMPLE("a\\n", false);
228 CHECK_SIMPLE("^a", false);
229 CHECK_SIMPLE("a$", false);
230 CHECK_SIMPLE("a\\b!", false);
231 CHECK_SIMPLE("a\\Bb", false);
232 CHECK_SIMPLE("a*", false);
233 CHECK_SIMPLE("a*?", false);
234 CHECK_SIMPLE("a?", false);
235 CHECK_SIMPLE("a??", false);
236 CHECK_SIMPLE("a{0,1}?", false);
237 CHECK_SIMPLE("a{1,1}?", false);
238 CHECK_SIMPLE("a{1,2}?", false);
239 CHECK_SIMPLE("a+?", false);
240 CHECK_SIMPLE("(a)", false);
241 CHECK_SIMPLE("(a)\\1", false);
242 CHECK_SIMPLE("(\\1a)", false);
243 CHECK_SIMPLE("\\1(a)", false);
244 CHECK_SIMPLE("a\\s", false);
245 CHECK_SIMPLE("a\\S", false);
246 CHECK_SIMPLE("a\\d", false);
247 CHECK_SIMPLE("a\\D", false);
248 CHECK_SIMPLE("a\\w", false);
249 CHECK_SIMPLE("a\\W", false);
250 CHECK_SIMPLE("a.", false);
251 CHECK_SIMPLE("a\\q", false);
252 CHECK_SIMPLE("a[a]", false);
253 CHECK_SIMPLE("a[^a]", false);
254 CHECK_SIMPLE("a[a-z]", false);
255 CHECK_SIMPLE("a[\\q]", false);
256 CHECK_SIMPLE("a(?:b)", false);
257 CHECK_SIMPLE("a(?=b)", false);
258 CHECK_SIMPLE("a(?!b)", false);
259 CHECK_SIMPLE("\\x60", false);
260 CHECK_SIMPLE("\\u0060", false);
261 CHECK_SIMPLE("\\cA", false);
262 CHECK_SIMPLE("\\q", false);
263 CHECK_SIMPLE("\\1112", false);
264 CHECK_SIMPLE("\\0", false);
265 CHECK_SIMPLE("(a)\\1", false);
266 CHECK_SIMPLE("(?=a)?a", false);
267 CHECK_SIMPLE("(?!a)?a\\1", false);
268 CHECK_SIMPLE("(?:(?=a))a\\1", false);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000269
270 CHECK_PARSE_EQ("a{}", "'a{}'");
271 CHECK_PARSE_EQ("a{,}", "'a{,}'");
272 CHECK_PARSE_EQ("a{", "'a{'");
273 CHECK_PARSE_EQ("a{z}", "'a{z}'");
274 CHECK_PARSE_EQ("a{1z}", "'a{1z}'");
275 CHECK_PARSE_EQ("a{12z}", "'a{12z}'");
276 CHECK_PARSE_EQ("a{12,", "'a{12,'");
277 CHECK_PARSE_EQ("a{12,3b", "'a{12,3b'");
278 CHECK_PARSE_EQ("{}", "'{}'");
279 CHECK_PARSE_EQ("{,}", "'{,}'");
280 CHECK_PARSE_EQ("{", "'{'");
281 CHECK_PARSE_EQ("{z}", "'{z}'");
282 CHECK_PARSE_EQ("{1z}", "'{1z}'");
283 CHECK_PARSE_EQ("{12z}", "'{12z}'");
284 CHECK_PARSE_EQ("{12,", "'{12,'");
285 CHECK_PARSE_EQ("{12,3b", "'{12,3b'");
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000286
287 CHECK_MIN_MAX("a", 1, 1);
288 CHECK_MIN_MAX("abc", 3, 3);
289 CHECK_MIN_MAX("a[bc]d", 3, 3);
290 CHECK_MIN_MAX("a|bc", 1, 2);
291 CHECK_MIN_MAX("ab|c", 1, 2);
292 CHECK_MIN_MAX("a||bc", 0, 2);
293 CHECK_MIN_MAX("|", 0, 0);
294 CHECK_MIN_MAX("(?:ab)", 2, 2);
295 CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
296 CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
297 CHECK_MIN_MAX("(ab)", 2, 2);
298 CHECK_MIN_MAX("(ab|cde)", 2, 3);
ager@chromium.org32912102009-01-16 10:38:43 +0000299 CHECK_MIN_MAX("(ab)\\1", 2, 4);
300 CHECK_MIN_MAX("(ab|cde)\\1", 2, 6);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000301 CHECK_MIN_MAX("(?:ab)?", 0, 2);
302 CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
303 CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
304 CHECK_MIN_MAX("a?", 0, 1);
305 CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
306 CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
307 CHECK_MIN_MAX("a??", 0, 1);
308 CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
309 CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
310 CHECK_MIN_MAX("(?:a?)?", 0, 1);
311 CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
312 CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
313 CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
314 CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
315 CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
316 CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
317 CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
318 CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
319 CHECK_MIN_MAX("a{0}", 0, 0);
320 CHECK_MIN_MAX("(?:a+){0}", 0, 0);
321 CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
322 CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
323 CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
324 CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
325 CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
326 CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
327 CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
328 CHECK_MIN_MAX("a\\bc", 2, 2);
329 CHECK_MIN_MAX("a\\Bc", 2, 2);
330 CHECK_MIN_MAX("a\\sc", 3, 3);
331 CHECK_MIN_MAX("a\\Sc", 3, 3);
332 CHECK_MIN_MAX("a(?=b)c", 2, 2);
333 CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
334 CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000335}
336
337TEST(ParserRegression) {
338 CHECK_PARSE_EQ("[A-Z$-][x]", "(! [A-Z $ -] [x])");
339 CHECK_PARSE_EQ("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
340 CHECK_PARSE_EQ("{", "'{'");
341 CHECK_PARSE_EQ("a|", "(| 'a' %)");
342}
343
344static void ExpectError(const char* input,
345 const char* expected) {
346 V8::Initialize(NULL);
347 v8::HandleScope scope;
348 ZoneScope zone_scope(DELETE_ON_EXIT);
349 FlatStringReader reader(CStrVector(input));
ager@chromium.org8bb60582008-12-11 12:02:20 +0000350 RegExpCompileData result;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000351 CHECK_EQ(false, v8::internal::ParseRegExp(&reader, false, &result));
352 CHECK(result.tree == NULL);
353 CHECK(!result.error.is_null());
354 SmartPointer<char> str = result.error->ToCString(ALLOW_NULLS);
355 CHECK_EQ(expected, *str);
356}
357
358
359TEST(Errors) {
360 V8::Initialize(NULL);
361 const char* kEndBackslash = "\\ at end of pattern";
362 ExpectError("\\", kEndBackslash);
363 const char* kUnterminatedGroup = "Unterminated group";
364 ExpectError("(foo", kUnterminatedGroup);
365 const char* kInvalidGroup = "Invalid group";
366 ExpectError("(?", kInvalidGroup);
367 const char* kUnterminatedCharacterClass = "Unterminated character class";
368 ExpectError("[", kUnterminatedCharacterClass);
369 ExpectError("[a-", kUnterminatedCharacterClass);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000370 const char* kNothingToRepeat = "Nothing to repeat";
371 ExpectError("*", kNothingToRepeat);
372 ExpectError("?", kNothingToRepeat);
373 ExpectError("+", kNothingToRepeat);
374 ExpectError("{1}", kNothingToRepeat);
375 ExpectError("{1,2}", kNothingToRepeat);
376 ExpectError("{1,}", kNothingToRepeat);
iposva@chromium.org245aa852009-02-10 00:49:54 +0000377
378 // Check that we don't allow more than kMaxCapture captures
379 const int kMaxCaptures = 1 << 16; // Must match RegExpParser::kMaxCaptures.
380 const char* kTooManyCaptures = "Too many captures";
381 HeapStringAllocator allocator;
382 StringStream accumulator(&allocator);
383 for (int i = 0; i <= kMaxCaptures; i++) {
384 accumulator.Add("()");
385 }
386 SmartPointer<const char> many_captures(accumulator.ToCString());
387 ExpectError(*many_captures, kTooManyCaptures);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000388}
389
390
391static bool IsDigit(uc16 c) {
392 return ('0' <= c && c <= '9');
393}
394
395
396static bool NotDigit(uc16 c) {
397 return !IsDigit(c);
398}
399
400
401static bool IsWhiteSpace(uc16 c) {
402 switch (c) {
403 case 0x09:
404 case 0x0A:
405 case 0x0B:
406 case 0x0C:
407 case 0x0d:
408 case 0x20:
409 case 0xA0:
410 case 0x2028:
411 case 0x2029:
412 return true;
413 default:
414 return unibrow::Space::Is(c);
415 }
416}
417
418
419static bool NotWhiteSpace(uc16 c) {
420 return !IsWhiteSpace(c);
421}
422
423
424static bool NotWord(uc16 c) {
425 return !IsRegExpWord(c);
426}
427
428
429static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
430 ZoneScope scope(DELETE_ON_EXIT);
431 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
432 CharacterRange::AddClassEscape(c, ranges);
433 for (unsigned i = 0; i < (1 << 16); i++) {
434 bool in_class = false;
435 for (int j = 0; !in_class && j < ranges->length(); j++) {
436 CharacterRange& range = ranges->at(j);
437 in_class = (range.from() <= i && i <= range.to());
438 }
439 CHECK_EQ(pred(i), in_class);
440 }
441}
442
443
444TEST(CharacterClassEscapes) {
445 TestCharacterClassEscapes('.', IsRegExpNewline);
446 TestCharacterClassEscapes('d', IsDigit);
447 TestCharacterClassEscapes('D', NotDigit);
448 TestCharacterClassEscapes('s', IsWhiteSpace);
449 TestCharacterClassEscapes('S', NotWhiteSpace);
450 TestCharacterClassEscapes('w', IsRegExpWord);
451 TestCharacterClassEscapes('W', NotWord);
452}
453
454
ager@chromium.org8bb60582008-12-11 12:02:20 +0000455static RegExpNode* Compile(const char* input, bool multiline, bool is_ascii) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000456 V8::Initialize(NULL);
457 FlatStringReader reader(CStrVector(input));
ager@chromium.org8bb60582008-12-11 12:02:20 +0000458 RegExpCompileData compile_data;
459 if (!v8::internal::ParseRegExp(&reader, multiline, &compile_data))
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000460 return NULL;
ager@chromium.org8bb60582008-12-11 12:02:20 +0000461 Handle<String> pattern = Factory::NewStringFromUtf8(CStrVector(input));
462 RegExpEngine::Compile(&compile_data, false, multiline, pattern, is_ascii);
463 return compile_data.node;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000464}
465
466
467static void Execute(const char* input,
468 bool multiline,
ager@chromium.org8bb60582008-12-11 12:02:20 +0000469 bool is_ascii,
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000470 bool dot_output = false) {
471 v8::HandleScope scope;
472 ZoneScope zone_scope(DELETE_ON_EXIT);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000473 RegExpNode* node = Compile(input, multiline, is_ascii);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000474 USE(node);
475#ifdef DEBUG
476 if (dot_output) {
477 RegExpEngine::DotPrint(input, node, false);
478 exit(0);
479 }
480#endif // DEBUG
481}
482
483
// Configuration type used to instantiate ZoneSplayTree in the tests below:
// int keys, int values, and a three-way key comparison.
class TestConfig {
 public:
  typedef int Key;
  typedef int Value;
  static const int kNoKey;
  static const int kNoValue;

  // Three-way comparison: returns -1 if a < b, 1 if a > b and 0 if equal.
  static inline int Compare(int a, int b) {
    if (a == b) return 0;
    return (a < b) ? -1 : 1;
  }
};


const int TestConfig::kNoKey = 0;
const int TestConfig::kNoValue = 0;
503
504
// Deterministic mixing function used to generate reproducible test data:
// multiplies |i| by 781 and |j| by 329 and XORs the two products.
//
// The arithmetic is carried out on unsigned values so that wrap-around is
// well defined for every argument pair. For arguments whose products fit in
// an int the result is the same as computing in int and converting.
static unsigned PseudoRandom(int i, int j) {
  const unsigned scaled_i = static_cast<unsigned>(i) * 781u;
  const unsigned scaled_j = static_cast<unsigned>(j) * 329u;
  return scaled_i ^ scaled_j;
}
508
509
510TEST(SplayTreeSimple) {
ager@chromium.org5ec48922009-05-05 07:25:34 +0000511 static const unsigned kLimit = 1000;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000512 ZoneScope zone_scope(DELETE_ON_EXIT);
513 ZoneSplayTree<TestConfig> tree;
ager@chromium.org5ec48922009-05-05 07:25:34 +0000514 bool seen[kLimit];
515 for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000516#define CHECK_MAPS_EQUAL() do { \
ager@chromium.org5ec48922009-05-05 07:25:34 +0000517 for (unsigned k = 0; k < kLimit; k++) \
518 CHECK_EQ(seen[k], tree.Find(k, &loc)); \
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000519 } while (false)
520 for (int i = 0; i < 50; i++) {
521 for (int j = 0; j < 50; j++) {
ager@chromium.org5ec48922009-05-05 07:25:34 +0000522 unsigned next = PseudoRandom(i, j) % kLimit;
523 if (seen[next]) {
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000524 // We've already seen this one. Check the value and remove
525 // it.
526 ZoneSplayTree<TestConfig>::Locator loc;
527 CHECK(tree.Find(next, &loc));
528 CHECK_EQ(next, loc.key());
529 CHECK_EQ(3 * next, loc.value());
530 tree.Remove(next);
ager@chromium.org5ec48922009-05-05 07:25:34 +0000531 seen[next] = false;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000532 CHECK_MAPS_EQUAL();
533 } else {
534 // Check that it wasn't there already and then add it.
535 ZoneSplayTree<TestConfig>::Locator loc;
536 CHECK(!tree.Find(next, &loc));
537 CHECK(tree.Insert(next, &loc));
538 CHECK_EQ(next, loc.key());
539 loc.set_value(3 * next);
ager@chromium.org5ec48922009-05-05 07:25:34 +0000540 seen[next] = true;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000541 CHECK_MAPS_EQUAL();
542 }
543 int val = PseudoRandom(j, i) % kLimit;
ager@chromium.org5ec48922009-05-05 07:25:34 +0000544 if (seen[val]) {
545 ZoneSplayTree<TestConfig>::Locator loc;
546 CHECK(tree.FindGreatestLessThan(val, &loc));
547 CHECK_EQ(loc.key(), val);
548 break;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000549 }
550 val = PseudoRandom(i + j, i - j) % kLimit;
ager@chromium.org5ec48922009-05-05 07:25:34 +0000551 if (seen[val]) {
552 ZoneSplayTree<TestConfig>::Locator loc;
553 CHECK(tree.FindLeastGreaterThan(val, &loc));
554 CHECK_EQ(loc.key(), val);
555 break;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000556 }
557 }
558 }
559}
560
561
562TEST(DispatchTableConstruction) {
563 // Initialize test data.
564 static const int kLimit = 1000;
565 static const int kRangeCount = 8;
566 static const int kRangeSize = 16;
567 uc16 ranges[kRangeCount][2 * kRangeSize];
568 for (int i = 0; i < kRangeCount; i++) {
569 Vector<uc16> range(ranges[i], 2 * kRangeSize);
570 for (int j = 0; j < 2 * kRangeSize; j++) {
571 range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
572 }
573 range.Sort();
574 for (int j = 1; j < 2 * kRangeSize; j++) {
575 CHECK(range[j-1] <= range[j]);
576 }
577 }
578 // Enter test data into dispatch table.
579 ZoneScope zone_scope(DELETE_ON_EXIT);
580 DispatchTable table;
581 for (int i = 0; i < kRangeCount; i++) {
582 uc16* range = ranges[i];
583 for (int j = 0; j < 2 * kRangeSize; j += 2)
584 table.AddRange(CharacterRange(range[j], range[j + 1]), i);
585 }
586 // Check that the table looks as we would expect
587 for (int p = 0; p < kLimit; p++) {
588 OutSet* outs = table.Get(p);
589 for (int j = 0; j < kRangeCount; j++) {
590 uc16* range = ranges[j];
591 bool is_on = false;
592 for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
593 is_on = (range[k] <= p && p <= range[k + 1]);
594 CHECK_EQ(is_on, outs->Get(j));
595 }
596 }
597}
598
599
600TEST(MacroAssembler) {
601 V8::Initialize(NULL);
602 byte codes[1024];
603 RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024));
604 // ^f(o)o.
605 Label fail, fail2, start;
606 uc16 foo_chars[3];
607 foo_chars[0] = 'f';
608 foo_chars[1] = 'o';
609 foo_chars[2] = 'o';
610 Vector<const uc16> foo(foo_chars, 3);
611 m.SetRegister(4, 42);
ager@chromium.org32912102009-01-16 10:38:43 +0000612 m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000613 m.AdvanceRegister(4, 42);
614 m.GoTo(&start);
615 m.Fail();
616 m.Bind(&start);
617 m.PushBacktrack(&fail2);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000618 m.CheckCharacters(foo, 0, &fail, true);
619 m.WriteCurrentPositionToRegister(0, 0);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000620 m.PushCurrentPosition();
621 m.AdvanceCurrentPosition(3);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000622 m.WriteCurrentPositionToRegister(1, 0);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000623 m.PopCurrentPosition();
624 m.AdvanceCurrentPosition(1);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000625 m.WriteCurrentPositionToRegister(2, 0);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000626 m.AdvanceCurrentPosition(1);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000627 m.WriteCurrentPositionToRegister(3, 0);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000628 m.Succeed();
629
630 m.Bind(&fail);
631 m.Backtrack();
632 m.Succeed();
633
634 m.Bind(&fail2);
635 m.PopRegister(0);
636 m.Fail();
637
638 v8::HandleScope scope;
639
ager@chromium.org8bb60582008-12-11 12:02:20 +0000640 Handle<String> source = Factory::NewStringFromAscii(CStrVector("^f(o)o"));
641 Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000642 int captures[5];
643
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000644 const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
645 Handle<String> f1_16 =
646 Factory::NewStringFromTwoByte(Vector<const uc16>(str1, 6));
647
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000648 CHECK(IrregexpInterpreter::Match(array, f1_16, captures, 0));
649 CHECK_EQ(0, captures[0]);
650 CHECK_EQ(3, captures[1]);
651 CHECK_EQ(1, captures[2]);
652 CHECK_EQ(2, captures[3]);
653 CHECK_EQ(84, captures[4]);
654
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000655 const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
656 Handle<String> f2_16 =
657 Factory::NewStringFromTwoByte(Vector<const uc16>(str2, 6));
658
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000659 CHECK(!IrregexpInterpreter::Match(array, f2_16, captures, 0));
660 CHECK_EQ(42, captures[0]);
661}
662
663
ager@chromium.org5ec48922009-05-05 07:25:34 +0000664#ifdef V8_ARCH_IA32 // IA32 only tests.
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000665
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000666class ContextInitializer {
667 public:
668 ContextInitializer() : env_(), scope_(), stack_guard_() {
669 env_ = v8::Context::New();
670 env_->Enter();
671 }
672 ~ContextInitializer() {
673 env_->Exit();
674 env_.Dispose();
675 }
676 private:
677 v8::Persistent<v8::Context> env_;
678 v8::HandleScope scope_;
679 v8::internal::StackGuard stack_guard_;
680};
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000681
ager@chromium.org32912102009-01-16 10:38:43 +0000682
ager@chromium.org32912102009-01-16 10:38:43 +0000683static RegExpMacroAssemblerIA32::Result ExecuteIA32(Code* code,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000684 String* input,
ager@chromium.org32912102009-01-16 10:38:43 +0000685 int start_offset,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000686 const byte* input_start,
687 const byte* input_end,
ager@chromium.org32912102009-01-16 10:38:43 +0000688 int* captures,
689 bool at_start) {
690 return RegExpMacroAssemblerIA32::Execute(
691 code,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000692 input,
ager@chromium.org32912102009-01-16 10:38:43 +0000693 start_offset,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000694 input_start,
695 input_end,
ager@chromium.org32912102009-01-16 10:38:43 +0000696 captures,
697 at_start);
698}
699
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000700
701TEST(MacroAssemblerIA32Success) {
702 v8::V8::Initialize();
703 ContextInitializer initializer;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000704
705 RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4);
706
707 m.Succeed();
708
ager@chromium.org8bb60582008-12-11 12:02:20 +0000709 Handle<String> source = Factory::NewStringFromAscii(CStrVector(""));
710 Handle<Object> code_object = m.GetCode(source);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000711 Handle<Code> code = Handle<Code>::cast(code_object);
712
713 int captures[4] = {42, 37, 87, 117};
714 Handle<String> input = Factory::NewStringFromAscii(CStrVector("foofoo"));
715 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000716 const byte* start_adr =
717 reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000718
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000719 RegExpMacroAssemblerIA32::Result result =
ager@chromium.org32912102009-01-16 10:38:43 +0000720 ExecuteIA32(*code,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000721 *input,
722 0,
723 start_adr,
724 start_adr + seq_input->length(),
ager@chromium.org32912102009-01-16 10:38:43 +0000725 captures,
726 true);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000727
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000728 CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000729 CHECK_EQ(-1, captures[0]);
730 CHECK_EQ(-1, captures[1]);
731 CHECK_EQ(-1, captures[2]);
732 CHECK_EQ(-1, captures[3]);
733}
734
735
736TEST(MacroAssemblerIA32Simple) {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000737 v8::V8::Initialize();
738 ContextInitializer initializer;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000739
740 RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4);
741
742 uc16 foo_chars[3] = {'f', 'o', 'o'};
743 Vector<const uc16> foo(foo_chars, 3);
744
745 Label fail;
ager@chromium.org8bb60582008-12-11 12:02:20 +0000746 m.CheckCharacters(foo, 0, &fail, true);
747 m.WriteCurrentPositionToRegister(0, 0);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000748 m.AdvanceCurrentPosition(3);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000749 m.WriteCurrentPositionToRegister(1, 0);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000750 m.Succeed();
751 m.Bind(&fail);
752 m.Fail();
753
ager@chromium.org8bb60582008-12-11 12:02:20 +0000754 Handle<String> source = Factory::NewStringFromAscii(CStrVector("^foo"));
755 Handle<Object> code_object = m.GetCode(source);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000756 Handle<Code> code = Handle<Code>::cast(code_object);
757
758 int captures[4] = {42, 37, 87, 117};
759 Handle<String> input = Factory::NewStringFromAscii(CStrVector("foofoo"));
760 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
761 Address start_adr = seq_input->GetCharsAddress();
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000762
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000763 RegExpMacroAssemblerIA32::Result result =
ager@chromium.org32912102009-01-16 10:38:43 +0000764 ExecuteIA32(*code,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000765 *input,
766 0,
767 start_adr,
768 start_adr + input->length(),
ager@chromium.org32912102009-01-16 10:38:43 +0000769 captures,
770 true);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000771
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000772 CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000773 CHECK_EQ(0, captures[0]);
774 CHECK_EQ(3, captures[1]);
775 CHECK_EQ(-1, captures[2]);
776 CHECK_EQ(-1, captures[3]);
777
778 input = Factory::NewStringFromAscii(CStrVector("barbarbar"));
779 seq_input = Handle<SeqAsciiString>::cast(input);
780 start_adr = seq_input->GetCharsAddress();
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000781
ager@chromium.org32912102009-01-16 10:38:43 +0000782 result = ExecuteIA32(*code,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000783 *input,
784 0,
785 start_adr,
786 start_adr + input->length(),
ager@chromium.org32912102009-01-16 10:38:43 +0000787 captures,
788 true);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000789
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000790 CHECK_EQ(RegExpMacroAssemblerIA32::FAILURE, result);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000791}
792
793
794TEST(MacroAssemblerIA32SimpleUC16) {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000795 v8::V8::Initialize();
796 ContextInitializer initializer;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000797
798 RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::UC16, 4);
799
800 uc16 foo_chars[3] = {'f', 'o', 'o'};
801 Vector<const uc16> foo(foo_chars, 3);
802
803 Label fail;
ager@chromium.org8bb60582008-12-11 12:02:20 +0000804 m.CheckCharacters(foo, 0, &fail, true);
805 m.WriteCurrentPositionToRegister(0, 0);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000806 m.AdvanceCurrentPosition(3);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000807 m.WriteCurrentPositionToRegister(1, 0);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000808 m.Succeed();
809 m.Bind(&fail);
810 m.Fail();
811
ager@chromium.org8bb60582008-12-11 12:02:20 +0000812 Handle<String> source = Factory::NewStringFromAscii(CStrVector("^foo"));
813 Handle<Object> code_object = m.GetCode(source);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000814 Handle<Code> code = Handle<Code>::cast(code_object);
815
816 int captures[4] = {42, 37, 87, 117};
817 const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o', '\xa0'};
818 Handle<String> input =
819 Factory::NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
820 Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
821 Address start_adr = seq_input->GetCharsAddress();
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000822
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000823 RegExpMacroAssemblerIA32::Result result =
ager@chromium.org32912102009-01-16 10:38:43 +0000824 ExecuteIA32(*code,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000825 *input,
826 0,
827 start_adr,
828 start_adr + input->length(),
ager@chromium.org32912102009-01-16 10:38:43 +0000829 captures,
830 true);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000831
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000832 CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000833 CHECK_EQ(0, captures[0]);
834 CHECK_EQ(3, captures[1]);
835 CHECK_EQ(-1, captures[2]);
836 CHECK_EQ(-1, captures[3]);
837
838 const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a', '\xa0'};
839 input = Factory::NewStringFromTwoByte(Vector<const uc16>(input_data2, 9));
840 seq_input = Handle<SeqTwoByteString>::cast(input);
841 start_adr = seq_input->GetCharsAddress();
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000842
ager@chromium.org32912102009-01-16 10:38:43 +0000843 result = ExecuteIA32(*code,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000844 *input,
845 0,
846 start_adr,
847 start_adr + input->length() * 2,
ager@chromium.org32912102009-01-16 10:38:43 +0000848 captures,
849 true);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000850
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000851 CHECK_EQ(RegExpMacroAssemblerIA32::FAILURE, result);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000852}
853
854
855TEST(MacroAssemblerIA32Backtrack) {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000856 v8::V8::Initialize();
857 ContextInitializer initializer;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000858
859 RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);
860
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000861 Label fail;
862 Label backtrack;
863 m.LoadCurrentCharacter(10, &fail);
864 m.Succeed();
865 m.Bind(&fail);
866 m.PushBacktrack(&backtrack);
867 m.LoadCurrentCharacter(10, NULL);
868 m.Succeed();
869 m.Bind(&backtrack);
870 m.Fail();
871
ager@chromium.org8bb60582008-12-11 12:02:20 +0000872 Handle<String> source = Factory::NewStringFromAscii(CStrVector(".........."));
873 Handle<Object> code_object = m.GetCode(source);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000874 Handle<Code> code = Handle<Code>::cast(code_object);
875
876 Handle<String> input = Factory::NewStringFromAscii(CStrVector("foofoo"));
877 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
878 Address start_adr = seq_input->GetCharsAddress();
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000879
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000880 RegExpMacroAssemblerIA32::Result result =
ager@chromium.org32912102009-01-16 10:38:43 +0000881 ExecuteIA32(*code,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000882 *input,
883 0,
884 start_adr,
885 start_adr + input->length(),
ager@chromium.org32912102009-01-16 10:38:43 +0000886 NULL,
887 true);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000888
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000889 CHECK_EQ(RegExpMacroAssemblerIA32::FAILURE, result);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000890}
891
892
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000893TEST(MacroAssemblerIA32BackReferenceASCII) {
894 v8::V8::Initialize();
895 ContextInitializer initializer;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000896
897 RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 3);
898
ager@chromium.org8bb60582008-12-11 12:02:20 +0000899 m.WriteCurrentPositionToRegister(0, 0);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000900 m.AdvanceCurrentPosition(2);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000901 m.WriteCurrentPositionToRegister(1, 0);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000902 Label nomatch;
903 m.CheckNotBackReference(0, &nomatch);
904 m.Fail();
905 m.Bind(&nomatch);
906 m.AdvanceCurrentPosition(2);
907 Label missing_match;
908 m.CheckNotBackReference(0, &missing_match);
ager@chromium.org8bb60582008-12-11 12:02:20 +0000909 m.WriteCurrentPositionToRegister(2, 0);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000910 m.Succeed();
911 m.Bind(&missing_match);
912 m.Fail();
913
ager@chromium.org8bb60582008-12-11 12:02:20 +0000914 Handle<String> source = Factory::NewStringFromAscii(CStrVector("^(..)..\1"));
915 Handle<Object> code_object = m.GetCode(source);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000916 Handle<Code> code = Handle<Code>::cast(code_object);
917
918 Handle<String> input = Factory::NewStringFromAscii(CStrVector("fooofo"));
919 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
920 Address start_adr = seq_input->GetCharsAddress();
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000921
922 int output[3];
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000923 RegExpMacroAssemblerIA32::Result result =
ager@chromium.org32912102009-01-16 10:38:43 +0000924 ExecuteIA32(*code,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000925 *input,
926 0,
927 start_adr,
928 start_adr + input->length(),
ager@chromium.org32912102009-01-16 10:38:43 +0000929 output,
930 true);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000931
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000932 CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000933 CHECK_EQ(0, output[0]);
934 CHECK_EQ(2, output[1]);
935 CHECK_EQ(6, output[2]);
936}
937
938
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000939TEST(MacroAssemblerIA32BackReferenceUC16) {
940 v8::V8::Initialize();
941 ContextInitializer initializer;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000942
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000943 RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::UC16, 3);
944
945 m.WriteCurrentPositionToRegister(0, 0);
946 m.AdvanceCurrentPosition(2);
947 m.WriteCurrentPositionToRegister(1, 0);
948 Label nomatch;
949 m.CheckNotBackReference(0, &nomatch);
950 m.Fail();
951 m.Bind(&nomatch);
952 m.AdvanceCurrentPosition(2);
953 Label missing_match;
954 m.CheckNotBackReference(0, &missing_match);
955 m.WriteCurrentPositionToRegister(2, 0);
956 m.Succeed();
957 m.Bind(&missing_match);
958 m.Fail();
959
960 Handle<String> source = Factory::NewStringFromAscii(CStrVector("^(..)..\1"));
961 Handle<Object> code_object = m.GetCode(source);
962 Handle<Code> code = Handle<Code>::cast(code_object);
963
964 const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
965 Handle<String> input =
966 Factory::NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
967 Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
968 Address start_adr = seq_input->GetCharsAddress();
969
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000970 int output[3];
971 RegExpMacroAssemblerIA32::Result result =
ager@chromium.org32912102009-01-16 10:38:43 +0000972 ExecuteIA32(*code,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +0000973 *input,
974 0,
975 start_adr,
976 start_adr + input->length() * 2,
ager@chromium.org32912102009-01-16 10:38:43 +0000977 output,
978 true);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +0000979
980 CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
981 CHECK_EQ(0, output[0]);
982 CHECK_EQ(2, output[1]);
983 CHECK_EQ(6, output[2]);
984}
985
986
987
988TEST(MacroAssemblerIA32AtStart) {
989 v8::V8::Initialize();
990 ContextInitializer initializer;
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000991
992 RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);
993
ager@chromium.orga74f0da2008-12-03 16:05:52 +0000994 Label not_at_start, newline, fail;
995 m.CheckNotAtStart(&not_at_start);
996 // Check that prevchar = '\n' and current = 'f'.
997 m.CheckCharacter('\n', &newline);
998 m.Bind(&fail);
999 m.Fail();
1000 m.Bind(&newline);
1001 m.LoadCurrentCharacter(0, &fail);
1002 m.CheckNotCharacter('f', &fail);
1003 m.Succeed();
1004
1005 m.Bind(&not_at_start);
1006 // Check that prevchar = 'o' and current = 'b'.
1007 Label prevo;
1008 m.CheckCharacter('o', &prevo);
1009 m.Fail();
1010 m.Bind(&prevo);
1011 m.LoadCurrentCharacter(0, &fail);
1012 m.CheckNotCharacter('b', &fail);
1013 m.Succeed();
1014
ager@chromium.org8bb60582008-12-11 12:02:20 +00001015 Handle<String> source = Factory::NewStringFromAscii(CStrVector("(^f|ob)"));
1016 Handle<Object> code_object = m.GetCode(source);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001017 Handle<Code> code = Handle<Code>::cast(code_object);
1018
1019 Handle<String> input = Factory::NewStringFromAscii(CStrVector("foobar"));
1020 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
1021 Address start_adr = seq_input->GetCharsAddress();
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001022
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001023 RegExpMacroAssemblerIA32::Result result =
ager@chromium.org32912102009-01-16 10:38:43 +00001024 ExecuteIA32(*code,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +00001025 *input,
1026 0,
1027 start_adr,
1028 start_adr + input->length(),
ager@chromium.org32912102009-01-16 10:38:43 +00001029 NULL,
1030 true);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001031
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001032 CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001033
ager@chromium.org32912102009-01-16 10:38:43 +00001034 result = ExecuteIA32(*code,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +00001035 *input,
1036 3,
1037 start_adr + 3,
1038 start_adr + input->length(),
ager@chromium.org32912102009-01-16 10:38:43 +00001039 NULL,
1040 false);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001041
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001042 CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001043}
1044
1045
ager@chromium.org8bb60582008-12-11 12:02:20 +00001046TEST(MacroAssemblerIA32BackRefNoCase) {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001047 v8::V8::Initialize();
1048 ContextInitializer initializer;
ager@chromium.org8bb60582008-12-11 12:02:20 +00001049
1050 RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4);
1051
1052 Label fail, succ;
1053
1054 m.WriteCurrentPositionToRegister(0, 0);
1055 m.WriteCurrentPositionToRegister(2, 0);
1056 m.AdvanceCurrentPosition(3);
1057 m.WriteCurrentPositionToRegister(3, 0);
1058 m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "AbC".
1059 m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "ABC".
1060 Label expected_fail;
1061 m.CheckNotBackReferenceIgnoreCase(2, &expected_fail);
1062 m.Bind(&fail);
1063 m.Fail();
1064
1065 m.Bind(&expected_fail);
1066 m.AdvanceCurrentPosition(3); // Skip "xYz"
1067 m.CheckNotBackReferenceIgnoreCase(2, &succ);
1068 m.Fail();
1069
1070 m.Bind(&succ);
1071 m.WriteCurrentPositionToRegister(1, 0);
1072 m.Succeed();
1073
1074 Handle<String> source =
1075 Factory::NewStringFromAscii(CStrVector("^(abc)\1\1(?!\1)...(?!\1)"));
1076 Handle<Object> code_object = m.GetCode(source);
1077 Handle<Code> code = Handle<Code>::cast(code_object);
1078
1079 Handle<String> input =
1080 Factory::NewStringFromAscii(CStrVector("aBcAbCABCxYzab"));
1081 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
1082 Address start_adr = seq_input->GetCharsAddress();
ager@chromium.org8bb60582008-12-11 12:02:20 +00001083
1084 int output[4];
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001085 RegExpMacroAssemblerIA32::Result result =
ager@chromium.org32912102009-01-16 10:38:43 +00001086 ExecuteIA32(*code,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +00001087 *input,
1088 0,
1089 start_adr,
1090 start_adr + input->length(),
ager@chromium.org32912102009-01-16 10:38:43 +00001091 output,
1092 true);
ager@chromium.org8bb60582008-12-11 12:02:20 +00001093
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001094 CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
ager@chromium.org8bb60582008-12-11 12:02:20 +00001095 CHECK_EQ(0, output[0]);
1096 CHECK_EQ(12, output[1]);
1097 CHECK_EQ(0, output[2]);
1098 CHECK_EQ(3, output[3]);
1099}
1100
1101
1102
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001103TEST(MacroAssemblerIA32Registers) {
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001104 v8::V8::Initialize();
1105 ContextInitializer initializer;
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001106
1107 RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 5);
1108
1109 uc16 foo_chars[3] = {'f', 'o', 'o'};
1110 Vector<const uc16> foo(foo_chars, 3);
1111
1112 enum registers { out1, out2, out3, out4, out5, sp, loop_cnt };
1113 Label fail;
1114 Label backtrack;
ager@chromium.org8bb60582008-12-11 12:02:20 +00001115 m.WriteCurrentPositionToRegister(out1, 0); // Output: [0]
ager@chromium.org32912102009-01-16 10:38:43 +00001116 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001117 m.PushBacktrack(&backtrack);
1118 m.WriteStackPointerToRegister(sp);
1119 // Fill stack and registers
1120 m.AdvanceCurrentPosition(2);
ager@chromium.org8bb60582008-12-11 12:02:20 +00001121 m.WriteCurrentPositionToRegister(out1, 0);
ager@chromium.org32912102009-01-16 10:38:43 +00001122 m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001123 m.PushBacktrack(&fail);
1124 // Drop backtrack stack frames.
1125 m.ReadStackPointerFromRegister(sp);
1126 // And take the first backtrack (to &backtrack)
1127 m.Backtrack();
1128
1129 m.PushCurrentPosition();
1130 m.AdvanceCurrentPosition(2);
1131 m.PopCurrentPosition();
1132
1133 m.Bind(&backtrack);
1134 m.PopRegister(out1);
1135 m.ReadCurrentPositionFromRegister(out1);
1136 m.AdvanceCurrentPosition(3);
ager@chromium.org8bb60582008-12-11 12:02:20 +00001137 m.WriteCurrentPositionToRegister(out2, 0); // [0,3]
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001138
1139 Label loop;
1140 m.SetRegister(loop_cnt, 0); // loop counter
1141 m.Bind(&loop);
1142 m.AdvanceRegister(loop_cnt, 1);
1143 m.AdvanceCurrentPosition(1);
1144 m.IfRegisterLT(loop_cnt, 3, &loop);
ager@chromium.org8bb60582008-12-11 12:02:20 +00001145 m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6]
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001146
1147 Label loop2;
1148 m.SetRegister(loop_cnt, 2); // loop counter
1149 m.Bind(&loop2);
1150 m.AdvanceRegister(loop_cnt, -1);
1151 m.AdvanceCurrentPosition(1);
1152 m.IfRegisterGE(loop_cnt, 0, &loop2);
ager@chromium.org8bb60582008-12-11 12:02:20 +00001153 m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9]
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001154
1155 Label loop3;
1156 Label exit_loop3;
ager@chromium.org32912102009-01-16 10:38:43 +00001157 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1158 m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001159 m.ReadCurrentPositionFromRegister(out3);
1160 m.Bind(&loop3);
1161 m.AdvanceCurrentPosition(1);
ager@chromium.org8bb60582008-12-11 12:02:20 +00001162 m.CheckGreedyLoop(&exit_loop3);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001163 m.GoTo(&loop3);
1164 m.Bind(&exit_loop3);
ager@chromium.org8bb60582008-12-11 12:02:20 +00001165 m.PopCurrentPosition();
1166 m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9]
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001167
1168 m.Succeed();
1169
1170 m.Bind(&fail);
1171 m.Fail();
1172
ager@chromium.org8bb60582008-12-11 12:02:20 +00001173 Handle<String> source =
1174 Factory::NewStringFromAscii(CStrVector("<loop test>"));
1175 Handle<Object> code_object = m.GetCode(source);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001176 Handle<Code> code = Handle<Code>::cast(code_object);
1177
1178 // String long enough for test (content doesn't matter).
1179 Handle<String> input =
1180 Factory::NewStringFromAscii(CStrVector("foofoofoofoofoo"));
1181 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
1182 Address start_adr = seq_input->GetCharsAddress();
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001183
1184 int output[5];
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001185 RegExpMacroAssemblerIA32::Result result =
ager@chromium.org32912102009-01-16 10:38:43 +00001186 ExecuteIA32(*code,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +00001187 *input,
1188 0,
1189 start_adr,
1190 start_adr + input->length(),
ager@chromium.org32912102009-01-16 10:38:43 +00001191 output,
1192 true);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001193
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001194 CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001195 CHECK_EQ(0, output[0]);
1196 CHECK_EQ(3, output[1]);
1197 CHECK_EQ(6, output[2]);
1198 CHECK_EQ(9, output[3]);
1199 CHECK_EQ(9, output[4]);
1200}
1201
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001202
1203TEST(MacroAssemblerIA32StackOverflow) {
1204 v8::V8::Initialize();
1205 ContextInitializer initializer;
1206
1207 RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);
1208
1209 Label loop;
1210 m.Bind(&loop);
1211 m.PushBacktrack(&loop);
1212 m.GoTo(&loop);
1213
1214 Handle<String> source =
1215 Factory::NewStringFromAscii(CStrVector("<stack overflow test>"));
1216 Handle<Object> code_object = m.GetCode(source);
1217 Handle<Code> code = Handle<Code>::cast(code_object);
1218
1219 // String long enough for test (content doesn't matter).
1220 Handle<String> input =
1221 Factory::NewStringFromAscii(CStrVector("dummy"));
1222 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
1223 Address start_adr = seq_input->GetCharsAddress();
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001224
1225 RegExpMacroAssemblerIA32::Result result =
ager@chromium.org32912102009-01-16 10:38:43 +00001226 ExecuteIA32(*code,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +00001227 *input,
1228 0,
1229 start_adr,
1230 start_adr + input->length(),
ager@chromium.org32912102009-01-16 10:38:43 +00001231 NULL,
1232 true);
christian.plesner.hansen@gmail.com37abdec2009-01-06 14:43:28 +00001233
1234 CHECK_EQ(RegExpMacroAssemblerIA32::EXCEPTION, result);
1235 CHECK(Top::has_pending_exception());
1236 Top::clear_pending_exception();
1237}
1238
1239
ager@chromium.org32912102009-01-16 10:38:43 +00001240TEST(MacroAssemblerIA32LotsOfRegisters) {
1241 v8::V8::Initialize();
1242 ContextInitializer initializer;
1243
1244 RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 2);
1245
1246 // At least 2048, to ensure the allocated space for registers
1247 // span one full page.
1248 const int large_number = 8000;
1249 m.WriteCurrentPositionToRegister(large_number, 42);
1250 m.WriteCurrentPositionToRegister(0, 0);
1251 m.WriteCurrentPositionToRegister(1, 1);
1252 Label done;
1253 m.CheckNotBackReference(0, &done); // Performs a system-stack push.
1254 m.Bind(&done);
1255 m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck);
1256 m.PopRegister(1);
1257 m.Succeed();
1258
1259 Handle<String> source =
1260 Factory::NewStringFromAscii(CStrVector("<huge register space test>"));
1261 Handle<Object> code_object = m.GetCode(source);
1262 Handle<Code> code = Handle<Code>::cast(code_object);
1263
1264 // String long enough for test (content doesn't matter).
1265 Handle<String> input =
1266 Factory::NewStringFromAscii(CStrVector("sample text"));
1267 Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
1268 Address start_adr = seq_input->GetCharsAddress();
ager@chromium.org32912102009-01-16 10:38:43 +00001269
1270 int captures[2];
1271 RegExpMacroAssemblerIA32::Result result =
1272 ExecuteIA32(*code,
ager@chromium.orgbb29dc92009-03-24 13:25:23 +00001273 *input,
1274 0,
1275 start_adr,
1276 start_adr + input->length(),
ager@chromium.org32912102009-01-16 10:38:43 +00001277 captures,
1278 true);
1279
1280 CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
1281 CHECK_EQ(0, captures[0]);
1282 CHECK_EQ(42, captures[1]);
1283
1284 Top::clear_pending_exception();
1285}
1286
1287
1288
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001289#endif // !defined ARM
1290
1291TEST(AddInverseToTable) {
1292 static const int kLimit = 1000;
1293 static const int kRangeCount = 16;
1294 for (int t = 0; t < 10; t++) {
1295 ZoneScope zone_scope(DELETE_ON_EXIT);
1296 ZoneList<CharacterRange>* ranges =
1297 new ZoneList<CharacterRange>(kRangeCount);
1298 for (int i = 0; i < kRangeCount; i++) {
1299 int from = PseudoRandom(t + 87, i + 25) % kLimit;
1300 int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));
1301 if (to > kLimit) to = kLimit;
1302 ranges->Add(CharacterRange(from, to));
1303 }
1304 DispatchTable table;
1305 DispatchTableConstructor cons(&table, false);
1306 cons.set_choice_index(0);
1307 cons.AddInverse(ranges);
1308 for (int i = 0; i < kLimit; i++) {
1309 bool is_on = false;
1310 for (int j = 0; !is_on && j < kRangeCount; j++)
1311 is_on = ranges->at(j).Contains(i);
1312 OutSet* set = table.Get(i);
1313 CHECK_EQ(is_on, set->Get(0) == false);
1314 }
1315 }
1316 ZoneScope zone_scope(DELETE_ON_EXIT);
1317 ZoneList<CharacterRange>* ranges =
1318 new ZoneList<CharacterRange>(1);
1319 ranges->Add(CharacterRange(0xFFF0, 0xFFFE));
1320 DispatchTable table;
1321 DispatchTableConstructor cons(&table, false);
1322 cons.set_choice_index(0);
1323 cons.AddInverse(ranges);
1324 CHECK(!table.Get(0xFFFE)->Get(0));
1325 CHECK(table.Get(0xFFFF)->Get(0));
1326}
1327
1328
1329static uc32 canonicalize(uc32 c) {
1330 unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
1331 int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, NULL);
1332 if (count == 0) {
1333 return c;
1334 } else {
1335 CHECK_EQ(1, count);
1336 return canon[0];
1337 }
1338}
1339
1340
1341TEST(LatinCanonicalize) {
1342 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1343 for (char lower = 'a'; lower <= 'z'; lower++) {
1344 char upper = lower + ('A' - 'a');
1345 CHECK_EQ(canonicalize(lower), canonicalize(upper));
1346 unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1347 int length = un_canonicalize.get(lower, '\0', uncanon);
1348 CHECK_EQ(2, length);
1349 CHECK_EQ(upper, uncanon[0]);
1350 CHECK_EQ(lower, uncanon[1]);
1351 }
1352 for (uc32 c = 128; c < (1 << 21); c++)
1353 CHECK_GE(canonicalize(c), 128);
1354 unibrow::Mapping<unibrow::ToUppercase> to_upper;
1355 for (uc32 c = 0; c < (1 << 21); c++) {
1356 unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
1357 int length = to_upper.get(c, '\0', upper);
1358 if (length == 0) {
1359 length = 1;
1360 upper[0] = c;
1361 }
1362 uc32 u = upper[0];
1363 if (length > 1 || (c >= 128 && u < 128))
1364 u = c;
1365 CHECK_EQ(u, canonicalize(c));
1366 }
1367}
1368
1369
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001370static uc32 CanonRange(uc32 c) {
1371 unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
1372 int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);
1373 if (count == 0) {
1374 return c;
1375 } else {
1376 CHECK_EQ(1, count);
1377 return canon[0];
1378 }
1379}
1380
1381
1382TEST(RangeCanonicalization) {
1383 CHECK_NE(CanonRange(0) & CharacterRange::kStartMarker, 0);
1384 // Check that we arrive at the same result when using the basic
1385 // range canonicalization primitives as when using immediate
1386 // canonicalization.
1387 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1388 for (int i = 0; i < CharacterRange::kRangeCanonicalizeMax; i++) {
1389 int range = CanonRange(i);
1390 int indirect_length = 0;
1391 unibrow::uchar indirect[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1392 if ((range & CharacterRange::kStartMarker) == 0) {
1393 indirect_length = un_canonicalize.get(i - range, '\0', indirect);
1394 for (int i = 0; i < indirect_length; i++)
1395 indirect[i] += range;
1396 } else {
1397 indirect_length = un_canonicalize.get(i, '\0', indirect);
1398 }
1399 unibrow::uchar direct[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1400 int direct_length = un_canonicalize.get(i, '\0', direct);
1401 CHECK_EQ(direct_length, indirect_length);
1402 }
1403 // Check that we arrive at the same results when skipping over
1404 // canonicalization ranges.
1405 int next_block = 0;
1406 while (next_block < CharacterRange::kRangeCanonicalizeMax) {
1407 uc32 start = CanonRange(next_block);
1408 CHECK_NE((start & CharacterRange::kStartMarker), 0);
1409 unsigned dist = start & CharacterRange::kPayloadMask;
1410 unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1411 int first_length = un_canonicalize.get(next_block, '\0', first);
1412 for (unsigned i = 1; i < dist; i++) {
1413 CHECK_EQ(i, CanonRange(next_block + i));
1414 unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1415 int succ_length = un_canonicalize.get(next_block + i, '\0', succ);
1416 CHECK_EQ(first_length, succ_length);
1417 for (int j = 0; j < succ_length; j++) {
1418 int calc = first[j] + i;
1419 int found = succ[j];
1420 CHECK_EQ(calc, found);
1421 }
1422 }
1423 next_block = next_block + dist;
1424 }
1425}
1426
1427
1428TEST(UncanonicalizeEquivalence) {
1429 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1430 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1431 for (int i = 0; i < (1 << 16); i++) {
1432 int length = un_canonicalize.get(i, '\0', chars);
1433 for (int j = 0; j < length; j++) {
1434 unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1435 int length2 = un_canonicalize.get(chars[j], '\0', chars2);
1436 CHECK_EQ(length, length2);
1437 for (int k = 0; k < length; k++)
1438 CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k]));
1439 }
1440 }
1441}
1442
1443
1444static void TestRangeCaseIndependence(CharacterRange input,
1445 Vector<CharacterRange> expected) {
1446 ZoneScope zone_scope(DELETE_ON_EXIT);
1447 int count = expected.length();
1448 ZoneList<CharacterRange>* list = new ZoneList<CharacterRange>(count);
1449 input.AddCaseEquivalents(list);
1450 CHECK_EQ(count, list->length());
1451 for (int i = 0; i < list->length(); i++) {
1452 CHECK_EQ(expected[i].from(), list->at(i).from());
1453 CHECK_EQ(expected[i].to(), list->at(i).to());
1454 }
1455}
1456
1457
1458static void TestSimpleRangeCaseIndependence(CharacterRange input,
1459 CharacterRange expected) {
1460 EmbeddedVector<CharacterRange, 1> vector;
1461 vector[0] = expected;
1462 TestRangeCaseIndependence(input, vector);
1463}
1464
1465
1466TEST(CharacterRangeCaseIndependence) {
1467 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('a'),
1468 CharacterRange::Singleton('A'));
1469 TestSimpleRangeCaseIndependence(CharacterRange::Singleton('z'),
1470 CharacterRange::Singleton('Z'));
1471 TestSimpleRangeCaseIndependence(CharacterRange('a', 'z'),
1472 CharacterRange('A', 'Z'));
1473 TestSimpleRangeCaseIndependence(CharacterRange('c', 'f'),
1474 CharacterRange('C', 'F'));
1475 TestSimpleRangeCaseIndependence(CharacterRange('a', 'b'),
1476 CharacterRange('A', 'B'));
1477 TestSimpleRangeCaseIndependence(CharacterRange('y', 'z'),
1478 CharacterRange('Y', 'Z'));
1479 TestSimpleRangeCaseIndependence(CharacterRange('a' - 1, 'z' + 1),
1480 CharacterRange('A', 'Z'));
1481 TestSimpleRangeCaseIndependence(CharacterRange('A', 'Z'),
1482 CharacterRange('a', 'z'));
1483 TestSimpleRangeCaseIndependence(CharacterRange('C', 'F'),
1484 CharacterRange('c', 'f'));
1485 TestSimpleRangeCaseIndependence(CharacterRange('A' - 1, 'Z' + 1),
1486 CharacterRange('a', 'z'));
1487 // Here we need to add [l-z] to complete the case independence of
1488 // [A-Za-z] but we expect [a-z] to be added since we always add a
1489 // whole block at a time.
1490 TestSimpleRangeCaseIndependence(CharacterRange('A', 'k'),
1491 CharacterRange('a', 'z'));
1492}
1493
1494
1495static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) {
1496 if (ranges == NULL)
1497 return false;
1498 for (int i = 0; i < ranges->length(); i++) {
1499 CharacterRange range = ranges->at(i);
1500 if (range.from() <= c && c <= range.to())
1501 return true;
1502 }
1503 return false;
1504}
1505
1506
1507TEST(CharClassDifference) {
1508 ZoneScope zone_scope(DELETE_ON_EXIT);
1509 ZoneList<CharacterRange>* base = new ZoneList<CharacterRange>(1);
1510 base->Add(CharacterRange::Everything());
1511 Vector<const uc16> overlay = CharacterRange::GetWordBounds();
1512 ZoneList<CharacterRange>* included = NULL;
1513 ZoneList<CharacterRange>* excluded = NULL;
1514 CharacterRange::Split(base, overlay, &included, &excluded);
1515 for (int i = 0; i < (1 << 16); i++) {
1516 bool in_base = InClass(i, base);
1517 if (in_base) {
1518 bool in_overlay = false;
1519 for (int j = 0; !in_overlay && j < overlay.length(); j += 2) {
1520 if (overlay[j] <= i && i <= overlay[j+1])
1521 in_overlay = true;
1522 }
1523 CHECK_EQ(in_overlay, InClass(i, included));
1524 CHECK_EQ(!in_overlay, InClass(i, excluded));
1525 } else {
1526 CHECK(!InClass(i, included));
1527 CHECK(!InClass(i, excluded));
1528 }
1529 }
1530}
1531
1532
1533TEST(Graph) {
1534 V8::Initialize(NULL);
ager@chromium.org32912102009-01-16 10:38:43 +00001535 Execute("(?:(?:x(.))?\1)+$", false, true, true);
ager@chromium.orga74f0da2008-12-03 16:05:52 +00001536}