// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// ES6 extends the \uxxxx escape and also allows \u{xxxxx}.

// Shared check used by the escape tests below: the regexp |r| must match
// "foo" and "boo" and must not match "moo".
function testRegexpHelper(r) {
  assertTrue(r.test("foo"));
  assertTrue(r.test("boo"));
  assertFalse(r.test("moo"));
}

// Checks that \uXXXX escapes (with and without the u flag) and \u{...}
// escapes (with the u flag) are accepted in regexp literals, in patterns
// passed to the RegExp constructor, and in string literals whose already
// decoded characters are passed to the constructor.
(function TestUnicodeEscapes() {
  testRegexpHelper(/(\u0066|\u0062)oo/);
  testRegexpHelper(/(\u0066|\u0062)oo/u);
  testRegexpHelper(/(\u{0066}|\u{0062})oo/u);
  testRegexpHelper(/(\u{66}|\u{000062})oo/u);

  // Note that we need \\ inside a string, otherwise it's interpreted as a
  // unicode escape inside a string.
  testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo"));
  testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo", "u"));
  testRegexpHelper(new RegExp("(\\u{0066}|\\u{0062})oo", "u"));
  testRegexpHelper(new RegExp("(\\u{66}|\\u{000062})oo", "u"));

  // Though, unicode escapes via strings should work too.
  testRegexpHelper(new RegExp("(\u0066|\u0062)oo"));
  testRegexpHelper(new RegExp("(\u0066|\u0062)oo", "u"));
  testRegexpHelper(new RegExp("(\u{0066}|\u{0062})oo", "u"));
  testRegexpHelper(new RegExp("(\u{66}|\u{000062})oo", "u"));
})();

// Same coverage as the plain escape test, but with the escapes used as the
// end points of a character class range.
(function TestUnicodeEscapesInCharacterClasses() {
  testRegexpHelper(/[\u0062-\u0066]oo/);
  testRegexpHelper(/[\u0062-\u0066]oo/u);
  testRegexpHelper(/[\u{0062}-\u{0066}]oo/u);
  testRegexpHelper(/[\u{62}-\u{00000066}]oo/u);

  // Note that we need \\ inside a string, otherwise it's interpreted as a
  // unicode escape inside a string.
  testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo"));
  testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo", "u"));
  testRegexpHelper(new RegExp("[\\u{0062}-\\u{0066}]oo", "u"));
  testRegexpHelper(new RegExp("[\\u{62}-\\u{00000066}]oo", "u"));

  // Though, unicode escapes via strings should work too.
  testRegexpHelper(new RegExp("[\u0062-\u0066]oo"));
  testRegexpHelper(new RegExp("[\u0062-\u0066]oo", "u"));
  testRegexpHelper(new RegExp("[\u{0062}-\u{0066}]oo", "u"));
  testRegexpHelper(new RegExp("[\u{62}-\u{00000066}]oo", "u"));
})();

// Checks how "\u{...}" is treated when the u flag is absent, both outside
// and inside a character class.
(function TestBraceEscapesWithoutUnicodeFlag() {
  // \u followed by illegal escape will be parsed as u. {x} will be the
  // character count.
  function helper1(r) {
    assertFalse(r.test("fbar"));
    assertFalse(r.test("fubar"));
    assertTrue(r.test("fuubar"));
    assertFalse(r.test("fuuubar"));
  }
  helper1(/f\u{2}bar/);
  helper1(new RegExp("f\\u{2}bar"));

  // Same idea with a {min,max} quantifier: one or two "u"s are accepted.
  function helper2(r) {
    assertFalse(r.test("fbar"));
    assertTrue(r.test("fubar"));
    assertTrue(r.test("fuubar"));
    assertFalse(r.test("fuuubar"));
  }

  helper2(/f\u{1,2}bar/);
  helper2(new RegExp("f\\u{1,2}bar"));

  // Inside a character class the characters "u", "{", "2" and "}" are all
  // plain class members.
  function helper3(r) {
    assertTrue(r.test("u"));
    assertTrue(r.test("{"));
    assertTrue(r.test("2"));
    assertTrue(r.test("}"));
    assertFalse(r.test("q"));
    assertFalse(r.test("("));
    assertFalse(r.test(")"));
  }
  helper3(/[\u{2}]/);
  helper3(new RegExp("[\\u{2}]"));
})();

// Checks that malformed escapes are tolerated without the u flag and are
// rejected with a SyntaxError when the u flag is present.
(function TestInvalidEscapes() {
  // Without the u flag, invalid unicode escapes and other invalid escapes are
  // treated as identity escapes.
  function helper1(r) {
    assertTrue(r.test("firstuxz89second"));
  }
  helper1(/first\u\x\z\8\9second/);
  helper1(new RegExp("first\\u\\x\\z\\8\\9second"));

  // The same escapes inside a character class denote the escaped characters.
  function helper2(r) {
    assertTrue(r.test("u"));
    assertTrue(r.test("x"));
    assertTrue(r.test("z"));
    assertTrue(r.test("8"));
    assertTrue(r.test("9"));
    assertFalse(r.test("q"));
    assertFalse(r.test("7"));
  }
  helper2(/[\u\x\z\8\9]/);
  helper2(new RegExp("[\\u\\x\\z\\8\\9]"));

  // However, with the u flag, these are treated as invalid escapes.
  // The source is passed as a string because an invalid regexp literal would
  // otherwise prevent this whole file from parsing.
  assertThrows("/\\u/u", SyntaxError);
  assertThrows("/\\u12/u", SyntaxError);
  assertThrows("/\\ufoo/u", SyntaxError);
  assertThrows("/\\x/u", SyntaxError);
  assertThrows("/\\xfoo/u", SyntaxError);
  assertThrows("/\\z/u", SyntaxError);
  assertThrows("/\\8/u", SyntaxError);
  assertThrows("/\\9/u", SyntaxError);

  assertThrows("new RegExp('\\\\u', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\u12', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\ufoo', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\x', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\xfoo', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\z', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\8', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\9', 'u')", SyntaxError);
})();

// Checks the upper bound of the code point accepted inside \u{...}.
(function TestTooBigHexEscape() {
  // The hex number inside \u{} has a maximum value.
  // The two bare expressions only need to be constructed without throwing.
  /\u{10ffff}/u;
  new RegExp("\\u{10ffff}", "u");
  assertThrows("/\\u{110000}/u", SyntaxError);
  assertThrows("new RegExp('\\\\u{110000}', 'u')", SyntaxError);

  // Without the u flag, they're of course fine ({x} is the count).
  /\u{110000}/;
  new RegExp("\\u{110000}");
})();

// Checks that an escaped syntax character ("\[") matches the literal
// character regardless of the u flag.
(function TestSyntaxEscapes() {
  // Syntax escapes work the same with or without the u flag.
  function helper(r) {
    assertTrue(r.test("foo[bar"));
    assertFalse(r.test("foo]bar"));
  }
  helper(/foo\[bar/);
  helper(new RegExp("foo\\[bar"));
  helper(/foo\[bar/u);
  helper(new RegExp("foo\\[bar", "u"));
})();

// Checks that, with the u flag, two consecutive \uXXXX escapes forming a
// surrogate pair match the corresponding supplementary code point.
(function TestUnicodeSurrogates() {
  // U+10E6D corresponds to the surrogate pair [U+D803, U+DE6D].
  function helper(r) {
    assertTrue(r.test("foo\u{10e6d}bar"));
  }
  helper(/foo\ud803\ude6dbar/u);
  helper(new RegExp("foo\\ud803\\ude6dbar", "u"));
})();

// Checks the flag accessors of regexps created with every flag ("gimyu")
// and with no flags at all.
(function AllFlags() {
  // Test that we can pass all possible regexp flags and they work properly.
  function helper1(r) {
    assertTrue(r.global);
    assertTrue(r.ignoreCase);
    assertTrue(r.multiline);
    assertTrue(r.sticky);
    assertTrue(r.unicode);
  }

  helper1(/foo/gimyu);
  helper1(new RegExp("foo", "gimyu"));

  // With no flags given, every accessor must report false.
  function helper2(r) {
    assertFalse(r.global);
    assertFalse(r.ignoreCase);
    assertFalse(r.multiline);
    assertFalse(r.sticky);
    assertFalse(r.unicode);
  }

  helper2(/foo/);
  helper2(new RegExp("foo"));
})();

// Checks that repeating the u flag is rejected, for both a regexp literal
// and the RegExp constructor.
(function DuplicatedFlags() {
  // Test that duplicating the u flag is not allowed.
  // The expected error type is given explicitly, like the other string-based
  // assertThrows calls in this file, so an unrelated exception cannot pass.
  assertThrows("/foo/ugu", SyntaxError);
  assertThrows("new RegExp('foo', 'ugu')", SyntaxError);
})();

// Checks that RegExp.prototype.toString reports the u flag.
(function ToString() {
  // Test that the u flag is included in the string representation of regexps.
  function helper(r) {
    // Expected value first, matching the other assertEquals calls in this
    // file.
    assertEquals("/foo/u", r.toString());
  }
  helper(/foo/u);
  helper(new RegExp("foo", "u"));
})();

// Non-BMP patterns.
// Single character atom.
// With the u flag a supplementary character matches itself and its
// surrogate-pair spelling, but not its trail surrogate alone.
assertTrue(new RegExp("\u{12345}", "u").test("\u{12345}"));
assertTrue(/\u{12345}/u.test("\u{12345}"));
assertTrue(new RegExp("\u{12345}", "u").test("\ud808\udf45"));
assertTrue(/\u{12345}/u.test("\ud808\udf45"));
assertFalse(new RegExp("\u{12345}", "u").test("\udf45"));
assertFalse(/\u{12345}/u.test("\udf45"));

// Multi-character atom.
// Two consecutive supplementary characters must both match in full; a lone
// trail surrogate in place of the first one must not match.
assertTrue(new RegExp("\u{12345}\u{23456}", "u").test("a\u{12345}\u{23456}b"));
assertTrue(/\u{12345}\u{23456}/u.test("b\u{12345}\u{23456}c"));
assertFalse(new RegExp("\u{12345}\u{23456}", "u").test("a\udf45\u{23456}b"));
assertFalse(/\u{12345}\u{23456}/u.test("b\udf45\u{23456}c"));

// Disjunction.
// Same inputs as the multi-character case, with the second character wrapped
// in a non-capturing group.
assertTrue(new RegExp("\u{12345}(?:\u{23456})", "u").test(
    "a\u{12345}\u{23456}b"));
assertTrue(/\u{12345}(?:\u{23456})/u.test("b\u{12345}\u{23456}c"));
assertFalse(new RegExp("\u{12345}(?:\u{23456})", "u").test(
    "a\udf45\u{23456}b"));
assertFalse(/\u{12345}(?:\u{23456})/u.test("b\udf45\u{23456}c"));

// Alternative.
// Either supplementary character may match; inputs containing only unpaired
// surrogates must not match either alternative.
assertTrue(new RegExp("\u{12345}|\u{23456}", "u").test("a\u{12345}b"));
assertTrue(/\u{12345}|\u{23456}/u.test("b\u{23456}c"));
assertFalse(new RegExp("\u{12345}|\u{23456}", "u").test("a\udf45\ud84db"));
assertFalse(/\u{12345}|\u{23456}/u.test("b\udf45\ud808c"));

// Capture.
// The back reference \1 must repeat the captured supplementary character,
// not the other alternative.
assertTrue(new RegExp("(\u{12345}|\u{23456}).\\1", "u").test(
    "\u{12345}b\u{12345}"));
assertTrue(/(\u{12345}|\u{23456}).\1/u.test("\u{12345}b\u{12345}"));
assertFalse(new RegExp("(\u{12345}|\u{23456}).\\1", "u").test(
    "\u{12345}b\u{23456}"));
assertFalse(/(\u{12345}|\u{23456}).\1/u.test("\u{12345}b\u{23456}"));

// Quantifier.
// With the u flag, {3} applies to the whole supplementary character.
assertTrue(new RegExp("\u{12345}{3}", "u").test("\u{12345}\u{12345}\u{12345}"));
assertTrue(/\u{12345}{3}/u.test("\u{12345}\u{12345}\u{12345}"));
// Without the u flag, {3} applies to the trail surrogate only, so a lead
// surrogate followed by three trail surrogates matches.
assertTrue(new RegExp("\u{12345}{3}").test("\u{12345}\udf45\udf45"));
// A \uXXXX\uXXXX surrogate pair escape is quantified as one character too.
assertFalse(/\ud808\udf45{3}/u.test("\u{12345}\udf45\udf45"));
assertTrue(/\ud808\udf45{3}/u.test("\u{12345}\u{12345}\u{12345}"));
assertFalse(new RegExp("\u{12345}{3}", "u").test("\u{12345}\udf45\udf45"));
assertFalse(/\u{12345}{3}/u.test("\u{12345}\udf45\udf45"));

// Literal surrogates.
// A surrogate pair in the pattern, whether given as raw characters or as
// two \uXXXX escapes, is treated as a single character under the u flag,
// both as a quantified atom and as a character class range end point.
assertEquals(["\u{10000}\u{10000}"],
             new RegExp("\ud800\udc00+", "u").exec("\u{10000}\u{10000}"));
assertEquals(["\u{10000}\u{10000}"],
             new RegExp("\\ud800\\udc00+", "u").exec("\u{10000}\u{10000}"));

assertEquals(["\u{10003}\u{50001}"],
             new RegExp("[\\ud800\\udc03-\\ud900\\udc01\]+", "u").exec(
                 "\u{10003}\u{50001}"));
assertEquals(["\u{10003}\u{50001}"],
             new RegExp("[\ud800\udc03-\u{50001}\]+", "u").exec(
                 "\u{10003}\u{50001}"));

// Unicode escape sequences to represent a non-BMP character cannot have
// mixed notation, and must follow the rules for RegExpUnicodeEscapeSequence.
assertThrows(() => new RegExp("[\\ud800\udc03-\ud900\\udc01\]+", "u"));
assertThrows(() => new RegExp("[\\ud800\udc03-\ud900\\udc01\]+", "u"));
assertNull(new RegExp("\\ud800\udc00+", "u").exec("\u{10000}\u{10000}"));
assertNull(new RegExp("\ud800\\udc00+", "u").exec("\u{10000}\u{10000}"));

// NOTE(review): "\\{ud800}" and "\\{udc00}" below parse as an escaped "{"
// followed by literal characters; possibly "\\u{d800}" / "\\u{dc00}" were
// intended. Confirm before changing.
assertNull(new RegExp("[\\ud800\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\\{ud800}\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\ud800\\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\ud800\\{udc00}]", "u").exec("\u{10000}"));

// Lead and trail surrogates written with differing escape notations do not
// match the subject strings below.
assertNull(/\u{d800}\u{dc00}+/u.exec("\ud800\udc00\udc00"));
assertNull(/\ud800\u{dc00}+/u.exec("\ud800\udc00\udc00"));
assertNull(/\u{d800}\udc00+/u.exec("\ud800\udc00\udc00"));