// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
// ES6 extends the \uxxxx escape and also allows \u{xxxxx}.

// Flags: --harmony-unicode-regexps --harmony-regexps
8
// Asserts that the regexp `r` accepts "foo" and "boo" but rejects "moo".
// The escape tests in this file pass it patterns that spell "f" and "b" with
// different escape notations, so a pass means the escapes decoded correctly.
function testRegexpHelper(r) {
  assertTrue(r.test("foo"));
  assertTrue(r.test("boo"));
  assertFalse(r.test("moo"));
}
14
15
// Checks that \uXXXX escapes and, under the u flag, \u{X...} escapes denote
// the same characters whether they appear in a regexp literal, as an escaped
// sequence inside a pattern string, or already decoded by the string literal.
// \u0066 is "f" and \u0062 is "b".
(function TestUnicodeEscapes() {
  testRegexpHelper(/(\u0066|\u0062)oo/);
  testRegexpHelper(/(\u0066|\u0062)oo/u);
  testRegexpHelper(/(\u{0066}|\u{0062})oo/u);
  testRegexpHelper(/(\u{66}|\u{000062})oo/u);

  // Note that we need \\ inside a string, otherwise it's interpreted as a
  // unicode escape inside a string.
  testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo"));
  testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo", "u"));
  testRegexpHelper(new RegExp("(\\u{0066}|\\u{0062})oo", "u"));
  testRegexpHelper(new RegExp("(\\u{66}|\\u{000062})oo", "u"));

  // Though, unicode escapes via strings should work too.
  testRegexpHelper(new RegExp("(\u0066|\u0062)oo"));
  testRegexpHelper(new RegExp("(\u0066|\u0062)oo", "u"));
  testRegexpHelper(new RegExp("(\u{0066}|\u{0062})oo", "u"));
  testRegexpHelper(new RegExp("(\u{66}|\u{000062})oo", "u"));
})();
35
36
// Same matrix as the plain escape test, but with the escapes used as the
// endpoints of a character class range. The range \u0062-\u0066 is "b".."f",
// so it accepts "foo" and "boo" and rejects "moo".
(function TestUnicodeEscapesInCharacterClasses() {
  testRegexpHelper(/[\u0062-\u0066]oo/);
  testRegexpHelper(/[\u0062-\u0066]oo/u);
  testRegexpHelper(/[\u{0062}-\u{0066}]oo/u);
  testRegexpHelper(/[\u{62}-\u{00000066}]oo/u);

  // Note that we need \\ inside a string, otherwise it's interpreted as a
  // unicode escape inside a string.
  testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo"));
  testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo", "u"));
  testRegexpHelper(new RegExp("[\\u{0062}-\\u{0066}]oo", "u"));
  testRegexpHelper(new RegExp("[\\u{62}-\\u{00000066}]oo", "u"));

  // Though, unicode escapes via strings should work too.
  testRegexpHelper(new RegExp("[\u0062-\u0066]oo"));
  testRegexpHelper(new RegExp("[\u0062-\u0066]oo", "u"));
  testRegexpHelper(new RegExp("[\u{0062}-\u{0066}]oo", "u"));
  testRegexpHelper(new RegExp("[\u{62}-\u{00000066}]oo", "u"));
})();
56
57
// Without the u flag, \u{...} is not a code point escape.
(function TestBraceEscapesWithoutUnicodeFlag() {
  // \u followed by illegal escape will be parsed as u. {x} will be the
  // character count.

  // The pattern must match exactly two "u"s between "f" and "bar".
  function expectExactlyTwoU(r) {
    assertFalse(r.test("fbar"));
    assertFalse(r.test("fubar"));
    assertTrue(r.test("fuubar"));
    assertFalse(r.test("fuuubar"));
  }
  expectExactlyTwoU(/f\u{2}bar/);
  expectExactlyTwoU(new RegExp("f\\u{2}bar"));

  // The pattern must match one or two "u"s between "f" and "bar".
  function expectOneOrTwoU(r) {
    assertFalse(r.test("fbar"));
    assertTrue(r.test("fubar"));
    assertTrue(r.test("fuubar"));
    assertFalse(r.test("fuuubar"));
  }
  expectOneOrTwoU(/f\u{1,2}bar/);
  expectOneOrTwoU(new RegExp("f\\u{1,2}bar"));

  // Inside a character class there is no quantifier, so each of "u", "{",
  // "2" and "}" is an individual class member.
  function expectLiteralClassMembers(r) {
    assertTrue(r.test("u"));
    assertTrue(r.test("{"));
    assertTrue(r.test("2"));
    assertTrue(r.test("}"));
    assertFalse(r.test("q"));
    assertFalse(r.test("("));
    assertFalse(r.test(")"));
  }
  expectLiteralClassMembers(/[\u{2}]/);
  expectLiteralClassMembers(new RegExp("[\\u{2}]"));
})();
92
93
// Checks how malformed escapes are handled with and without the u flag.
(function TestInvalidEscapes() {
  // Without the u flag, invalid unicode escapes and other invalid escapes are
  // treated as identity escapes.
  function expectIdentityEscapesInAtom(r) {
    assertTrue(r.test("firstuxz89second"));
  }
  expectIdentityEscapesInAtom(/first\u\x\z\8\9second/);
  expectIdentityEscapesInAtom(new RegExp("first\\u\\x\\z\\8\\9second"));

  // The same escapes inside a character class each contribute the escaped
  // character itself and nothing else.
  function expectIdentityEscapesInClass(r) {
    assertTrue(r.test("u"));
    assertTrue(r.test("x"));
    assertTrue(r.test("z"));
    assertTrue(r.test("8"));
    assertTrue(r.test("9"));
    assertFalse(r.test("q"));
    assertFalse(r.test("7"));
  }
  expectIdentityEscapesInClass(/[\u\x\z\8\9]/);
  expectIdentityEscapesInClass(new RegExp("[\\u\\x\\z\\8\\9]"));

  // However, with the u flag, these are treated as invalid escapes.
  // The sources are passed as strings so the literal is parsed by
  // assertThrows rather than when this file is parsed.
  assertThrows("/\\u/u", SyntaxError);
  assertThrows("/\\u12/u", SyntaxError);
  assertThrows("/\\ufoo/u", SyntaxError);
  assertThrows("/\\x/u", SyntaxError);
  assertThrows("/\\xfoo/u", SyntaxError);
  assertThrows("/\\z/u", SyntaxError);
  assertThrows("/\\8/u", SyntaxError);
  assertThrows("/\\9/u", SyntaxError);

  assertThrows("new RegExp('\\\\u', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\u12', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\ufoo', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\x', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\xfoo', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\z', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\8', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\9', 'u')", SyntaxError);
})();
134
135
// Checks the upper bound on the code point accepted by \u{...}.
(function TestTooBigHexEscape() {
  // The hex number inside \u{} has a maximum value.
  // These bare expression statements only need to be accepted without
  // throwing. They are terminated explicitly rather than relying on
  // automatic semicolon insertion around a line that starts with "/".
  /\u{10ffff}/u;
  new RegExp("\\u{10ffff}", "u");
  assertThrows("/\\u{110000}/u", SyntaxError);
  assertThrows("new RegExp('\\\\u{110000}', 'u')", SyntaxError);

  // Without the u flag, they're of course fine ({x} is the count).
  /\u{110000}/;
  new RegExp("\\u{110000}");
})();
147
148
// Checks that escaping a syntax character ("[") yields the literal character
// in every construction form.
(function TestSyntaxEscapes() {
  // Syntax escapes work the same with or without the u flag.
  function helper(r) {
    assertTrue(r.test("foo[bar"));
    assertFalse(r.test("foo]bar"));
  }
  helper(/foo\[bar/);
  helper(new RegExp("foo\\[bar"));
  helper(/foo\[bar/u);
  helper(new RegExp("foo\\[bar", "u"));
})();
160
161
// Checks that, under the u flag, an escaped lead surrogate immediately
// followed by an escaped trail surrogate matches the single code point they
// encode.
(function TestUnicodeSurrogates() {
  // U+10E6D corresponds to the surrogate pair [U+D803, U+DE6D].
  function helper(r) {
    assertTrue(r.test("foo\u{10e6d}bar"));
  }
  helper(/foo\ud803\ude6dbar/u);
  helper(new RegExp("foo\\ud803\\ude6dbar", "u"));
})();
170
171
// Checks the flag accessors for a regexp built with all of g, i, m, y and u
// and for one built with no flags at all.
(function AllFlags() {
  // Test that we can pass all possible regexp flags and they work properly.
  function expectAllFlagsSet(r) {
    assertTrue(r.global);
    assertTrue(r.ignoreCase);
    assertTrue(r.multiline);
    assertTrue(r.sticky);
    assertTrue(r.unicode);
  }

  expectAllFlagsSet(/foo/gimyu);
  expectAllFlagsSet(new RegExp("foo", "gimyu"));

  // With no flags given, every accessor must report false.
  function expectNoFlagsSet(r) {
    assertFalse(r.global);
    assertFalse(r.ignoreCase);
    assertFalse(r.multiline);
    assertFalse(r.sticky);
    assertFalse(r.unicode);
  }

  expectNoFlagsSet(/foo/);
  expectNoFlagsSet(new RegExp("foo"));
})();
196
197
// Checks that repeating a flag is rejected for both construction forms.
(function DuplicatedFlags() {
  // Test that duplicating the u flag is not allowed.
  // The expected error type is pinned, as in the other throw checks in this
  // file, so an unrelated exception cannot make the test pass.
  assertThrows("/foo/ugu", SyntaxError);
  assertThrows("new RegExp('foo', 'ugu')", SyntaxError);
})();
203
204
// Checks the string form of a regexp created with the u flag.
(function ToString() {
  // Test that the u flag is included in the string representation of regexps.
  function helper(r) {
    // Expected value first, matching the other assertEquals calls in this
    // file, so a failure message labels the two values correctly.
    assertEquals("/foo/u", r.toString());
  }
  helper(/foo/u);
  helper(new RegExp("foo", "u"));
})();
213
// Non-BMP patterns.
// Single character atom.
// U+12345 is encoded as the surrogate pair \ud808 \udf45. Under the u flag the
// pattern must match the whole pair and must not match the trail surrogate on
// its own.
assertTrue(new RegExp("\u{12345}", "u").test("\u{12345}"));
assertTrue(/\u{12345}/u.test("\u{12345}"));
assertTrue(new RegExp("\u{12345}", "u").test("\ud808\udf45"));
assertTrue(/\u{12345}/u.test("\ud808\udf45"));
assertFalse(new RegExp("\u{12345}", "u").test("\udf45"));
assertFalse(/\u{12345}/u.test("\udf45"));
222
// Multi-character atom.
// Two consecutive non-BMP code points must both match in full. A subject
// whose first code point is reduced to its trail surrogate must not match.
assertTrue(new RegExp("\u{12345}\u{23456}", "u").test("a\u{12345}\u{23456}b"));
assertTrue(/\u{12345}\u{23456}/u.test("b\u{12345}\u{23456}c"));
assertFalse(new RegExp("\u{12345}\u{23456}", "u").test("a\udf45\u{23456}b"));
assertFalse(/\u{12345}\u{23456}/u.test("b\udf45\u{23456}c"));
228
// Disjunction.
// Same subjects as the multi-character case, with the second code point
// wrapped in a non-capturing group.
assertTrue(new RegExp("\u{12345}(?:\u{23456})", "u").test(
    "a\u{12345}\u{23456}b"));
assertTrue(/\u{12345}(?:\u{23456})/u.test("b\u{12345}\u{23456}c"));
assertFalse(new RegExp("\u{12345}(?:\u{23456})", "u").test(
    "a\udf45\u{23456}b"));
assertFalse(/\u{12345}(?:\u{23456})/u.test("b\udf45\u{23456}c"));
236
// Alternative.
// Either code point may match. The negative subjects contain only lone
// surrogates (a trail followed by a lead), which must satisfy neither branch.
assertTrue(new RegExp("\u{12345}|\u{23456}", "u").test("a\u{12345}b"));
assertTrue(/\u{12345}|\u{23456}/u.test("b\u{23456}c"));
assertFalse(new RegExp("\u{12345}|\u{23456}", "u").test("a\udf45\ud84db"));
assertFalse(/\u{12345}|\u{23456}/u.test("b\udf45\ud808c"));
242
// Capture.
// The back reference \1 must repeat the captured non-BMP code point exactly.
// A different code point after the separator must not match.
assertTrue(new RegExp("(\u{12345}|\u{23456}).\\1", "u").test(
    "\u{12345}b\u{12345}"));
assertTrue(/(\u{12345}|\u{23456}).\1/u.test("\u{12345}b\u{12345}"));
assertFalse(new RegExp("(\u{12345}|\u{23456}).\\1", "u").test(
    "\u{12345}b\u{23456}"));
assertFalse(/(\u{12345}|\u{23456}).\1/u.test("\u{12345}b\u{23456}"));
250
// Quantifier.
// Under the u flag, {3} applies to the whole code point U+12345.
assertTrue(new RegExp("\u{12345}{3}", "u").test("\u{12345}\u{12345}\u{12345}"));
assertTrue(/\u{12345}{3}/u.test("\u{12345}\u{12345}\u{12345}"));
// Deliberately no u flag here: the quantifier then applies to the trail
// surrogate \udf45 alone, so one lead followed by three trails matches.
assertTrue(new RegExp("\u{12345}{3}").test("\u{12345}\udf45\udf45"));
// With the u flag, an escaped surrogate pair is quantified as one code point.
assertFalse(/\ud808\udf45{3}/u.test("\u{12345}\udf45\udf45"));
assertTrue(/\ud808\udf45{3}/u.test("\u{12345}\u{12345}\u{12345}"));
assertFalse(new RegExp("\u{12345}{3}", "u").test("\u{12345}\udf45\udf45"));
assertFalse(/\u{12345}{3}/u.test("\u{12345}\udf45\udf45"));

// Literal surrogates.
// A surrogate pair written as two literal code units, or as two consecutive
// \uXXXX escapes, is treated as one code point under the u flag. The "+"
// therefore repeats U+10000 as a whole.
assertEquals(["\u{10000}\u{10000}"],
             new RegExp("\ud800\udc00+", "u").exec("\u{10000}\u{10000}"));
assertEquals(["\u{10000}\u{10000}"],
             new RegExp("\\ud800\\udc00+", "u").exec("\u{10000}\u{10000}"));

// The same holds for the endpoints of a class range: U+10003 through U+50001
// (\ud900\udc01 encodes U+50001).
assertEquals(["\u{10003}\u{50001}"],
             new RegExp("[\\ud800\\udc03-\\ud900\\udc01\]+", "u").exec(
                 "\u{10003}\u{50001}"));
assertEquals(["\u{10003}\u{50001}"],
             new RegExp("[\ud800\udc03-\u{50001}\]+", "u").exec(
                 "\u{10003}\u{50001}"));
272
// Unicode escape sequences to represent a non-BMP character cannot have
// mixed notation, and must follow the rules for RegExpUnicodeEscapeSequence.
// An escaped surrogate next to a literal one stays two lone surrogates, so
// the class range below runs from U+DC03 down to U+D900 and is rejected.
assertThrows(() => new RegExp("[\\ud800\udc03-\ud900\\udc01\]+", "u"));
// Mirrored arrangement (literal on the outside, escapes around the dash);
// this line used to be an exact duplicate of the one above.
assertThrows(() => new RegExp("[\ud800\\udc03-\\ud900\udc01\]+", "u"));
// Outside a class the mixed pair is likewise two lone surrogates, which must
// not match inside a well-formed surrogate pair of the subject.
assertNull(new RegExp("\\ud800\udc00+", "u").exec("\u{10000}\u{10000}"));
assertNull(new RegExp("\ud800\\udc00+", "u").exec("\u{10000}\u{10000}"));
279
// Mixed notation inside a character class: one surrogate escaped (as \uXXXX
// or \u{XXXX}) and the other literal. The class then holds two lone
// surrogates, neither of which may match part of the pair encoding U+10000.
// The brace forms were previously spelled "\\{ud800}" / "\\{udc00}", which
// put the literal characters "{", "u", "d", ... in the class instead of a
// brace escape.
assertNull(new RegExp("[\\ud800\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\\u{d800}\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\ud800\\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\ud800\\u{dc00}]", "u").exec("\u{10000}"));
284
// A surrogate written as \u{XXXX} never combines with a neighbouring
// surrogate escape. Each pattern below is a lone lead followed by one or more
// lone trails, and must not match a subject that starts with the well-formed
// pair \ud800\udc00.
assertNull(/\u{d800}\u{dc00}+/u.exec("\ud800\udc00\udc00"));
assertNull(/\ud800\u{dc00}+/u.exec("\ud800\udc00\udc00"));
assertNull(/\u{d800}\udc00+/u.exec("\ud800\udc00\udc00"));