// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// ES6 extends the \uxxxx escape and also allows \u{xxxxx}.

// Asserts that |r| matches "foo" and "boo" but does not match "moo". The
// tests below use it to check that differently spelled patterns all denote
// the same regular expression.
function testRegexpHelper(r) {
  assertTrue(r.test("foo"));
  assertTrue(r.test("boo"));
  assertFalse(r.test("moo"));
}


// Checks that \uxxxx and \u{x...} escapes for "f" (U+0066) and "b" (U+0062)
// are understood in regexp literals, in RegExp constructor patterns, and when
// the escape is already resolved by the string literal.
(function TestUnicodeEscapes() {
  testRegexpHelper(/(\u0066|\u0062)oo/);
  testRegexpHelper(/(\u0066|\u0062)oo/u);
  testRegexpHelper(/(\u{0066}|\u{0062})oo/u);
  testRegexpHelper(/(\u{66}|\u{000062})oo/u);

  // Note that we need \\ inside a string, otherwise it's interpreted as a
  // unicode escape inside a string.
  testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo"));
  testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo", "u"));
  testRegexpHelper(new RegExp("(\\u{0066}|\\u{0062})oo", "u"));
  testRegexpHelper(new RegExp("(\\u{66}|\\u{000062})oo", "u"));

  // Though, unicode escapes via strings should work too.
  testRegexpHelper(new RegExp("(\u0066|\u0062)oo"));
  testRegexpHelper(new RegExp("(\u0066|\u0062)oo", "u"));
  testRegexpHelper(new RegExp("(\u{0066}|\u{0062})oo", "u"));
  testRegexpHelper(new RegExp("(\u{66}|\u{000062})oo", "u"));
})();


// Same as above, but with the escapes used as the end points of a character
// class range from "b" (U+0062) to "f" (U+0066).
(function TestUnicodeEscapesInCharacterClasses() {
  testRegexpHelper(/[\u0062-\u0066]oo/);
  testRegexpHelper(/[\u0062-\u0066]oo/u);
  testRegexpHelper(/[\u{0062}-\u{0066}]oo/u);
  testRegexpHelper(/[\u{62}-\u{00000066}]oo/u);

  // Note that we need \\ inside a string, otherwise it's interpreted as a
  // unicode escape inside a string.
  testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo"));
  testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo", "u"));
  testRegexpHelper(new RegExp("[\\u{0062}-\\u{0066}]oo", "u"));
  testRegexpHelper(new RegExp("[\\u{62}-\\u{00000066}]oo", "u"));

  // Though, unicode escapes via strings should work too.
  testRegexpHelper(new RegExp("[\u0062-\u0066]oo"));
  testRegexpHelper(new RegExp("[\u0062-\u0066]oo", "u"));
  testRegexpHelper(new RegExp("[\u{0062}-\u{0066}]oo", "u"));
  testRegexpHelper(new RegExp("[\u{62}-\u{00000066}]oo", "u"));
})();


// Checks how \u{...} is read when the u flag is absent.
(function TestBraceEscapesWithoutUnicodeFlag() {
  // \u followed by illegal escape will be parsed as u. {x} will be the
  // character count.
  function helper1(r) {
    assertFalse(r.test("fbar"));
    assertFalse(r.test("fubar"));
    assertTrue(r.test("fuubar"));
    assertFalse(r.test("fuuubar"));
  }
  helper1(/f\u{2}bar/);
  helper1(new RegExp("f\\u{2}bar"));

  // {1,2} is likewise a quantifier: one or two "u" characters.
  function helper2(r) {
    assertFalse(r.test("fbar"));
    assertTrue(r.test("fubar"));
    assertTrue(r.test("fuubar"));
    assertFalse(r.test("fuuubar"));
  }

  helper2(/f\u{1,2}bar/);
  helper2(new RegExp("f\\u{1,2}bar"));

  // Inside a character class there are no quantifiers, so the class holds
  // the four characters "u", "{", "2" and "}".
  function helper3(r) {
    assertTrue(r.test("u"));
    assertTrue(r.test("{"));
    assertTrue(r.test("2"));
    assertTrue(r.test("}"));
    assertFalse(r.test("q"));
    assertFalse(r.test("("));
    assertFalse(r.test(")"));
  }
  helper3(/[\u{2}]/);
  helper3(new RegExp("[\\u{2}]"));
})();


// Checks that malformed escapes are tolerated without the u flag and
// rejected with a SyntaxError when the u flag is present.
(function TestInvalidEscapes() {
  // Without the u flag, invalid unicode escapes and other invalid escapes are
  // treated as identity escapes.
  function helper1(r) {
    assertTrue(r.test("firstuxz89second"));
  }
  helper1(/first\u\x\z\8\9second/);
  helper1(new RegExp("first\\u\\x\\z\\8\\9second"));

  function helper2(r) {
    assertTrue(r.test("u"));
    assertTrue(r.test("x"));
    assertTrue(r.test("z"));
    assertTrue(r.test("8"));
    assertTrue(r.test("9"));
    assertFalse(r.test("q"));
    assertFalse(r.test("7"));
  }
  helper2(/[\u\x\z\8\9]/);
  helper2(new RegExp("[\\u\\x\\z\\8\\9]"));

  // However, with the u flag, these are treated as invalid escapes. The
  // sources are passed as strings because a malformed literal has to be
  // evaluated lazily for the error to be observable.
  assertThrows("/\\u/u", SyntaxError);
  assertThrows("/\\u12/u", SyntaxError);
  assertThrows("/\\ufoo/u", SyntaxError);
  assertThrows("/\\x/u", SyntaxError);
  assertThrows("/\\xfoo/u", SyntaxError);
  assertThrows("/\\z/u", SyntaxError);
  assertThrows("/\\8/u", SyntaxError);
  assertThrows("/\\9/u", SyntaxError);

  assertThrows("new RegExp('\\\\u', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\u12', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\ufoo', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\x', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\xfoo', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\z', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\8', 'u')", SyntaxError);
  assertThrows("new RegExp('\\\\9', 'u')", SyntaxError);
})();


// Checks the upper bound of the code point accepted inside \u{...}.
(function TestTooBigHexEscape() {
  // The hex number inside \u{} has a maximum value. The two expression
  // statements below only need to evaluate without throwing; each one is
  // explicitly terminated instead of relying on automatic semicolon
  // insertion.
  /\u{10ffff}/u;
  new RegExp("\\u{10ffff}", "u");
  assertThrows("/\\u{110000}/u", SyntaxError);
  assertThrows("new RegExp('\\\\u{110000}', 'u')", SyntaxError);

  // Without the u flag, they're of course fine ({x} is the count).
  /\u{110000}/;
  new RegExp("\\u{110000}");
})();


// Checks that escaping a syntax character ("[") yields the literal character
// regardless of the u flag.
(function TestSyntaxEscapes() {
  // Syntax escapes work the same with or without the u flag.
  function helper(r) {
    assertTrue(r.test("foo[bar"));
    assertFalse(r.test("foo]bar"));
  }
  helper(/foo\[bar/);
  helper(new RegExp("foo\\[bar"));
  helper(/foo\[bar/u);
  helper(new RegExp("foo\\[bar", "u"));
})();


// Checks that, with the u flag, two consecutive \uxxxx escapes forming a
// surrogate pair match the corresponding supplementary character.
(function TestUnicodeSurrogates() {
  // U+10E6D corresponds to the surrogate pair [U+D803, U+DE6D].
  function helper(r) {
    assertTrue(r.test("foo\u{10e6d}bar"));
  }
  helper(/foo\ud803\ude6dbar/u);
  helper(new RegExp("foo\\ud803\\ude6dbar", "u"));
})();


// Checks the flag accessors of regexps created with and without flags.
(function AllFlags() {
  // Test that the g, i, m, y and u flags can be combined and that each one is
  // reflected by its accessor.
  function helper1(r) {
    assertTrue(r.global);
    assertTrue(r.ignoreCase);
    assertTrue(r.multiline);
    assertTrue(r.sticky);
    assertTrue(r.unicode);
  }

  helper1(/foo/gimyu);
  helper1(new RegExp("foo", "gimyu"));

  // Without any flags, every one of those accessors reports false.
  function helper2(r) {
    assertFalse(r.global);
    assertFalse(r.ignoreCase);
    assertFalse(r.multiline);
    assertFalse(r.sticky);
    assertFalse(r.unicode);
  }

  helper2(/foo/);
  helper2(new RegExp("foo"));
})();


// Checks that a repeated u flag is rejected both in a literal and in the
// RegExp constructor.
(function DuplicatedFlags() {
  // Test that duplicating the u flag is not allowed. The expected error type
  // is pinned so that an unrelated exception cannot satisfy the assertion.
  assertThrows("/foo/ugu", SyntaxError);
  assertThrows("new RegExp('foo', 'ugu')", SyntaxError);
})();


// Checks the string form of a regexp created with the u flag.
(function ToString() {
  // Test that the u flag is included in the string representation of regexps.
  function helper(r) {
    // Expected value first, matching the other assertEquals calls in this
    // file.
    assertEquals("/foo/u", r.toString());
  }
  helper(/foo/u);
  helper(new RegExp("foo", "u"));
})();

// Non-BMP patterns.
// U+12345 is the surrogate pair [U+D808, U+DF45]. With the u flag a non-BMP
// character in the pattern is a single unit, so a lone trail surrogate in the
// subject must not match it.

// Single character atom.
assertTrue(new RegExp("\u{12345}", "u").test("\u{12345}"));
assertTrue(/\u{12345}/u.test("\u{12345}"));
assertTrue(new RegExp("\u{12345}", "u").test("\ud808\udf45"));
assertTrue(/\u{12345}/u.test("\ud808\udf45"));
assertFalse(new RegExp("\u{12345}", "u").test("\udf45"));
assertFalse(/\u{12345}/u.test("\udf45"));

// Multi-character atom.
assertTrue(new RegExp("\u{12345}\u{23456}", "u").test("a\u{12345}\u{23456}b"));
assertTrue(/\u{12345}\u{23456}/u.test("b\u{12345}\u{23456}c"));
assertFalse(new RegExp("\u{12345}\u{23456}", "u").test("a\udf45\u{23456}b"));
assertFalse(/\u{12345}\u{23456}/u.test("b\udf45\u{23456}c"));

// Disjunction.
assertTrue(new RegExp("\u{12345}(?:\u{23456})", "u").test(
    "a\u{12345}\u{23456}b"));
assertTrue(/\u{12345}(?:\u{23456})/u.test("b\u{12345}\u{23456}c"));
assertFalse(new RegExp("\u{12345}(?:\u{23456})", "u").test(
    "a\udf45\u{23456}b"));
assertFalse(/\u{12345}(?:\u{23456})/u.test("b\udf45\u{23456}c"));

// Alternative.
assertTrue(new RegExp("\u{12345}|\u{23456}", "u").test("a\u{12345}b"));
assertTrue(/\u{12345}|\u{23456}/u.test("b\u{23456}c"));
assertFalse(new RegExp("\u{12345}|\u{23456}", "u").test("a\udf45\ud84db"));
assertFalse(/\u{12345}|\u{23456}/u.test("b\udf45\ud808c"));

// Capture.
assertTrue(new RegExp("(\u{12345}|\u{23456}).\\1", "u").test(
    "\u{12345}b\u{12345}"));
assertTrue(/(\u{12345}|\u{23456}).\1/u.test("\u{12345}b\u{12345}"));
assertFalse(new RegExp("(\u{12345}|\u{23456}).\\1", "u").test(
    "\u{12345}b\u{23456}"));
assertFalse(/(\u{12345}|\u{23456}).\1/u.test("\u{12345}b\u{23456}"));

// Quantifier. Without the u flag {3} applies to the trail surrogate code
// unit only; with it, {3} applies to the whole character.
assertTrue(new RegExp("\u{12345}{3}", "u").test("\u{12345}\u{12345}\u{12345}"));
assertTrue(/\u{12345}{3}/u.test("\u{12345}\u{12345}\u{12345}"));
assertTrue(new RegExp("\u{12345}{3}").test("\u{12345}\udf45\udf45"));
assertFalse(/\ud808\udf45{3}/u.test("\u{12345}\udf45\udf45"));
assertTrue(/\ud808\udf45{3}/u.test("\u{12345}\u{12345}\u{12345}"));
assertFalse(new RegExp("\u{12345}{3}", "u").test("\u{12345}\udf45\udf45"));
assertFalse(/\u{12345}{3}/u.test("\u{12345}\udf45\udf45"));

// Literal surrogates.
// A surrogate pair in a u-flag pattern is one character, whether both halves
// are written literally or both as \uxxxx escapes: the quantifier and the
// class range below apply to whole code points.
assertEquals(["\u{10000}\u{10000}"],
             new RegExp("\ud800\udc00+", "u").exec("\u{10000}\u{10000}"));
assertEquals(["\u{10000}\u{10000}"],
             new RegExp("\\ud800\\udc00+", "u").exec("\u{10000}\u{10000}"));

assertEquals(["\u{10003}\u{50001}"],
             new RegExp("[\\ud800\\udc03-\\ud900\\udc01\]+", "u").exec(
                 "\u{10003}\u{50001}"));
assertEquals(["\u{10003}\u{50001}"],
             new RegExp("[\ud800\udc03-\u{50001}\]+", "u").exec(
                 "\u{10003}\u{50001}"));

// Unicode escape sequences to represent a non-BMP character cannot have
// mixed notation, and must follow the rules for RegExpUnicodeEscapeSequence.
// In both class patterns below the surrogates do not pair up, so the range
// runs from a lone trail surrogate down to a lone lead surrogate, which is
// out of order.
assertThrows(
    () => new RegExp("[\\ud800\udc03-\ud900\\udc01\]+", "u"), SyntaxError);
assertThrows(
    () => new RegExp("[\ud800\\udc03-\\ud900\udc01\]+", "u"), SyntaxError);
assertNull(new RegExp("\\ud800\udc00+", "u").exec("\u{10000}\u{10000}"));
assertNull(new RegExp("\ud800\\udc00+", "u").exec("\u{10000}\u{10000}"));

// A lone surrogate in a class does not match half of a surrogate pair.
assertNull(new RegExp("[\\ud800\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\\u{d800}\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\ud800\\udc00]", "u").exec("\u{10000}"));
assertNull(new RegExp("[\ud800\\u{dc00}]", "u").exec("\u{10000}"));

// \u{...} escapes never combine into a surrogate pair.
assertNull(/\u{d800}\u{dc00}+/u.exec("\ud800\udc00\udc00"));
assertNull(/\ud800\u{dc00}+/u.exec("\ud800\udc00\udc00"));
assertNull(/\u{d800}\udc00+/u.exec("\ud800\udc00\udc00"));