// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
| 27 | |
// Splits |str| on |regex|, glues the pieces back together with ":" and
// checks that the outcome is exactly "foo:bar:baz".
function testEscape(str, regex) {
  var pieces = str.split(regex);
  var rejoined = pieces.join(":");
  assertEquals("foo:bar:baz", rejoined);
}
| 31 | |
// Run testEscape over several separator/regexp pairs: a newline matched by
// \n, a space and a tab matched by \s, and '-' matched by a \u escape.
(function () {
  var cases = [
    ["foo\nbar\nbaz", /\n/],
    ["foo bar baz", /\s/],
    ["foo\tbar\tbaz", /\s/],
    ["foo-bar-baz", /\u002D/]
  ];
  for (var k = 0; k < cases.length; k++) {
    testEscape(cases[k][0], cases[k][1]);
  }
})();
| 36 | |
// Test containing null char in regexp.
// |s| is the three-character string '[', NUL, ']'.  Used as a pattern it is
// a character class holding only NUL, so matching it against |s| itself
// must find exactly the NUL character.
var s = '[' + String.fromCharCode(0) + ']';
var re = new RegExp(s);
// Expected value first, matching the argument order used by the other
// assertEquals calls in this file that carry a message.
assertEquals(1, s.match(re).length);
assertEquals(String.fromCharCode(0), s.match(re)[0]);
| 42 | |
// Test strings containing all line separators.
// The subject holds six two-letter "lines" separated by \n, \r, \r\n,
// \u2028 and \u2029.
s = 'aA\nbB\rcC\r\ndD\u2028eE\u2029fF';
var result;
(function () {
  // Each entry is [global multiline regexp, expected matches in order].
  var cases = [
    // any non-newline character at the beginning of a line
    [/^./gm, ['a', 'b', 'c', 'd', 'e', 'f']],
    // any non-newline character at the end of a line
    [/.$/gm, ['A', 'B', 'C', 'D', 'E', 'F']],
    // *any* character at the beginning of a line; the '\n' of the "\r\n"
    // pair is itself matched as the first character after the '\r'
    [/^[^]/gm, ['a', 'b', 'c', '\n', 'd', 'e', 'f']],
    // *any* character at the end of a line; the '\r' of the "\r\n" pair is
    // itself matched as the last character before the '\n'
    [/[^]$/gm, ['A', 'B', 'C', '\r', 'D', 'E', 'F']]
  ];
  for (var c = 0; c < cases.length; c++) {
    var expected = cases[c][1];
    // |re| and |result| stay global, as in the straight-line version.
    re = cases[c][0];
    result = s.match(re);
    // Expected value goes first so a failure report reads correctly.
    assertEquals(expected.length, result.length);
    for (var m = 0; m < expected.length; m++) {
      assertEquals(expected[m], result[m]);
    }
  }
})();
| 86 | |
// Some tests from the Mozilla tests, where our behavior differs from
// SpiderMonkey.
// From ecma_3/RegExp/regress-334158.js
(function () {
  // \ca is the control character U+0001 ...
  var matchesControlChar = /\ca/.test("\x01");
  assertTrue(matchesControlChar);
  // ... and not the literal text backslash, 'c', 'a'.
  var matchesLiteralText = /\ca/.test("\\ca");
  assertFalse(matchesLiteralText);
})();
// Passes in KJS, fails in IrregularExpressions.
// See http://code.google.com/p/v8/issues/detail?id=152
//assertTrue(/\c[a/]/.test( "\x1ba/]" ));
| 95 | |
| 96 | |
// Test \c in character class
(function () {
  // Each entry is [regexp, list of [subject, should match]].
  var groups = [
    [/^[\cM]$/, [
      ["\r", true],
      ["M", false],
      ["c", false],
      ["\\", false],
      ["\x03", false]     // I.e., read as \cc
    ]],
    [/^[\c]]$/, [
      ["c]", true],
      ["\\]", false],
      ["\x1d", false],    // ']' & 0x1f
      ["\\]", false],
      ["\x03]", false]    // I.e., read as \cc
    ]]
  ];
  for (var g = 0; g < groups.length; g++) {
    var subjects = groups[g][1];
    re = groups[g][0];
    for (var n = 0; n < subjects.length; n++) {
      var matched = re.test(subjects[n][0]);
      if (subjects[n][1]) {
        assertTrue(matched);
      } else {
        assertFalse(matched);
      }
    }
  }
})();
| 111 | |
| 112 | |
// Test that we handle \s and \S correctly inside some bizarre
// character classes.
(function () {
  // Every regexp is probed with the same seven one-character subjects.
  var inputs = ['-', ':', ' ', '\t', '\n', 'a', 'Z'];
  var T = true;
  var F = false;
  // Each row is [regexp, expected test() outcome for each entry of inputs].
  var table = [
    //                 '-' ':' ' ' \t  \n  'a' 'Z'
    [/[\s-:]/,        [T,  T,  T,  T,  T,  F,  F]],
    [/[\S-:]/,        [T,  T,  F,  F,  F,  T,  T]],
    [/[^\s-:]/,       [F,  F,  F,  F,  F,  T,  T]],
    [/[^\S-:]/,       [F,  F,  T,  T,  T,  F,  F]],
    [/[\s]/,          [F,  F,  T,  T,  T,  F,  F]],
    [/[^\s]/,         [T,  T,  F,  F,  F,  T,  T]],
    [/[\S]/,          [T,  T,  F,  F,  F,  T,  T]],
    [/[^\S]/,         [F,  F,  T,  T,  T,  F,  F]],
    [/[\s\S]/,        [T,  T,  T,  T,  T,  T,  T]],
    [/[^\s\S]/,       [F,  F,  F,  F,  F,  F,  F]]
  ];
  for (var r = 0; r < table.length; r++) {
    var expected = table[r][1];
    re = table[r][0];
    for (var n = 0; n < inputs.length; n++) {
      var matched = re.test(inputs[n]);
      if (expected[n]) {
        assertTrue(matched);
      } else {
        assertFalse(matched);
      }
    }
  }
})();
| 204 | |
// Test beginning and end of line assertions with or without the
// multiline flag.
(function () {
  // Each entry is [regexp, subject, should match].
  var cases = [
    [/^\d+/, "asdf\n123", false],
    [/^\d+/m, "asdf\n123", true],
    [/\d+$/, "123\nasdf", false],
    [/\d+$/m, "123\nasdf", true]
  ];
  for (var k = 0; k < cases.length; k++) {
    re = cases[k][0];
    var matched = re.test(cases[k][1]);
    if (cases[k][2]) {
      assertTrue(matched);
    } else {
      assertFalse(matched);
    }
  }
})();
| 216 | |
// Test that empty matches are handled correctly for multiline global
// regexps.
(function () {
  re = /^(.*)/mg;
  var lineMatches = "a\n\rb".match(re);
  assertEquals(3, lineMatches.length);
  // Prefix every line (including the empty ones) with '*'.
  var starred = "a\nb\rc\n\rd\r\ne".replace(re, "*$1");
  assertEquals("*a\n*b\r*c\n*\r*d\r*\n*e", starred);
})();
| 222 | |
// Test that empty matches advance one character
(function () {
  re = new RegExp("", "g");
  var aroundAscii = "A".replace(re, "x");
  assertEquals("xAx", aroundAscii);
  // Same check with a single non-ASCII character (char code 161).
  var nonAscii = String.fromCharCode(161);
  var aroundNonAscii = nonAscii.replace(re, "x");
  assertEquals(3, aroundNonAscii.length);
})();
| 227 | |
// Test that we match the KJS behavior with regard to undefined constructor
// arguments.
// KJS actually shows the empty regexp as '//'. Here we match the Firefox
// behavior (ie, giving a syntactically legal regexp literal).
(function () {
  // Each entry is [action that (re)builds the global |re|, expected source].
  var steps = [
    [function () { re = new RegExp(); }, '/(?:)/'],
    [function () { re = new RegExp(void 0); }, '/(?:)/'],
    [function () { re.compile(); }, '/(?:)/'],
    [function () { re.compile(void 0); }, '/undefined/']
  ];
  for (var n = 0; n < steps.length; n++) {
    steps[n][0]();
    assertEquals(steps[n][1], re.toString());
  }
})();
| 240 | |
| 241 | |
// Check that an invalid pattern is only rejected when it is actually used.
// The pattern is built with the RegExp constructor instead of being written
// as a literal: ES5 and later report an invalid regexp *literal* as an early
// SyntaxError, which would stop this whole file from parsing.
//
// doit: when falsy the bad pattern is never constructed and true is
//       returned; when truthy the pattern is constructed and the call
//       throws a SyntaxError.
function lazyLiteral(doit) {
  // "foo(" has an unterminated group, so constructing it throws.
  if (doit) return "".replace(new RegExp("foo(", "gi"), "");
  return true;
}
| 247 | |
// The bad pattern must be harmless while unused and must throw once used.
(function () {
  var unusedPathResult = lazyLiteral(false);
  assertTrue(unusedPathResult);
  assertThrows("lazyLiteral(true)");
})();
| 250 | |
// Check $01 and $10
// Each table row is [expected result, replacement pattern, assertion label].
// The labels name the replacement pattern under test; a " 2" suffix marks
// the second use of a pattern so every label stays unique.
(function () {
  function check(subject, rows) {
    for (var n = 0; n < rows.length; n++) {
      assertEquals(rows[n][0], subject.replace(re, rows[n][1]), rows[n][2]);
    }
  }

  // Ten capture groups: two-digit references up to $10 are valid.
  re = new RegExp("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)");
  check("123456789t", [
    ["t", "$10", "$10"],
    ["15", "$15", "$15"],       // no group 15: read as $1 followed by '5'
    ["1", "$01", "$01"],
    ["$001", "$001", "$001"]
  ]);

  // A single capture group.
  re = new RegExp("foo(.)");
  check("foox", [
    ["bar$0", "bar$0", "$0"],
    ["bar$00", "bar$00", "$00"],
    ["bar$000", "bar$000", "$000"],
    ["barx", "bar$01", "$01 2"],
    ["barx5", "bar$15", "$15 2"]
  ]);
})();
| 263 | |
// '$' followed by a back reference or a lookahead.
// Each entry is [regexp, subject, should match, assertion label].
(function () {
  var cases = [
    [/()foo$\1/, "football", false, "football1"],
    [/foo$(?=ball)/, "football", false, "football2"],
    [/foo$(?!bar)/, "football", false, "football3"],
    [/()foo$\1/, "foo", true, "football4"],
    [/foo$(?=(ball)?)/, "foo", true, "football5"],
    [/()foo$(?!bar)/, "foo", true, "football6"],
    [/(x?)foo$\1/, "football", false, "football7"],
    [/foo$(?=ball)/, "football", false, "football8"],
    [/foo$(?!bar)/, "football", false, "football9"],
    [/(x?)foo$\1/, "foo", true, "football10"],
    [/foo$(?=(ball)?)/, "foo", true, "football11"],
    [/foo$(?!bar)/, "foo", true, "football12"]
  ];
  for (var k = 0; k < cases.length; k++) {
    var entry = cases[k];
    var matched = entry[0].test(entry[1]);
    if (entry[2]) {
      assertTrue(matched, entry[3]);
    } else {
      assertFalse(matched, entry[3]);
    }
  }
})();
| 276 | |
(function () {
  // Check that the back reference has two successors. See
  // BackReferenceNode::PropagateForward.
  var acrossBoundary = /f(o)\b\1/.test('foo');
  assertFalse(acrossBoundary);
  var acrossNonBoundary = /f(o)\B\1/.test('foo');
  assertTrue(acrossNonBoundary);

  // Back-reference, ignore case:
  // ASCII
  // Each entry is [regexp, subject, expectation, assertion label].  A string
  // expectation is compared with String(exec result); a boolean expectation
  // is compared with the test() outcome.
  var cases = [
    [/x(a)\1x/i, "xaAx", "xaAx,a", "backref-ASCII"],
    [/x(...)\1/i, "xaaaaa", false, "backref-ASCII-short"],
    [/x((?:))\1\1x/i, "xx", true, "backref-ASCII-empty"],
    [/x(?:...|(...))\1x/i, "xabcx", true, "backref-ASCII-uncaptured"],
    [/x(?:...|(...))\1x/i, "xabcABCx", true, "backref-ASCII-backtrack"],
    [/x(...)\1\1x/i, "xaBcAbCABCx", "xaBcAbCABCx,aBc", "backref-ASCII-twice"]
  ];
  for (var k = 0; k < cases.length; k++) {
    var pattern = cases[k][0];
    var subject = cases[k][1];
    var expectation = cases[k][2];
    var label = cases[k][3];
    if (typeof expectation === 'string') {
      assertEquals(expectation, String(pattern.exec(subject)), label);
    } else if (expectation) {
      assertTrue(pattern.test(subject), label);
    } else {
      assertFalse(pattern.test(subject), label);
    }
  }
})();
| 292 | |
// For every ASCII code, pair the character with the one whose code differs
// only in bit 0x20.  A case-insensitive back reference may accept the pair
// only when the character is an ASCII letter.
for (var i = 0; i < 128; i++) {
  var c = String.fromCharCode(i);
  var testName = "backref-ASCII-char-" + i + "," + (i ^ 0x20);
  var test = /^(.)\1$/i.test(c + String.fromCharCode(i ^ 0x20));
  if ((c < 'A' || c > 'Z') && (c < 'a' || c > 'z')) {
    // Not a letter: the two characters are unrelated.
    assertFalse(test, testName);
  } else {
    assertTrue(test, testName);
  }
}

// Nothing is left for \1 to match once '$' has asserted end of input.
assertFalse(/f(o)$\1/.test('foo'), "backref detects at_end");
| 305 | |
// Check that decimal escapes don't overflow.
// (Note: \214 is interpreted as octal).
(function () {
  var subject = "\x8c7483648";
  var found = /\2147483648/.exec(subject);
  assertEquals(found, [subject], "Overflow decimal escape");
})();
| 311 | |
| 312 | |
// Check that numbers in quantifiers don't overflow and that too large
// numbers don't throw.
// Each entry is [regexp, subject, should match, assertion label].
(function () {
  var cases = [
    [/a{111111111111111111111111111111111111111111111}/, 'b', false,
     "overlarge1"],
    [/a{999999999999999999999999999999999999999999999}/, 'b', false,
     "overlarge2"],
    [/a{1,111111111111111111111111111111111111111111111}/, 'b', false,
     "overlarge3"],
    [/a{1,999999999999999999999999999999999999999999999}/, 'b', false,
     "overlarge4"],
    [/a{2147483648}/, 'b', false, "overlarge5"],
    [/a{21474836471}/, 'b', false, "overlarge6"],
    [/a{1,2147483648}/, 'b', false, "overlarge7"],
    [/a{1,21474836471}/, 'b', false, "overlarge8"],
    [/a{2147483648,2147483648}/, 'b', false, "overlarge9"],
    [/a{21474836471,21474836471}/, 'b', false, "overlarge10"],
    [/a{2147483647}/, 'b', false, "overlarge11"],
    [/a{1,2147483647}/, 'b', false, "overlarge12"],
    [/a{1,2147483647}/, 'a', true, "overlarge13"],
    [/a{2147483647,2147483647}/, 'a', false, "overlarge14"]
  ];
  for (var k = 0; k < cases.length; k++) {
    var entry = cases[k];
    var matched = entry[0].test(entry[1]);
    if (entry[2]) {
      assertTrue(matched, entry[3]);
    } else {
      assertFalse(matched, entry[3]);
    }
  }
})();
| 343 | |
| 344 | |
// Check that we don't read past the end of the string.
// Each entry is [regexp, subject]; none of them may match.  Every row keeps
// its own regexp literal so each check runs on a separate regexp object.
(function () {
  var cases = [
    [/f/, 'b'],
    [/[abc]f/, 'x'],
    [/[abc]f/, 'xa'],
    [/[abc]</, 'x'],
    [/[abc]</, 'xa'],
    [/f/i, 'b'],
    [/[abc]f/i, 'x'],
    [/[abc]f/i, 'xa'],
    [/[abc]</i, 'x'],
    [/[abc]</i, 'xa'],
    [/f[abc]/, 'x'],
    [/f[abc]/, 'xa'],
    [/<[abc]/, 'x'],
    [/<[abc]/, 'xa'],
    [/f[abc]/i, 'x'],
    [/f[abc]/i, 'xa'],
    [/<[abc]/i, 'x'],
    [/<[abc]/i, 'xa']
  ];
  for (var k = 0; k < cases.length; k++) {
    assertFalse(cases[k][0].test(cases[k][1]));
  }
})();
| 364 | |
// Test that merging of quick test masks gets it right.
// Each entry is [regexp, subject, assertion label]; none of them may match.
(function () {
  var cases = [
    [/x([0-7]%%x|[0-6]%%y)/, 'x7%%y', 'qt'],
    [/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/, 'xy7%%%y', 'qt2'],
    [/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/, 'xy%%%y', 'qt3'],
    [/()x\1y([0-7]%%%x|[0-6]%%%y)/, 'xy7%%%y', 'qt4'],
    [/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/, 'xy%%%y', 'qt5'],
    [/()x\1y([0-7]%%%x|[0-6]%%%y)/, 'xy7%%%y', 'qt6'],
    [/xy([0-7]%%%x|[0-6]%%%y)/, 'xy7%%%y', 'qt7'],
    [/x([0-7]%%%x|[0-6]%%%y)/, 'x7%%%y', 'qt8']
  ];
  for (var k = 0; k < cases.length; k++) {
    assertFalse(cases[k][0].test(cases[k][1]), cases[k][2]);
  }
})();
| 374 | |
| 375 | |
// Don't hang on this one.
/[^\xfe-\xff]*/.test("");


// Build the pattern "a?" repeated 100000 times followed by a final "a".
var long = "a";
var i = 0;
while (i < 100000) {
  long = "a?" + long;
  i++;
}
// Don't crash on this one, but maybe throw an exception.  If something is
// thrown, its string form must mention "Stack overflow".
try {
  RegExp(long).exec("a");
} catch (e) {
  assertTrue(String(e).indexOf("Stack overflow") !== -1, "overflow");
}
| 390 | |