Steve Block | d0582a6 | 2009-12-15 09:54:21 +0000 | [diff] [blame] | 1 | // Copyright 2009 the V8 project authors. All rights reserved. |
| 2 | // Redistribution and use in source and binary forms, with or without |
| 3 | // modification, are permitted provided that the following conditions are |
| 4 | // met: |
| 5 | // |
| 6 | // * Redistributions of source code must retain the above copyright |
| 7 | // notice, this list of conditions and the following disclaimer. |
| 8 | // * Redistributions in binary form must reproduce the above |
| 9 | // copyright notice, this list of conditions and the following |
| 10 | // disclaimer in the documentation and/or other materials provided |
| 11 | // with the distribution. |
| 12 | // * Neither the name of Google Inc. nor the names of its |
| 13 | // contributors may be used to endorse or promote products derived |
| 14 | // from this software without specific prior written permission. |
| 15 | // |
| 16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 20 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | |
| 28 | // Test Unicode character ranges in regexps. |
| 29 | |
| 30 | |
// Cyrillic: boundary and interior sample letters, in both cases.
var cyrillic = {
  // Capital and small A, the first letter of each case.
  FIRST: "\u0410",
  first: "\u0430",
  // Capital and small YA, the last letter of each case.
  LAST: "\u042f",
  last: "\u044f",
  // Capital and small CHE, a letter from the interior of each case.
  MIDDLE: "\u0427",
  middle: "\u0447",
  // The small letters start right after the capital ones, so Cyrillic has no
  // character between the cases to test with.
  BetweenCases: false
};
| 41 | |
// The three forms of sigma: capital, small, and the alternative (final) small
// form.
var SIGMA = "\u03a3";
var sigma = "\u03c3";
var alternative_sigma = "\u03c2";

// Greek: boundary and interior sample letters, in both cases.
var greek = {
  // Capital and small alpha, the first letter of each case.
  FIRST: "\u0391",
  first: "\u03b1",
  // Capital and small omega, the last letter of each case.
  LAST: "\u03a9",
  last: "\u03c9",
  // Capital and small sigma, a letter from the interior of each case.
  MIDDLE: SIGMA,
  middle: sigma,
  // Epsilon acute lies after capital OMEGA and before small alpha, i.e. inside
  // ALPHA-omega but outside both ALPHA-OMEGA and alpha-omega.
  BetweenCases: "\u03ad"
};
| 57 | |
| 58 | |
/**
 * Builds a regular expression made of a single character class spanning
 * `from` through `to`.
 *
 * @param {string} from First character of the class range.
 * @param {string} to Last character of the class range.
 * @param {string=} flags Optional RegExp flags, e.g. "i".
 * @return {RegExp} A new RegExp for the pattern "[from-to]".
 */
function Range(from, to, flags) {
  var char_class = "[" + from + "-" + to + "]";
  return new RegExp(char_class, flags);
}
| 62 | |
// Run the same battery of checks against each alphabet on its own: Cyrillic
// first, then Greek.
var alphabets = [cyrillic, greek];
for (var a = 0; a < alphabets.length; a++) {
  var letters = alphabets[a];
  var between = letters.BetweenCases;

  // Pass 0 builds the range from the lower case letters, pass 1 from the
  // upper case ones.
  for (var pass = 0; pass < 2; pass++) {
    var lower = (pass === 0);
    // Samples (first, middle, last) in the case the range is built from...
    var same_case = lower
        ? [letters.first, letters.middle, letters.last]
        : [letters.FIRST, letters.MIDDLE, letters.LAST];
    // ...and the corresponding letters in the opposite case.
    var other_case = lower
        ? [letters.FIRST, letters.MIDDLE, letters.LAST]
        : [letters.first, letters.middle, letters.last];
    var range_start = same_case[0];
    var range_end = same_case[2];
    var k;

    // Case sensitive: letters in the range's own case match (1-3).
    for (k = 0; k < 3; k++) {
      assertTrue(Range(range_start, range_end).test(same_case[k]), 1 + k);
    }

    // Case sensitive: letters in the opposite case do not match (4-6).
    for (k = 0; k < 3; k++) {
      assertFalse(Range(range_start, range_end).test(other_case[k]), 4 + k);
    }

    // Ignoring case: letters in the range's own case still match (7-9).
    for (k = 0; k < 3; k++) {
      assertTrue(Range(range_start, range_end, "i").test(same_case[k]), 7 + k);
    }

    // Ignoring case: letters in the opposite case match as well (10-12).
    for (k = 0; k < 3; k++) {
      assertTrue(Range(range_start, range_end, "i").test(other_case[k]), 10 + k);
    }

    // A character located between the two cases is outside a single-case
    // range, with or without the ignore-case flag.
    if (between) {
      assertFalse(Range(range_start, range_end).test(between), 13);
      assertFalse(Range(range_start, range_end, "i").test(between), 14);
    }
  }

  // A range running from the first upper case letter to the last lower case
  // letter does contain the in-between character.
  if (between) {
    assertTrue(Range(letters.FIRST, letters.last).test(between), 15);
    assertTrue(Range(letters.FIRST, letters.last, "i").test(between), 16);
  }
}
| 102 | |
// Test a single range that covers both the Greek and the Cyrillic characters.
// The loop variable is declared with `var`: assigning to an undeclared `key`
// relies on implicit global creation, which throws a ReferenceError in strict
// mode code.
for (var key in greek) {
  // Every Greek sample, including the between-cases character, is in range.
  // The message is the id followed by the property name, e.g. "17FIRST".
  assertTrue(Range(greek.FIRST, cyrillic.last).test(greek[key]), 17 + key);
  // cyrillic.BetweenCases is false, so that entry is skipped rather than
  // tested.
  if (cyrillic[key]) {
    assertTrue(Range(greek.FIRST, cyrillic.last).test(cyrillic[key]), 18 + key);
  }
}
| 110 | |
// A range that starts at the first lower case Greek letter and ends at the
// last upper case Cyrillic letter, checked with and without ignore-case.
var greek_lower = [greek.first, greek.middle, greek.last];
var cyrillic_upper = [cyrillic.FIRST, cyrillic.MIDDLE, cyrillic.LAST];
var greek_upper = [greek.FIRST, greek.MIDDLE, greek.LAST];
var cyrillic_lower = [cyrillic.first, cyrillic.middle, cyrillic.last];
var case_modes = ["i", ""];
for (var mode = 0; mode < case_modes.length; mode++) {
  var flag = case_modes[mode];
  var ignore_case = (flag === "i");
  var n;

  // The lower case Greek letters are inside the range (19-21).
  for (n = 0; n < 3; n++) {
    assertTrue(
        Range(greek.first, cyrillic.LAST, flag).test(greek_lower[n]), 19 + n);
  }

  // The upper case Cyrillic letters are inside the range (22-24).
  for (n = 0; n < 3; n++) {
    assertTrue(
        Range(greek.first, cyrillic.LAST, flag).test(cyrillic_upper[n]), 22 + n);
  }

  // The range covers the lower case Greek letters and the upper case Cyrillic
  // letters, so the opposite cases only match when case is ignored.
  for (n = 0; n < 3; n++) {
    assertEquals(
        ignore_case,
        Range(greek.first, cyrillic.LAST, flag).test(greek_upper[n]),
        25 + n);
  }

  for (n = 0; n < 3; n++) {
    assertEquals(
        ignore_case,
        Range(greek.first, cyrillic.LAST, flag).test(cyrillic_lower[n]),
        28 + n);
  }
}
| 132 | |
| 133 | |
// Sigma is special because a single upper case character has two lower case
// versions. JS requires that case independence means converting everything to
// upper case, so the two lower case sigma variants are equal to each other in
// a case independent comparison.
//
// Each sigma form is used as the pattern in turn (ids 3x for SIGMA, 4x for
// sigma, 5x for alternative_sigma), first wrapped in a character class (ids
// suffixed with "[]") and then as a bare character.
//
// JSC and Tracemonkey were noted to fail the ignore-case checks that pair
// alternative_sigma with one of the other two forms (35, 45, 54 and 56).
var sigma_patterns = [SIGMA, sigma, alternative_sigma];
var sigma_subjects = [sigma, alternative_sigma, SIGMA];
for (var round = 0; round < 2; round++) {
  var in_class = (round === 0);
  var tag = in_class ? "[]" : "";

  for (var p = 0; p < sigma_patterns.length; p++) {
    var letter = sigma_patterns[p];
    var source = in_class ? "[" + letter + "]" : letter;
    var base = 30 + 10 * p;
    var s;

    // Case sensitive: only the identical sigma form matches (x1-x3).
    for (s = 0; s < sigma_subjects.length; s++) {
      if (sigma_subjects[s] === letter) {
        assertTrue(new RegExp(source).test(sigma_subjects[s]), (base + 1 + s) + tag);
      } else {
        assertFalse(new RegExp(source).test(sigma_subjects[s]), (base + 1 + s) + tag);
      }
    }

    // Ignoring case: all three sigma forms match one another (x4-x6).
    for (s = 0; s < sigma_subjects.length; s++) {
      assertTrue(
          new RegExp(source, "i").test(sigma_subjects[s]), (base + 4 + s) + tag);
    }
  }
}
| 175 | |
| 176 | |
// Ranges that cover both ASCII and non-ASCII characters. Every combination is
// run twice: once with pure ASCII subjects and once with the non-ASCII
// character U+FFFE appended to the ASCII subjects.
var subject_suffixes = ["", "\ufffe"];
for (var suffix_index = 0; suffix_index < subject_suffixes.length; suffix_index++) {
  var suffix = subject_suffixes[suffix_index];
  var has_suffix = (suffix_index === 1);

  // Width 0 ends the range at the last upper case Cyrillic letter ("c" in the
  // message); width 1 extends it all the way to U+FFFF ("f" in the message).
  for (var width = 0; width < 2; width++) {
    var full = (width === 1);
    var mixed = full ? "[a-\uffff]" : "[a-" + cyrillic.LAST + "]";
    var f = full ? "f" : "c";

    var case_flags = ["i", ""];
    for (var c = 0; c < case_flags.length; c++) {
      var flag = case_flags[c];
      var ignore_case = (flag === "i");
      var re = new RegExp(mixed, flag);

      // "A" sorts below "a", so it only matches through case folding; the
      // subject as a whole also matches when the appended U+FFFE falls inside
      // the full range.
      var expected = ignore_case || (full && has_suffix);
      assertEquals(expected, re.test("A" + suffix), 58 + flag + f);

      // "a" and "~" are ASCII characters inside the range.
      assertTrue(re.test("a" + suffix), 59 + flag + f);
      assertTrue(re.test("~" + suffix), 60 + flag + f);

      // Upper case Cyrillic is inside both ranges; lower case Cyrillic is
      // only reached by the full range or through case folding.
      assertTrue(re.test(cyrillic.MIDDLE), 61 + flag + f);
      assertEquals(ignore_case || full, re.test(cyrillic.middle), 62 + flag + f);
    }
  }
}