// Copyright 2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Test Unicode character ranges in regexps.


// Cyrillic.
// Sample letters used by the range tests below.  Upper-case keys hold
// capital letters and lower-case keys hold the corresponding small letters.
var cyrillic = {
  FIRST: "\u0410",   // A
  first: "\u0430",   // a
  LAST: "\u042f",    // YA
  last: "\u044f",    // ya
  MIDDLE: "\u0427",  // CHE
  middle: "\u0447",  // che
  // Actually no characters are between the cases in Cyrillic.
  BetweenCases: false
};

// The three sigma characters: the capital letter and its two lower-case
// forms (U+03C3 and the alternative U+03C2).
var SIGMA = "\u03a3";
var sigma = "\u03c3";
var alternative_sigma = "\u03c2";

// Greek.
// Same layout as the Cyrillic table: upper-case keys hold capital letters and
// lower-case keys hold the corresponding small letters.
var greek = {
  FIRST: "\u0391",  // ALPHA
  first: "\u03b1",  // alpha
  LAST: "\u03a9",   // OMEGA
  last: "\u03c9",   // omega
  MIDDLE: SIGMA,    // SIGMA
  middle: sigma,    // sigma
  // Epsilon acute is between ALPHA-OMEGA and alpha-omega, ie it
  // is between OMEGA and alpha.
  BetweenCases: "\u03ad"
};


// Builds a regexp consisting of the single character class [from-to].
//   from:  first character of the range.
//   to:    last character of the range.
//   flags: optional RegExp flags string (for example "i"); when omitted the
//          regexp is created without flags.
// Returns a freshly constructed RegExp on every call.
function Range(from, to, flags) {
  return new RegExp("[" + from + "-" + to + "]", flags);
}

// Test Cyrillic and Greek separately.
for (var lang = 0; lang < 2; lang++) {
  var chars = (lang == 0) ? cyrillic : greek;

  // First pass builds the range from the lower-case letters, second pass
  // from the upper-case letters; the "other case" letters are the
  // counterparts that should only match when ignoring case.
  for (var i = 0; i < 2; i++) {
    var lc = (i == 0);  // Lower case.
    var first = lc ? chars.first : chars.FIRST;
    var middle = lc ? chars.middle : chars.MIDDLE;
    var last = lc ? chars.last : chars.LAST;
    var first_other_case = lc ? chars.FIRST : chars.first;
    var middle_other_case = lc ? chars.MIDDLE : chars.middle;
    var last_other_case = lc ? chars.LAST : chars.last;

    // Case sensitive: the range matches its own letters...
    assertTrue(Range(first, last).test(first), 1);
    assertTrue(Range(first, last).test(middle), 2);
    assertTrue(Range(first, last).test(last), 3);

    // ...but not the letters of the other case.
    assertFalse(Range(first, last).test(first_other_case), 4);
    assertFalse(Range(first, last).test(middle_other_case), 5);
    assertFalse(Range(first, last).test(last_other_case), 6);

    // Case insensitive: both cases match.
    assertTrue(Range(first, last, "i").test(first), 7);
    assertTrue(Range(first, last, "i").test(middle), 8);
    assertTrue(Range(first, last, "i").test(last), 9);

    assertTrue(Range(first, last, "i").test(first_other_case), 10);
    assertTrue(Range(first, last, "i").test(middle_other_case), 11);
    assertTrue(Range(first, last, "i").test(last_other_case), 12);

    // A character lying between the two case blocks belongs to neither
    // single-case range, with or without the "i" flag.
    if (chars.BetweenCases) {
      assertFalse(Range(first, last).test(chars.BetweenCases), 13);
      assertFalse(Range(first, last, "i").test(chars.BetweenCases), 14);
    }
  }
  // A range spanning from the first capital to the last small letter does
  // include the in-between character.
  if (chars.BetweenCases) {
    assertTrue(Range(chars.FIRST, chars.last).test(chars.BetweenCases), 15);
    assertTrue(Range(chars.FIRST, chars.last, "i").test(chars.BetweenCases), 16);
  }
}

// Test range that covers both greek and cyrillic characters.
// The loop variable is declared with var so that iterating does not create
// an implicit global (which would throw in strict mode).
for (var key in greek) {
  assertTrue(Range(greek.FIRST, cyrillic.last).test(greek[key]), 17 + key);
  // Skips cyrillic.BetweenCases, which is false (no such character).
  if (cyrillic[key]) {
    assertTrue(Range(greek.FIRST, cyrillic.last).test(cyrillic[key]), 18 + key);
  }
}

// Run once ignoring case (i == 0) and once case sensitively (i == 1).
for (var i = 0; i < 2; i++) {
  var ignore_case = (i == 0);
  var flag = ignore_case ? "i" : "";
  assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.first), 19);
  assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.middle), 20);
  assertTrue(Range(greek.first, cyrillic.LAST, flag).test(greek.last), 21);

  assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.FIRST), 22);
  assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.MIDDLE), 23);
  assertTrue(Range(greek.first, cyrillic.LAST, flag).test(cyrillic.LAST), 24);

  // A range that covers the lower case greek letters and the upper case cyrillic
  // letters.  The opposite-case letters lie outside the range, so they are
  // expected to match only when case is ignored.
  assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.FIRST), 25);
  assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.MIDDLE), 26);
  assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(greek.LAST), 27);

  assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.first), 28);
  assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.middle), 29);
  assertEquals(ignore_case, Range(greek.first, cyrillic.LAST, flag).test(cyrillic.last), 30);
}


// Sigma is special because there are two lower case versions of the same upper
// case character.  JS requires that case independence means that you should
// convert everything to upper case, so the two sigma variants are equal to each
// other in a case independent comparison.
//
// Each pattern is tried both as a one-character character class (i == 0,
// message suffix "[]") and as a plain literal character (i == 1).
for (var i = 0; i < 2; i++) {
  var simple = (i != 0);
  var name = simple ? "" : "[]";
  var regex = simple ? SIGMA : "[" + SIGMA + "]";

  // Pattern: capital sigma.
  assertFalse(new RegExp(regex).test(sigma), 31 + name);
  assertFalse(new RegExp(regex).test(alternative_sigma), 32 + name);
  assertTrue(new RegExp(regex).test(SIGMA), 33 + name);

  assertTrue(new RegExp(regex, "i").test(sigma), 34 + name);
  // JSC and Tracemonkey fail this one.
  assertTrue(new RegExp(regex, "i").test(alternative_sigma), 35 + name);
  assertTrue(new RegExp(regex, "i").test(SIGMA), 36 + name);

  // Pattern: lower-case sigma.
  regex = simple ? sigma : "[" + sigma + "]";

  assertTrue(new RegExp(regex).test(sigma), 41 + name);
  assertFalse(new RegExp(regex).test(alternative_sigma), 42 + name);
  assertFalse(new RegExp(regex).test(SIGMA), 43 + name);

  assertTrue(new RegExp(regex, "i").test(sigma), 44 + name);
  // JSC and Tracemonkey fail this one.
  assertTrue(new RegExp(regex, "i").test(alternative_sigma), 45 + name);
  assertTrue(new RegExp(regex, "i").test(SIGMA), 46 + name);

  // Pattern: alternative lower-case sigma.
  regex = simple ? alternative_sigma : "[" + alternative_sigma + "]";

  assertFalse(new RegExp(regex).test(sigma), 51 + name);
  assertTrue(new RegExp(regex).test(alternative_sigma), 52 + name);
  assertFalse(new RegExp(regex).test(SIGMA), 53 + name);

  // JSC and Tracemonkey fail this one.
  assertTrue(new RegExp(regex, "i").test(sigma), 54 + name);
  assertTrue(new RegExp(regex, "i").test(alternative_sigma), 55 + name);
  // JSC and Tracemonkey fail this one.
  assertTrue(new RegExp(regex, "i").test(SIGMA), 56 + name);
}


// Run every check twice: once with pure ASCII subjects and once with a
// non-ASCII character ("\ufffe") appended to the ASCII subjects.
for (var add_non_ascii_character_to_subject = 0;
     add_non_ascii_character_to_subject < 2;
     add_non_ascii_character_to_subject++) {
  var suffix = add_non_ascii_character_to_subject ? "\ufffe" : "";
  // A range that covers both ASCII and non-ASCII.
  for (var i = 0; i < 2; i++) {
    // "full" extends the range up to \uffff; otherwise it stops at the last
    // upper-case Cyrillic letter.
    var full = (i != 0);
    var mixed = full ? "[a-\uffff]" : "[a-" + cyrillic.LAST + "]";
    var f = full ? "f" : "c";  // Tag appended to the assertion messages.
    for (var j = 0; j < 2; j++) {
      var ignore_case = (j == 0);
      var flag = ignore_case ? "i" : "";
      var re = new RegExp(mixed, flag);
      // "A" is below the range, so "A" + suffix matches only when case is
      // ignored or when the appended "\ufffe" falls inside the full range
      // (the regexp is not anchored, so matching the suffix is enough).
      var expected =
          ignore_case || (full && !!add_non_ascii_character_to_subject);
      assertEquals(expected, re.test("A" + suffix), 58 + flag + f);
      assertTrue(re.test("a" + suffix), 59 + flag + f);
      assertTrue(re.test("~" + suffix), 60 + flag + f);
      assertTrue(re.test(cyrillic.MIDDLE), 61 + flag + f);
      // The lower-case Cyrillic letter lies above cyrillic.LAST, so it needs
      // either the full range or case-insensitive matching.
      assertEquals(ignore_case || full, re.test(cyrillic.middle), 62 + flag + f);
    }
  }
}