// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Flags: --icu_case_mapping

// Some edge cases that unibrow got wrong

// Letters outside the BMP have to be case-mapped as whole code points.
assertEquals("𐐘", "𐑀".toUpperCase());
assertEquals("𐑀", "𐐘".toLowerCase());
// A capital sigma standing alone lowercases to the non-final form.
assertEquals("σ", "Σ".toLowerCase());

// Some different paths in the ICU case conversion fastpath

// Two capital sigmas: the first lowercases to U+03C3, the trailing one to
// the final form U+03C2.
assertEquals("σς", "\u03A3\u03A3".toLowerCase());
// Expand sharp s in latin1 fastpath
assertEquals("ASSB", "A\u00DFB".toUpperCase());
assertEquals("AB", "Ab".toUpperCase());
// Find first upper case in fastpath
assertEquals("ab", "aB".toLowerCase());
// Non-ASCII Latin-1 letter, with the input either needing conversion or
// already being in the requested case.
assertEquals("AÜ", "aü".toUpperCase());
assertEquals("AÜ", "AÜ".toUpperCase());
assertEquals("aü", "aü".toLowerCase());
assertEquals("aü", "AÜ".toLowerCase());
// Mixed input: the first upper-case letter is the non-ASCII one (U+00DC).
assertEquals("a\u00FC", "a\u00DC".toLowerCase());

// Starts with fastpath, but switches to full Unicode path
// The operands are written as escapes because the glyphs involved are easy
// to confuse with look-alikes (micro sign vs. Greek mu, Greek capital mu vs.
// Latin M).
// U+00FF is uppercased to U+0178.
assertEquals("A\u0178", "a\u00FF".toUpperCase());
// U+00B5 (µ) is uppercased to U+039C (Μ)
assertEquals("A\u039C", "a\u00B5".toUpperCase());

// Buffer size increase
// Sharp s expands to "SS" and each ligature to two or three letters, so the
// result is longer than the input.
assertEquals("CSSBẶ", "cßbặ".toUpperCase());
assertEquals("FIFLFFIFFL", "\uFB01\uFB02\uFB03\uFB04".toUpperCase());
// OneByte input with buffer size increase: non-fast path
assertEquals("ABCSS", "abCß".toLocaleUpperCase("tr"));

// More comprehensive tests for "tr", "az" and "lt" are in
// test262/intl402/Strings/*

// Buffer size decrease with a single locale or locale list.
// In Turkic (tr, az), U+0307 preceded by Capital Letter I is dropped.
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase("tr"));
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase("az"));
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase(["tr", "en"]));

// Cons string
// The input is built by concatenation, with the I / U+0307 pair split across
// two operands. Turkic lowercasing must still drop the dot; English and the
// locale-independent toLowerCase() must keep it.
// NOTE(review): the operands are very short; confirm the engine really
// represents these concatenations as cons strings (there may be a minimum
// length below which the result is flattened).
assertEquals("abcijkl", ("aBcI" + "\u0307jkl").toLocaleLowerCase("tr"));
assertEquals("abcijkl",
             ("aB" + "cI" + "\u0307j" + "kl").toLocaleLowerCase("tr"));
assertEquals("abci\u0307jkl", ("aBcI" + "\u0307jkl").toLocaleLowerCase("en"));
assertEquals("abci\u0307jkl",
             ("aB" + "cI" + "\u0307j" + "kl").toLocaleLowerCase("en"));
assertEquals("abci\u0307jkl", ("aBcI" + "\u0307jkl").toLowerCase());
assertEquals("abci\u0307jkl",
             ("aB" + "cI" + "\u0307j" + "kl").toLowerCase());

// "tr" and "az" should behave identically.
assertEquals("aBcI\u0307".toLocaleLowerCase("tr"),
             "aBcI\u0307".toLocaleLowerCase("az"));
// What matters is the first locale in the locale list.
assertEquals("aBcI\u0307".toLocaleLowerCase(["tr", "en", "fr"]),
             "aBcI\u0307".toLocaleLowerCase("tr"));
assertEquals("aBcI\u0307".toLocaleLowerCase(["en", "tr", "az"]),
             "aBcI\u0307".toLocaleLowerCase("en"));
assertEquals("aBcI\u0307".toLocaleLowerCase(["en", "tr", "az"]),
             "aBcI\u0307".toLowerCase());

// An empty locale list is the same as the default locale. Try these tests
// under Turkish and Greek locale.
// The default locale is checked twice: once implicitly (no argument) and once
// by passing its resolved language tag explicitly.
const defaultLocaleTag = new Intl.DateTimeFormat().resolvedOptions().locale;
assertEquals("aBcI\u0307".toLocaleLowerCase([]),
             "aBcI\u0307".toLocaleLowerCase());
assertEquals("aBcI\u0307".toLocaleLowerCase([]),
             "aBcI\u0307".toLocaleLowerCase(defaultLocaleTag));
assertEquals("άόύώ".toLocaleUpperCase([]), "άόύώ".toLocaleUpperCase());
assertEquals("άόύώ".toLocaleUpperCase([]),
             "άόύώ".toLocaleUpperCase(defaultLocaleTag));

// English/root locale keeps U+0307 (combining dot above).
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("en"));
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase(["en", "tr"]));
assertEquals("abci\u0307", "aBcI\u0307".toLowerCase());

// Greek uppercasing: not covered by intl402/String/*, yet. Tonos (U+0301) and
// other diacritic marks are dropped. This rule is based on the current CLDR's
// el-Upper transformation, but Greek uppercasing rules are more sophisticated
// than this. See http://bugs.icu-project.org/trac/ticket/10582 and
// http://unicode.org/cldr/trac/ticket/7905 .
// The same single accented vowel under several spellings of the Greek locale.
assertEquals("Α", "α\u0301".toLocaleUpperCase("el"));
assertEquals("Α", "α\u0301".toLocaleUpperCase("el-GR"));
assertEquals("Α", "α\u0301".toLocaleUpperCase("el-Grek"));
assertEquals("Α", "α\u0301".toLocaleUpperCase("el-Grek-GR"));
assertEquals("Α", "ά".toLocaleUpperCase("el"));
// Runs of accented vowels, precomposed and with combining marks.
// NOTE(review): these expectations encode the plain "drop every diacritic"
// behaviour described above. Newer ICU releases may add a dialytika to an
// iota/upsilon that follows a vowel which lost its accent; confirm against
// the ICU version bundled with the engine under test.
// NOTE(review): the first and third inputs below render identically. If they
// are also the same code points (tonos vs. oxia forms look alike), one of the
// two assertions is redundant.
assertEquals("ΑΟΥΩ", "άόύώ".toLocaleUpperCase("el"));
assertEquals("ΑΟΥΩ", "α\u0301ο\u0301υ\u0301ω\u0301".toLocaleUpperCase("el"));
assertEquals("ΑΟΥΩ", "άόύώ".toLocaleUpperCase("el"));
// Breathing marks are dropped together with the accent.
assertEquals("ΟΕ", "Ό\u1f15".toLocaleUpperCase("el"));
assertEquals("ΟΕ", "Ο\u0301ε\u0314\u0301".toLocaleUpperCase("el"));

// Input and output are identical.
assertEquals("αβγδε", "αβγδε".toLocaleLowerCase("el"));
assertEquals("ΑΒΓΔΕ", "ΑΒΓΔΕ".toLocaleUpperCase("el"));
assertEquals("ΑΒΓΔΕАБ𝐀𝐁", "ΑΒΓΔΕАБ𝐀𝐁".toLocaleUpperCase("el"));
assertEquals("ABCDEÂÓḴ123", "ABCDEÂÓḴ123".toLocaleUpperCase("el"));
// ASCII-only or Latin-1 only: 1-byte
assertEquals("ABCDE123", "ABCDE123".toLocaleUpperCase("el"));
assertEquals("ABCDEÂÓ123", "ABCDEÂÓ123".toLocaleUpperCase("el"));

// To make sure that the input string is not overwritten in place.
// A per-character copy of each input is taken first; after uppercasing, the
// input must still match that copy. Block-scoped bindings keep the loop
// variables from leaking into the global scope.
for (const input of ["abCdef", "αβγδε", "άόύώ", "аб"]) {
  const charsBefore = input.split("");
  // Only the side effect (if any) on `input` matters; the result is discarded.
  input.toLocaleUpperCase("el");
  assertEquals(input, charsBefore.join(""));
}

// In other locales, U+0301 is preserved.
assertEquals("Α\u0301Ο\u0301Υ\u0301Ω\u0301",
             "α\u0301ο\u0301υ\u0301ω\u0301".toLocaleUpperCase("en"));
assertEquals("Α\u0301Ο\u0301Υ\u0301Ω\u0301",
             "α\u0301ο\u0301υ\u0301ω\u0301".toUpperCase());

// Plane 1; Deseret and Warang Citi Script.
assertEquals("\u{10400}\u{118A0}", "\u{10428}\u{118C0}".toUpperCase());
assertEquals("\u{10428}\u{118C0}", "\u{10400}\u{118A0}".toLowerCase());
// Mathematical Bold {Capital, Small} Letter A do not change.
assertEquals("\u{1D400}\u{1D41A}", "\u{1D400}\u{1D41A}".toUpperCase());
assertEquals("\u{1D400}\u{1D41A}", "\u{1D400}\u{1D41A}".toLowerCase());
// Plane 1; New characters in Unicode 8.0
// Each direction is checked with the locale-independent method, with the
// default locale, and with an explicit Turkic locale list.
assertEquals("\u{10C80}", "\u{10CC0}".toUpperCase());
assertEquals("\u{10CC0}", "\u{10C80}".toLowerCase());
assertEquals("\u{10C80}", "\u{10CC0}".toLocaleUpperCase());
assertEquals("\u{10CC0}", "\u{10C80}".toLocaleLowerCase());
assertEquals("\u{10C80}", "\u{10CC0}".toLocaleUpperCase(["tr"]));
assertEquals("\u{10CC0}", "\u{10C80}".toLocaleLowerCase(["tr"]));