// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Flags: --icu_case_mapping

// Some edge cases that unibrow got wrong

// Supplementary-plane letters must map to their case counterparts.
assertEquals("𐐘", "𐑀".toUpperCase());
assertEquals("𐑀", "𐐘".toLowerCase());
// A lone capital sigma lowercases to the non-final form σ.
assertEquals("σ", "Σ".toLowerCase());

// Some different paths in the ICU case conversion fastpath

// The second capital sigma is word-final, so it lowercases to ς.
assertEquals("σς", "\u03A3\u03A3".toLowerCase());
// Expand sharp s in latin1 fastpath
assertEquals("ASSB", "A\u00DFB".toUpperCase());
assertEquals("AB", "Ab".toUpperCase());
// Find first upper case in fastpath
assertEquals("ab", "aB".toLowerCase());
// Non-ASCII letters (ü / Ü), including inputs that are already in the
// requested case.
assertEquals("AÜ", "aü".toUpperCase());
assertEquals("AÜ", "AÜ".toUpperCase());
assertEquals("aü", "aü".toLowerCase());
assertEquals("aü", "AÜ".toLowerCase());

// Starts with fastpath, but switches to full Unicode path
// U+00FF is uppercased to U+0178.
assertEquals("AŸ", "aÿ".toUpperCase());
// U+00B5 (µ) is uppercased to U+039C (Μ)
assertEquals("AΜ", "aµ".toUpperCase());

// Buffer size increase: the uppercased result is longer than the input.
assertEquals("CSSBẶ", "cßbặ".toUpperCase());
assertEquals("FIFLFFIFFL", "\uFB01\uFB02\uFB03\uFB04".toUpperCase());
// OneByte input with buffer size increase: non-fast path
assertEquals("ABCSS", "abCß".toLocaleUpperCase("tr"));

// More comprehensive tests for "tr", "az" and "lt" are in
// test262/intl402/String/*

// Buffer size decrease with a single locale or locale list.
// In Turkic (tr, az), U+0307 preceded by Capital Letter I is dropped.
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase("tr"));
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase("az"));
assertEquals("abci", "aBcI\u0307".toLocaleLowerCase(["tr", "en"]));

// Cons string
// The inputs are assembled with `+` so that "I" and U+0307 come from
// different operands. Turkish drops the combining dot; "en" and the
// locale-insensitive toLowerCase() keep it.
assertEquals("abcijkl", ("aBcI" + "\u0307jkl").toLocaleLowerCase("tr"));
assertEquals("abcijkl",
             ("aB" + "cI" + "\u0307j" + "kl").toLocaleLowerCase("tr"));
assertEquals("abci\u0307jkl", ("aBcI" + "\u0307jkl").toLocaleLowerCase("en"));
assertEquals("abci\u0307jkl",
             ("aB" + "cI" + "\u0307j" + "kl").toLocaleLowerCase("en"));
assertEquals("abci\u0307jkl", ("aBcI" + "\u0307jkl").toLowerCase());
assertEquals("abci\u0307jkl",
             ("aB" + "cI" + "\u0307j" + "kl").toLowerCase());

// "tr" and "az" should behave identically.
assertEquals("aBcI\u0307".toLocaleLowerCase("tr"),
             "aBcI\u0307".toLocaleLowerCase("az"));
// What matters is the first locale in the locale list.
assertEquals("aBcI\u0307".toLocaleLowerCase(["tr", "en", "fr"]),
             "aBcI\u0307".toLocaleLowerCase("tr"));
assertEquals("aBcI\u0307".toLocaleLowerCase(["en", "tr", "az"]),
             "aBcI\u0307".toLocaleLowerCase("en"));
assertEquals("aBcI\u0307".toLocaleLowerCase(["en", "tr", "az"]),
             "aBcI\u0307".toLowerCase());

// An empty locale list is the same as the default locale. Try these tests
// under Turkish and Greek locale.
assertEquals("aBcI\u0307".toLocaleLowerCase([]),
             "aBcI\u0307".toLocaleLowerCase());
assertEquals("άόύώ".toLocaleUpperCase([]), "άόύώ".toLocaleUpperCase());
{
  // Passing the default locale explicitly must also agree with the empty
  // list. The locale is read back from resolvedOptions(); there is no
  // `Intl.GetDefaultLocale` property, so using that name would just pass
  // `undefined` and repeat the no-argument checks above.
  const defaultLocale = new Intl.DateTimeFormat().resolvedOptions().locale;
  assertEquals("aBcI\u0307".toLocaleLowerCase([]),
               "aBcI\u0307".toLocaleLowerCase(defaultLocale));
  assertEquals("άόύώ".toLocaleUpperCase([]),
               "άόύώ".toLocaleUpperCase(defaultLocale));
}


// English/root locale keeps U+0307 (combining dot above).
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase("en"));
assertEquals("abci\u0307", "aBcI\u0307".toLocaleLowerCase(["en", "tr"]));
assertEquals("abci\u0307", "aBcI\u0307".toLowerCase());

| 86 | // Greek uppercasing: not covered by intl402/String/*, yet. Tonos (U+0301) and |
| 87 | // other diacritic marks are dropped. This rule is based on the current CLDR's |
| 88 | // el-Upper transformation, but Greek uppercasing rules are more sophisticated |
| 89 | // than this. See http://bugs.icu-project.org/trac/ticket/10582 and |
| 90 | // http://unicode.org/cldr/trac/ticket/7905 . |
| 91 | assertEquals("Α", "α\u0301".toLocaleUpperCase("el")); |
| 92 | assertEquals("Α", "α\u0301".toLocaleUpperCase("el-GR")); |
| 93 | assertEquals("Α", "α\u0301".toLocaleUpperCase("el-Grek")); |
| 94 | assertEquals("Α", "α\u0301".toLocaleUpperCase("el-Grek-GR")); |
| 95 | assertEquals("Α", "ά".toLocaleUpperCase("el")); |
| 96 | assertEquals("ΑΟΥΩ", "άόύώ".toLocaleUpperCase("el")); |
| 97 | assertEquals("ΑΟΥΩ", "α\u0301ο\u0301υ\u0301ω\u0301".toLocaleUpperCase("el")); |
| 98 | assertEquals("ΑΟΥΩ", "άόύώ".toLocaleUpperCase("el")); |
| 99 | assertEquals("ΟΕ", "Ό\u1f15".toLocaleUpperCase("el")); |
| 100 | assertEquals("ΟΕ", "Ο\u0301ε\u0314\u0301".toLocaleUpperCase("el")); |
| 101 | |
// Input and output are identical.
assertEquals("αβγδε", "αβγδε".toLocaleLowerCase("el"));
assertEquals("ΑΒΓΔΕ", "ΑΒΓΔΕ".toLocaleUpperCase("el"));
assertEquals("ΑΒΓΔΕАБ𝐀𝐁", "ΑΒΓΔΕАБ𝐀𝐁".toLocaleUpperCase("el"));
assertEquals("ABCDEÂÓḴ123", "ABCDEÂÓḴ123".toLocaleUpperCase("el"));
// ASCII-only or Latin-1 only: 1-byte
assertEquals("ABCDE123", "ABCDE123".toLocaleUpperCase("el"));
assertEquals("ABCDEÂÓ123", "ABCDEÂÓ123".toLocaleUpperCase("el"));

// To make sure that the input string is not overwritten in place.
var strings = ["abCdef", "αβγδε", "άόύώ", "аб"];
for (var s of strings) {
  // Snapshot the contents character by character before converting.
  var backupAsArray = s.split("");
  var uppered = s.toLocaleUpperCase("el");
  // Every sample contains lowercase letters, so a real conversion happened
  // and the result must differ from the input.
  assertEquals(false, uppered === s);
  // The receiver itself must still hold its original contents.
  assertEquals(backupAsArray.join(""), s);
}

// In other locales, U+0301 is preserved.
assertEquals("Α\u0301Ο\u0301Υ\u0301Ω\u0301",
             "α\u0301ο\u0301υ\u0301ω\u0301".toLocaleUpperCase("en"));
assertEquals("Α\u0301Ο\u0301Υ\u0301Ω\u0301",
             "α\u0301ο\u0301υ\u0301ω\u0301".toUpperCase());

// Plane 1; Deseret and Warang Citi Script.
assertEquals("\u{10400}\u{118A0}", "\u{10428}\u{118C0}".toUpperCase());
assertEquals("\u{10428}\u{118C0}", "\u{10400}\u{118A0}".toLowerCase());
// Mathematical Bold {Capital, Small} Letter A do not change.
assertEquals("\u{1D400}\u{1D41A}", "\u{1D400}\u{1D41A}".toUpperCase());
assertEquals("\u{1D400}\u{1D41A}", "\u{1D400}\u{1D41A}".toLowerCase());
// Plane 1; New characters in Unicode 8.0
assertEquals("\u{10C80}", "\u{10CC0}".toUpperCase());
assertEquals("\u{10CC0}", "\u{10C80}".toLowerCase());
// Same pair through the locale-sensitive entry points: default locale first,
// then an explicit locale list, in both directions.
assertEquals("\u{10C80}", "\u{10CC0}".toLocaleUpperCase());
assertEquals("\u{10CC0}", "\u{10C80}".toLocaleLowerCase());
assertEquals("\u{10C80}", "\u{10CC0}".toLocaleUpperCase(["tr"]));
assertEquals("\u{10CC0}", "\u{10C80}".toLocaleLowerCase(["tr"]));