bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2013 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #include "SkOTTable_name.h" |
| 9 | |
| 10 | #include "SkEndian.h" |
Hal Canary | ee08b4a | 2018-03-01 15:56:37 -0500 | [diff] [blame] | 11 | #include "SkStringUtils.h" |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 12 | #include "SkTSearch.h" |
commit-bot@chromium.org | b5e34e2 | 2013-05-07 15:28:15 +0000 | [diff] [blame] | 13 | #include "SkTemplates.h" |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 14 | #include "SkUtils.h" |
| 15 | |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 16 | static SkUnichar next_unichar_UTF16BE(const uint8_t** srcPtr, size_t* length) { |
| 17 | SkASSERT(srcPtr && *srcPtr && length); |
| 18 | SkASSERT(*length > 0); |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 19 | |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 20 | uint16_t leading; |
| 21 | if (*length < sizeof(leading)) { |
| 22 | *length = 0; |
| 23 | return 0xFFFD; |
| 24 | } |
| 25 | memcpy(&leading, *srcPtr, sizeof(leading)); |
| 26 | *srcPtr += sizeof(leading); |
| 27 | *length -= sizeof(leading); |
| 28 | SkUnichar c = SkEndian_SwapBE16(leading); |
Mike Klein | 475c5e9 | 2018-08-08 10:23:17 -0400 | [diff] [blame] | 29 | |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 30 | if (SkUTF16_IsTrailingSurrogate(c)) { |
| 31 | return 0xFFFD; |
| 32 | } |
| 33 | if (SkUTF16_IsLeadingSurrogate(c)) { |
| 34 | uint16_t trailing; |
| 35 | if (*length < sizeof(trailing)) { |
| 36 | *length = 0; |
| 37 | return 0xFFFD; |
| 38 | } |
| 39 | memcpy(&trailing, *srcPtr, sizeof(trailing)); |
| 40 | SkUnichar c2 = SkEndian_SwapBE16(trailing); |
| 41 | if (!SkUTF16_IsTrailingSurrogate(c2)) { |
| 42 | return 0xFFFD; |
| 43 | } |
| 44 | *srcPtr += sizeof(trailing); |
| 45 | *length -= sizeof(trailing); |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 46 | |
| 47 | c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00); |
| 48 | } |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 49 | return c; |
| 50 | } |
| 51 | |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 52 | static void SkString_from_UTF16BE(const uint8_t* utf16be, size_t length, SkString& utf8) { |
Mike Klein | 475c5e9 | 2018-08-08 10:23:17 -0400 | [diff] [blame] | 53 | // Note that utf16be may not be 2-byte aligned. |
halcanary | 96fcdcc | 2015-08-27 07:41:13 -0700 | [diff] [blame] | 54 | SkASSERT(utf16be != nullptr); |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 55 | |
| 56 | utf8.reset(); |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 57 | while (length) { |
| 58 | utf8.appendUnichar(next_unichar_UTF16BE(&utf16be, &length)); |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 59 | } |
| 60 | } |
| 61 | |
/** Maps the upper half of MacRoman to Unicode:
 *      UnicodeFromMacRoman[macRomanPoint - 0x80] -> unicodeCodePoint.
 *  Derived from http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT .
 *  The first 128 MacRoman code points match ASCII, so only the second 128
 *  need a table. Each row below covers eight consecutive MacRoman code points.
 */
static const uint16_t UnicodeFromMacRoman[0x80] = {
    /* 0x80 */ 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
    /* 0x88 */ 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
    /* 0x90 */ 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
    /* 0x98 */ 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
    /* 0xA0 */ 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
    /* 0xA8 */ 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
    /* 0xB0 */ 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
    /* 0xB8 */ 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
    /* 0xC0 */ 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
    /* 0xC8 */ 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
    /* 0xD0 */ 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
    /* 0xD8 */ 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
    /* 0xE0 */ 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
    /* 0xE8 */ 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
    /* 0xF0 */ 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
    /* 0xF8 */ 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7,
};
| 85 | |
| 86 | static void SkStringFromMacRoman(const uint8_t* macRoman, size_t length, SkString& utf8) { |
| 87 | utf8.reset(); |
| 88 | for (size_t i = 0; i < length; ++i) { |
mtklein@google.com | 1e4c4fe | 2013-11-04 21:22:45 +0000 | [diff] [blame] | 89 | utf8.appendUnichar(macRoman[i] < 0x80 ? macRoman[i] |
| 90 | : UnicodeFromMacRoman[macRoman[i] - 0x80]); |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 91 | } |
| 92 | } |
| 93 | |
/** Pairs a 'name' table language ID with the BCP 47 tag reported for it. */
struct BCP47FromLanguageId {
    uint16_t languageID;
    const char* bcp47;
};

/** Language ID to BCP 47 lookup table.
 *  The Mac and Windows values do not conflict, so this is currently one single table.
 *  Entries are kept in non-decreasing languageID order because the table is binary searched.
 *
 *  NOTE(review): a few Windows LCIDs (0x040d, 0x040e, 0x0443, 0x082e, 0x141a, 0x143b,
 *  0x1c3b, 0x201a, 0x203b) are listed twice, once with a region-qualified tag and once
 *  with a shorter tag. Which of the two a lookup yields depends on the search routine;
 *  confirm this is intended.
 */
static const struct BCP47FromLanguageId BCP47FromLanguageID[] = {
    /** A mapping from Mac Language Designators to BCP 47 codes.
     *  The following list was constructed more or less manually.
     *  Apple now uses BCP 47 (post OSX10.4), so there will be no new entries.
     */
    { 0, "en" }, // English
    { 1, "fr" }, // French
    { 2, "de" }, // German
    { 3, "it" }, // Italian
    { 4, "nl" }, // Dutch
    { 5, "sv" }, // Swedish
    { 6, "es" }, // Spanish
    { 7, "da" }, // Danish
    { 8, "pt" }, // Portuguese
    { 9, "nb" }, // Norwegian
    { 10, "he" }, // Hebrew
    { 11, "ja" }, // Japanese
    { 12, "ar" }, // Arabic
    { 13, "fi" }, // Finnish
    { 14, "el" }, // Greek
    { 15, "is" }, // Icelandic
    { 16, "mt" }, // Maltese
    { 17, "tr" }, // Turkish
    { 18, "hr" }, // Croatian
    { 19, "zh-Hant" }, // Chinese (Traditional)
    { 20, "ur" }, // Urdu
    { 21, "hi" }, // Hindi
    { 22, "th" }, // Thai
    { 23, "ko" }, // Korean
    { 24, "lt" }, // Lithuanian
    { 25, "pl" }, // Polish
    { 26, "hu" }, // Hungarian
    { 27, "et" }, // Estonian
    { 28, "lv" }, // Latvian
    { 29, "se" }, // Sami
    { 30, "fo" }, // Faroese
    { 31, "fa" }, // Farsi (Persian)
    { 32, "ru" }, // Russian
    { 33, "zh-Hans" }, // Chinese (Simplified)
    { 34, "nl" }, // Dutch
    { 35, "ga" }, // Irish (Gaelic)
    { 36, "sq" }, // Albanian
    { 37, "ro" }, // Romanian
    { 38, "cs" }, // Czech
    { 39, "sk" }, // Slovak
    { 40, "sl" }, // Slovenian
    { 41, "yi" }, // Yiddish
    { 42, "sr" }, // Serbian
    { 43, "mk" }, // Macedonian
    { 44, "bg" }, // Bulgarian
    { 45, "uk" }, // Ukrainian
    { 46, "be" }, // Byelorussian
    { 47, "uz" }, // Uzbek
    { 48, "kk" }, // Kazakh
    { 49, "az-Cyrl" }, // Azerbaijani (Cyrillic)
    { 50, "az-Arab" }, // Azerbaijani (Arabic)
    { 51, "hy" }, // Armenian
    { 52, "ka" }, // Georgian
    { 53, "mo" }, // Moldavian
    { 54, "ky" }, // Kirghiz
    { 55, "tg" }, // Tajiki
    { 56, "tk" }, // Turkmen
    { 57, "mn-Mong" }, // Mongolian (Traditional)
    { 58, "mn-Cyrl" }, // Mongolian (Cyrillic)
    { 59, "ps" }, // Pashto
    { 60, "ku" }, // Kurdish
    { 61, "ks" }, // Kashmiri
    { 62, "sd" }, // Sindhi
    { 63, "bo" }, // Tibetan
    { 64, "ne" }, // Nepali
    { 65, "sa" }, // Sanskrit
    { 66, "mr" }, // Marathi
    { 67, "bn" }, // Bengali
    { 68, "as" }, // Assamese
    { 69, "gu" }, // Gujarati
    { 70, "pa" }, // Punjabi
    { 71, "or" }, // Oriya
    { 72, "ml" }, // Malayalam
    { 73, "kn" }, // Kannada
    { 74, "ta" }, // Tamil
    { 75, "te" }, // Telugu
    { 76, "si" }, // Sinhalese
    { 77, "my" }, // Burmese
    { 78, "km" }, // Khmer
    { 79, "lo" }, // Lao
    { 80, "vi" }, // Vietnamese
    { 81, "id" }, // Indonesian
    { 82, "tl" }, // Tagalog
    { 83, "ms-Latn" }, // Malay (Roman)
    { 84, "ms-Arab" }, // Malay (Arabic)
    { 85, "am" }, // Amharic
    { 86, "ti" }, // Tigrinya
    { 87, "om" }, // Oromo
    { 88, "so" }, // Somali
    { 89, "sw" }, // Swahili
    { 90, "rw" }, // Kinyarwanda/Ruanda
    { 91, "rn" }, // Rundi
    { 92, "ny" }, // Nyanja/Chewa
    { 93, "mg" }, // Malagasy
    { 94, "eo" }, // Esperanto
    { 128, "cy" }, // Welsh
    { 129, "eu" }, // Basque
    { 130, "ca" }, // Catalan
    { 131, "la" }, // Latin
    { 132, "qu" }, // Quechua
    { 133, "gn" }, // Guarani
    { 134, "ay" }, // Aymara
    { 135, "tt" }, // Tatar
    { 136, "ug" }, // Uighur
    { 137, "dz" }, // Dzongkha
    { 138, "jv-Latn" }, // Javanese (Roman)
    { 139, "su-Latn" }, // Sundanese (Roman)
    { 140, "gl" }, // Galician
    { 141, "af" }, // Afrikaans
    { 142, "br" }, // Breton
    { 143, "iu" }, // Inuktitut
    { 144, "gd" }, // Scottish (Gaelic)
    { 145, "gv" }, // Manx (Gaelic)
    { 146, "ga" }, // Irish (Gaelic with Lenition)
    { 147, "to" }, // Tongan
    { 148, "el" }, // Greek (Polytonic) Note: ISO 15924 does not have an equivalent script name.
    { 149, "kl" }, // Greenlandic
    { 150, "az-Latn" }, // Azerbaijani (Roman)
    { 151, "nn" }, // Nynorsk

    /** A mapping from Windows LCID to BCP 47 codes.
     *  This list is the sorted, curated output of tools/win_lcid.cpp.
     *  Note that these are sorted by value for quick binary lookup, and not logically by lsb.
     *  The 'bare' language ids (e.g. 0x0001 for Arabic) are omitted
     *  as they do not appear as valid language ids in the OpenType specification.
     */
    { 0x0401, "ar-SA" }, // Arabic
    { 0x0402, "bg-BG" }, // Bulgarian
    { 0x0403, "ca-ES" }, // Catalan
    { 0x0404, "zh-TW" }, // Chinese (Traditional)
    { 0x0405, "cs-CZ" }, // Czech
    { 0x0406, "da-DK" }, // Danish
    { 0x0407, "de-DE" }, // German
    { 0x0408, "el-GR" }, // Greek
    { 0x0409, "en-US" }, // English
    { 0x040a, "es-ES_tradnl" }, // Spanish
    { 0x040b, "fi-FI" }, // Finnish
    { 0x040c, "fr-FR" }, // French
    { 0x040d, "he-IL" }, // Hebrew
    { 0x040d, "he" }, // Hebrew
    { 0x040e, "hu-HU" }, // Hungarian
    { 0x040e, "hu" }, // Hungarian
    { 0x040f, "is-IS" }, // Icelandic
    { 0x0410, "it-IT" }, // Italian
    { 0x0411, "ja-JP" }, // Japanese
    { 0x0412, "ko-KR" }, // Korean
    { 0x0413, "nl-NL" }, // Dutch
    { 0x0414, "nb-NO" }, // Norwegian (Bokmål)
    { 0x0415, "pl-PL" }, // Polish
    { 0x0416, "pt-BR" }, // Portuguese
    { 0x0417, "rm-CH" }, // Romansh
    { 0x0418, "ro-RO" }, // Romanian
    { 0x0419, "ru-RU" }, // Russian
    { 0x041a, "hr-HR" }, // Croatian
    { 0x041b, "sk-SK" }, // Slovak
    { 0x041c, "sq-AL" }, // Albanian
    { 0x041d, "sv-SE" }, // Swedish
    { 0x041e, "th-TH" }, // Thai
    { 0x041f, "tr-TR" }, // Turkish
    { 0x0420, "ur-PK" }, // Urdu
    { 0x0421, "id-ID" }, // Indonesian
    { 0x0422, "uk-UA" }, // Ukrainian
    { 0x0423, "be-BY" }, // Belarusian
    { 0x0424, "sl-SI" }, // Slovenian
    { 0x0425, "et-EE" }, // Estonian
    { 0x0426, "lv-LV" }, // Latvian
    { 0x0427, "lt-LT" }, // Lithuanian
    { 0x0428, "tg-Cyrl-TJ" }, // Tajik (Cyrillic)
    { 0x0429, "fa-IR" }, // Persian
    { 0x042a, "vi-VN" }, // Vietnamese
    { 0x042b, "hy-AM" }, // Armenian
    { 0x042c, "az-Latn-AZ" }, // Azeri (Latin)
    { 0x042d, "eu-ES" }, // Basque
    { 0x042e, "hsb-DE" }, // Upper Sorbian
    { 0x042f, "mk-MK" }, // Macedonian (FYROM)
    { 0x0432, "tn-ZA" }, // Setswana
    { 0x0434, "xh-ZA" }, // isiXhosa
    { 0x0435, "zu-ZA" }, // isiZulu
    { 0x0436, "af-ZA" }, // Afrikaans
    { 0x0437, "ka-GE" }, // Georgian
    { 0x0438, "fo-FO" }, // Faroese
    { 0x0439, "hi-IN" }, // Hindi
    { 0x043a, "mt-MT" }, // Maltese
    { 0x043b, "se-NO" }, // Sami (Northern)
    { 0x043e, "ms-MY" }, // Malay
    { 0x043f, "kk-KZ" }, // Kazakh
    { 0x0440, "ky-KG" }, // Kyrgyz
    { 0x0441, "sw-KE" }, // Kiswahili
    { 0x0442, "tk-TM" }, // Turkmen
    { 0x0443, "uz-Latn-UZ" }, // Uzbek (Latin)
    { 0x0443, "uz" }, // Uzbek
    { 0x0444, "tt-RU" }, // Tatar
    { 0x0445, "bn-IN" }, // Bengali
    { 0x0446, "pa-IN" }, // Punjabi
    { 0x0447, "gu-IN" }, // Gujarati
    { 0x0448, "or-IN" }, // Oriya
    { 0x0449, "ta-IN" }, // Tamil
    { 0x044a, "te-IN" }, // Telugu
    { 0x044b, "kn-IN" }, // Kannada
    { 0x044c, "ml-IN" }, // Malayalam
    { 0x044d, "as-IN" }, // Assamese
    { 0x044e, "mr-IN" }, // Marathi
    { 0x044f, "sa-IN" }, // Sanskrit
    { 0x0450, "mn-Cyrl" }, // Mongolian (Cyrillic)
    { 0x0451, "bo-CN" }, // Tibetan
    { 0x0452, "cy-GB" }, // Welsh
    { 0x0453, "km-KH" }, // Khmer
    { 0x0454, "lo-LA" }, // Lao
    { 0x0456, "gl-ES" }, // Galician
    { 0x0457, "kok-IN" }, // Konkani
    { 0x045a, "syr-SY" }, // Syriac
    { 0x045b, "si-LK" }, // Sinhala
    { 0x045d, "iu-Cans-CA" }, // Inuktitut (Syllabics)
    { 0x045e, "am-ET" }, // Amharic
    { 0x0461, "ne-NP" }, // Nepali
    { 0x0462, "fy-NL" }, // Frisian
    { 0x0463, "ps-AF" }, // Pashto
    { 0x0464, "fil-PH" }, // Filipino
    { 0x0465, "dv-MV" }, // Divehi
    { 0x0468, "ha-Latn-NG" }, // Hausa (Latin)
    { 0x046a, "yo-NG" }, // Yoruba
    { 0x046b, "quz-BO" }, // Quechua
    { 0x046c, "nso-ZA" }, // Sesotho sa Leboa
    { 0x046d, "ba-RU" }, // Bashkir
    { 0x046e, "lb-LU" }, // Luxembourgish
    { 0x046f, "kl-GL" }, // Greenlandic
    { 0x0470, "ig-NG" }, // Igbo
    { 0x0478, "ii-CN" }, // Yi
    { 0x047a, "arn-CL" }, // Mapudungun
    { 0x047c, "moh-CA" }, // Mohawk
    { 0x047e, "br-FR" }, // Breton
    { 0x0480, "ug-CN" }, // Uyghur
    { 0x0481, "mi-NZ" }, // Maori
    { 0x0482, "oc-FR" }, // Occitan
    { 0x0483, "co-FR" }, // Corsican
    { 0x0484, "gsw-FR" }, // Alsatian
    { 0x0485, "sah-RU" }, // Yakut
    { 0x0486, "qut-GT" }, // K'iche
    { 0x0487, "rw-RW" }, // Kinyarwanda
    { 0x0488, "wo-SN" }, // Wolof
    { 0x048c, "prs-AF" }, // Dari
    { 0x0491, "gd-GB" }, // Scottish Gaelic
    { 0x0801, "ar-IQ" }, // Arabic
    { 0x0804, "zh-Hans" }, // Chinese (Simplified)
    { 0x0807, "de-CH" }, // German
    { 0x0809, "en-GB" }, // English
    { 0x080a, "es-MX" }, // Spanish
    { 0x080c, "fr-BE" }, // French
    { 0x0810, "it-CH" }, // Italian
    { 0x0813, "nl-BE" }, // Dutch
    { 0x0814, "nn-NO" }, // Norwegian (Nynorsk)
    { 0x0816, "pt-PT" }, // Portuguese
    { 0x081a, "sr-Latn-CS" }, // Serbian (Latin)
    { 0x081d, "sv-FI" }, // Swedish
    { 0x082c, "az-Cyrl-AZ" }, // Azeri (Cyrillic)
    { 0x082e, "dsb-DE" }, // Lower Sorbian
    { 0x082e, "dsb" }, // Lower Sorbian
    { 0x083b, "se-SE" }, // Sami (Northern)
    { 0x083c, "ga-IE" }, // Irish
    { 0x083e, "ms-BN" }, // Malay
    { 0x0843, "uz-Cyrl-UZ" }, // Uzbek (Cyrillic)
    { 0x0845, "bn-BD" }, // Bengali
    { 0x0850, "mn-Mong-CN" }, // Mongolian (Traditional Mongolian)
    { 0x085d, "iu-Latn-CA" }, // Inuktitut (Latin)
    { 0x085f, "tzm-Latn-DZ" }, // Tamazight (Latin)
    { 0x086b, "quz-EC" }, // Quechua
    { 0x0c01, "ar-EG" }, // Arabic
    { 0x0c04, "zh-Hant" }, // Chinese (Traditional)
    { 0x0c07, "de-AT" }, // German
    { 0x0c09, "en-AU" }, // English
    { 0x0c0a, "es-ES" }, // Spanish
    { 0x0c0c, "fr-CA" }, // French
    { 0x0c1a, "sr-Cyrl-CS" }, // Serbian (Cyrillic)
    { 0x0c3b, "se-FI" }, // Sami (Northern)
    { 0x0c6b, "quz-PE" }, // Quechua
    { 0x1001, "ar-LY" }, // Arabic
    { 0x1004, "zh-SG" }, // Chinese (Simplified)
    { 0x1007, "de-LU" }, // German
    { 0x1009, "en-CA" }, // English
    { 0x100a, "es-GT" }, // Spanish
    { 0x100c, "fr-CH" }, // French
    { 0x101a, "hr-BA" }, // Croatian (Latin)
    { 0x103b, "smj-NO" }, // Sami (Lule)
    { 0x1401, "ar-DZ" }, // Arabic
    { 0x1404, "zh-MO" }, // Chinese (Traditional)
    { 0x1407, "de-LI" }, // German
    { 0x1409, "en-NZ" }, // English
    { 0x140a, "es-CR" }, // Spanish
    { 0x140c, "fr-LU" }, // French
    { 0x141a, "bs-Latn-BA" }, // Bosnian (Latin)
    { 0x141a, "bs" }, // Bosnian
    { 0x143b, "smj-SE" }, // Sami (Lule)
    { 0x143b, "smj" }, // Sami (Lule)
    { 0x1801, "ar-MA" }, // Arabic
    { 0x1809, "en-IE" }, // English
    { 0x180a, "es-PA" }, // Spanish
    { 0x180c, "fr-MC" }, // French
    { 0x181a, "sr-Latn-BA" }, // Serbian (Latin)
    { 0x183b, "sma-NO" }, // Sami (Southern)
    { 0x1c01, "ar-TN" }, // Arabic
    { 0x1c09, "en-ZA" }, // English
    { 0x1c0a, "es-DO" }, // Spanish
    { 0x1c1a, "sr-Cyrl-BA" }, // Serbian (Cyrillic)
    { 0x1c3b, "sma-SE" }, // Sami (Southern)
    { 0x1c3b, "sma" }, // Sami (Southern)
    { 0x2001, "ar-OM" }, // Arabic
    { 0x2009, "en-JM" }, // English
    { 0x200a, "es-VE" }, // Spanish
    { 0x201a, "bs-Cyrl-BA" }, // Bosnian (Cyrillic)
    { 0x201a, "bs-Cyrl" }, // Bosnian (Cyrillic)
    { 0x203b, "sms-FI" }, // Sami (Skolt)
    { 0x203b, "sms" }, // Sami (Skolt)
    { 0x2401, "ar-YE" }, // Arabic
    { 0x2409, "en-029" }, // English
    { 0x240a, "es-CO" }, // Spanish
    { 0x241a, "sr-Latn-RS" }, // Serbian (Latin)
    { 0x243b, "smn-FI" }, // Sami (Inari)
    { 0x2801, "ar-SY" }, // Arabic
    { 0x2809, "en-BZ" }, // English
    { 0x280a, "es-PE" }, // Spanish
    { 0x281a, "sr-Cyrl-RS" }, // Serbian (Cyrillic)
    { 0x2c01, "ar-JO" }, // Arabic
    { 0x2c09, "en-TT" }, // English
    { 0x2c0a, "es-AR" }, // Spanish
    { 0x2c1a, "sr-Latn-ME" }, // Serbian (Latin)
    { 0x3001, "ar-LB" }, // Arabic
    { 0x3009, "en-ZW" }, // English
    { 0x300a, "es-EC" }, // Spanish
    { 0x301a, "sr-Cyrl-ME" }, // Serbian (Cyrillic)
    { 0x3401, "ar-KW" }, // Arabic
    { 0x3409, "en-PH" }, // English
    { 0x340a, "es-CL" }, // Spanish
    { 0x3801, "ar-AE" }, // Arabic
    { 0x380a, "es-UY" }, // Spanish
    { 0x3c01, "ar-BH" }, // Arabic
    { 0x3c0a, "es-PY" }, // Spanish
    { 0x4001, "ar-QA" }, // Arabic
    { 0x4009, "en-IN" }, // English
    { 0x400a, "es-BO" }, // Spanish
    { 0x4409, "en-MY" }, // English
    { 0x440a, "es-SV" }, // Spanish
    { 0x4809, "en-SG" }, // English
    { 0x480a, "es-HN" }, // Spanish
    { 0x4c0a, "es-NI" }, // Spanish
    { 0x500a, "es-PR" }, // Spanish
    { 0x540a, "es-US" }, // Spanish
};
| 451 | |
bungeman@google.com | 5df7434 | 2013-04-02 14:40:44 +0000 | [diff] [blame] | 452 | namespace { |
bsalomon@google.com | 20f7f17 | 2013-05-17 19:05:03 +0000 | [diff] [blame] | 453 | bool BCP47FromLanguageIdLess(const BCP47FromLanguageId& a, const BCP47FromLanguageId& b) { |
| 454 | return a.languageID < b.languageID; |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 455 | } |
bungeman@google.com | 5df7434 | 2013-04-02 14:40:44 +0000 | [diff] [blame] | 456 | } |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 457 | |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 458 | bool SkOTTableName::Iterator::next(SkOTTableName::Iterator::Record& record) { |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 459 | SkOTTableName nameTable; |
| 460 | if (fNameTableSize < sizeof(nameTable)) { |
| 461 | return false; |
| 462 | } |
| 463 | memcpy(&nameTable, fNameTable, sizeof(nameTable)); |
| 464 | |
| 465 | const uint8_t* nameRecords = fNameTable + sizeof(nameTable); |
| 466 | const size_t nameRecordsSize = fNameTableSize - sizeof(nameTable); |
| 467 | |
| 468 | const size_t stringTableOffset = SkEndian_SwapBE16(nameTable.stringOffset); |
| 469 | if (fNameTableSize < stringTableOffset) { |
| 470 | return false; |
| 471 | } |
| 472 | const uint8_t* stringTable = fNameTable + stringTableOffset; |
| 473 | const size_t stringTableSize = fNameTableSize - stringTableOffset; |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 474 | |
| 475 | // Find the next record which matches the requested type. |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 476 | SkOTTableName::Record nameRecord; |
| 477 | const size_t nameRecordsCount = SkEndian_SwapBE16(nameTable.count); |
| 478 | const size_t nameRecordsMax = SkTMin(nameRecordsCount, nameRecordsSize / sizeof(nameRecord)); |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 479 | do { |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 480 | if (fIndex >= nameRecordsMax) { |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 481 | return false; |
| 482 | } |
| 483 | |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 484 | memcpy(&nameRecord, nameRecords + sizeof(nameRecord)*fIndex, sizeof(nameRecord)); |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 485 | ++fIndex; |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 486 | } while (fType != -1 && nameRecord.nameID.fontSpecific != fType); |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 487 | |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 488 | record.type = nameRecord.nameID.fontSpecific; |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 489 | |
| 490 | // Decode the name into UTF-8. |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 491 | const size_t nameOffset = SkEndian_SwapBE16(nameRecord.offset); |
| 492 | const size_t nameLength = SkEndian_SwapBE16(nameRecord.length); |
| 493 | if (stringTableSize < nameOffset + nameLength) { |
| 494 | return false; // continue? |
| 495 | } |
| 496 | const uint8_t* nameString = stringTable + nameOffset; |
| 497 | switch (nameRecord.platformID.value) { |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 498 | case SkOTTableName::Record::PlatformID::Windows: |
bungeman@google.com | a980269 | 2013-08-07 02:45:25 +0000 | [diff] [blame] | 499 | if (SkOTTableName::Record::EncodingID::Windows::UnicodeBMPUCS2 |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 500 | != nameRecord.encodingID.windows.value |
bungeman@google.com | a980269 | 2013-08-07 02:45:25 +0000 | [diff] [blame] | 501 | && SkOTTableName::Record::EncodingID::Windows::UnicodeUCS4 |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 502 | != nameRecord.encodingID.windows.value |
bungeman@google.com | a980269 | 2013-08-07 02:45:25 +0000 | [diff] [blame] | 503 | && SkOTTableName::Record::EncodingID::Windows::Symbol |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 504 | != nameRecord.encodingID.windows.value) |
bungeman@google.com | a980269 | 2013-08-07 02:45:25 +0000 | [diff] [blame] | 505 | { |
| 506 | record.name.reset(); |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 507 | break; // continue? |
bungeman@google.com | a980269 | 2013-08-07 02:45:25 +0000 | [diff] [blame] | 508 | } |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 509 | case SkOTTableName::Record::PlatformID::Unicode: |
| 510 | case SkOTTableName::Record::PlatformID::ISO: |
Mike Klein | 475c5e9 | 2018-08-08 10:23:17 -0400 | [diff] [blame] | 511 | SkString_from_UTF16BE(nameString, nameLength, record.name); |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 512 | break; |
| 513 | |
| 514 | case SkOTTableName::Record::PlatformID::Macintosh: |
bungeman@google.com | a980269 | 2013-08-07 02:45:25 +0000 | [diff] [blame] | 515 | // TODO: need better decoding, especially on Mac. |
| 516 | if (SkOTTableName::Record::EncodingID::Macintosh::Roman |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 517 | != nameRecord.encodingID.macintosh.value) |
bungeman@google.com | a980269 | 2013-08-07 02:45:25 +0000 | [diff] [blame] | 518 | { |
| 519 | record.name.reset(); |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 520 | break; // continue? |
bungeman@google.com | a980269 | 2013-08-07 02:45:25 +0000 | [diff] [blame] | 521 | } |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 522 | SkStringFromMacRoman(nameString, nameLength, record.name); |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 523 | break; |
| 524 | |
| 525 | case SkOTTableName::Record::PlatformID::Custom: |
| 526 | // These should never appear in a 'name' table. |
| 527 | default: |
| 528 | SkASSERT(false); |
| 529 | record.name.reset(); |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 530 | break; // continue? |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 531 | } |
| 532 | |
| 533 | // Determine the language. |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 534 | const uint16_t languageID = SkEndian_SwapBE16(nameRecord.languageID.languageTagID); |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 535 | |
| 536 | // Handle format 1 languages. |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 537 | if (SkOTTableName::format_1 == nameTable.format && languageID >= 0x8000) { |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 538 | const uint16_t languageTagRecordIndex = languageID - 0x8000; |
| 539 | |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 540 | if (nameRecordsSize < sizeof(nameRecord)*nameRecordsCount) { |
| 541 | return false; //"und" or break? |
| 542 | } |
| 543 | const uint8_t* format1extData = nameRecords + sizeof(nameRecord)*nameRecordsCount; |
| 544 | size_t format1extSize = nameRecordsSize - sizeof(nameRecord)*nameRecordsCount; |
| 545 | SkOTTableName::Format1Ext format1ext; |
| 546 | if (format1extSize < sizeof(format1ext)) { |
| 547 | return false; // "und" or break? |
| 548 | } |
| 549 | memcpy(&format1ext, format1extData, sizeof(format1ext)); |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 550 | |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 551 | const uint8_t* languageTagRecords = format1extData + sizeof(format1ext); |
| 552 | size_t languageTagRecordsSize = format1extSize - sizeof(format1ext); |
| 553 | if (languageTagRecordIndex < SkEndian_SwapBE16(format1ext.langTagCount)) { |
| 554 | SkOTTableName::Format1Ext::LangTagRecord languageTagRecord; |
| 555 | if (languageTagRecordsSize < sizeof(languageTagRecord)*(languageTagRecordIndex+1)) { |
| 556 | return false; // "und"? |
| 557 | } |
| 558 | const uint8_t* languageTagData = languageTagRecords |
| 559 | + sizeof(languageTagRecord)*languageTagRecordIndex; |
| 560 | memcpy(&languageTagRecord, languageTagData, sizeof(languageTagRecord)); |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 561 | |
Ben Wagner | ad031f5 | 2018-08-20 13:45:57 -0400 | [diff] [blame^] | 562 | uint16_t languageOffset = SkEndian_SwapBE16(languageTagRecord.offset); |
| 563 | uint16_t languageLength = SkEndian_SwapBE16(languageTagRecord.length); |
| 564 | |
| 565 | if (fNameTableSize < stringTableOffset + languageOffset + languageLength) { |
| 566 | return false; // "und"? |
| 567 | } |
| 568 | const uint8_t* languageString = stringTable + languageOffset; |
| 569 | SkString_from_UTF16BE(languageString, languageLength, record.language); |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 570 | return true; |
| 571 | } |
| 572 | } |
| 573 | |
| 574 | // Handle format 0 languages, translating them into BCP 47. |
| 575 | const BCP47FromLanguageId target = { languageID, "" }; |
bsalomon@google.com | 20f7f17 | 2013-05-17 19:05:03 +0000 | [diff] [blame] | 576 | int languageIndex = SkTSearch<BCP47FromLanguageId, BCP47FromLanguageIdLess>( |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 577 | BCP47FromLanguageID, SK_ARRAY_COUNT(BCP47FromLanguageID), target, sizeof(target)); |
| 578 | if (languageIndex >= 0) { |
| 579 | record.language = BCP47FromLanguageID[languageIndex].bcp47; |
| 580 | return true; |
| 581 | } |
| 582 | |
| 583 | // Unknown language, return the BCP 47 code 'und' for 'undetermined'. |
bungeman@google.com | 07a69f8 | 2013-04-02 14:12:38 +0000 | [diff] [blame] | 584 | record.language = "und"; |
| 585 | return true; |
| 586 | } |