blob: 05230f4a801f5ffe22e076f9cf27ab2717308904 [file] [log] [blame]
bungeman@google.com07a69f82013-04-02 14:12:38 +00001/*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8#include "SkOTTable_name.h"
9
10#include "SkEndian.h"
Hal Canaryee08b4a2018-03-01 15:56:37 -050011#include "SkStringUtils.h"
bungeman@google.com07a69f82013-04-02 14:12:38 +000012#include "SkTSearch.h"
commit-bot@chromium.orgb5e34e22013-05-07 15:28:15 +000013#include "SkTemplates.h"
bungeman@google.com07a69f82013-04-02 14:12:38 +000014#include "SkUtils.h"
15
Mike Klein475c5e92018-08-08 10:23:17 -040016static SkUnichar next_unichar_UTF16BE(const char** srcPtr) {
bungeman@google.com07a69f82013-04-02 14:12:38 +000017 SkASSERT(srcPtr && *srcPtr);
18
Mike Klein475c5e92018-08-08 10:23:17 -040019 const char* src = *srcPtr;
20 uint16_t lo;
21 memcpy(&lo, src, 2);
22 src += 2;
23
24 SkUnichar c = SkEndian_SwapBE16(lo);
bungeman@google.com07a69f82013-04-02 14:12:38 +000025
26 SkASSERT(!SkUTF16_IsLowSurrogate(c));
27 if (SkUTF16_IsHighSurrogate(c)) {
Mike Klein475c5e92018-08-08 10:23:17 -040028 uint16_t hi;
29 memcpy(&hi, src, 2);
30 src += 2;
31 unsigned c2 = SkEndian_SwapBE16(hi);
bungeman@google.com07a69f82013-04-02 14:12:38 +000032 SkASSERT(SkUTF16_IsLowSurrogate(c2));
33
34 c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00);
35 }
36 *srcPtr = src;
37 return c;
38}
39
Mike Klein475c5e92018-08-08 10:23:17 -040040static void SkString_from_UTF16BE(const char* utf16be, size_t length, SkString& utf8) {
41 // Note that utf16be may not be 2-byte aligned.
halcanary96fcdcc2015-08-27 07:41:13 -070042 SkASSERT(utf16be != nullptr);
bungeman@google.com07a69f82013-04-02 14:12:38 +000043
44 utf8.reset();
45 size_t numberOf16BitValues = length / 2;
Mike Klein475c5e92018-08-08 10:23:17 -040046 const char* end = utf16be + numberOf16BitValues*2;
bungeman@google.com07a69f82013-04-02 14:12:38 +000047 while (utf16be < end) {
Mike Klein475c5e92018-08-08 10:23:17 -040048 utf8.appendUnichar(next_unichar_UTF16BE(&utf16be));
bungeman@google.com07a69f82013-04-02 14:12:38 +000049 }
50}
51
/** UnicodeFromMacRoman[macRomanPoint - 0x80] -> unicodeCodePoint.
 *  Derived from http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT .
 *  In MacRoman the first 128 code points match ASCII code points.
 *  This maps the second 128 MacRoman code points to unicode code points.
 *
 *  The table has exactly 0x80 entries, one per MacRoman byte in 0x80..0xFF,
 *  laid out eight per row in increasing byte order. Every target fits in 16 bits.
 */
static const uint16_t UnicodeFromMacRoman[0x80] = {
    0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
    0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
    0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
    0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
    0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
    0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
    0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
    0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
    0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
    0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
    0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
    0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
    0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
    0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
    0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
    0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7,
};
75
76static void SkStringFromMacRoman(const uint8_t* macRoman, size_t length, SkString& utf8) {
77 utf8.reset();
78 for (size_t i = 0; i < length; ++i) {
mtklein@google.com1e4c4fe2013-11-04 21:22:45 +000079 utf8.appendUnichar(macRoman[i] < 0x80 ? macRoman[i]
80 : UnicodeFromMacRoman[macRoman[i] - 0x80]);
bungeman@google.com07a69f82013-04-02 14:12:38 +000081 }
82}
83
/** One entry of the language id -> BCP 47 tag lookup table.
 *
 *  languageID  a Mac language designator or a Windows LCID (host byte order).
 *  bcp47       the corresponding BCP 47 language tag, as a static C string.
 *
 *  The table below is searched with a binary search keyed on languageID
 *  (see SkOTTableName::Iterator::next), so entries must stay sorted by
 *  ascending languageID.
 *
 *  NOTE(review): a few Windows ids appear twice with different tags
 *  (0x040d, 0x040e, 0x0443, 0x082e, 0x141a, 0x143b, 0x1c3b, 0x201a, 0x203b);
 *  which of the two a binary search returns is not obvious from here — confirm
 *  whether both entries are intended.
 */
static const struct BCP47FromLanguageId {
    uint16_t languageID;
    const char* bcp47;
}
/** The Mac and Windows values do not conflict, so this is currently one single table. */
BCP47FromLanguageID[] = {
    /** A mapping from Mac Language Designators to BCP 47 codes.
     *  The following list was constructed more or less manually.
     *  Apple now uses BCP 47 (post OSX10.4), so there will be no new entries.
     */
    {0, "en"}, //English
    {1, "fr"}, //French
    {2, "de"}, //German
    {3, "it"}, //Italian
    {4, "nl"}, //Dutch
    {5, "sv"}, //Swedish
    {6, "es"}, //Spanish
    {7, "da"}, //Danish
    {8, "pt"}, //Portuguese
    {9, "nb"}, //Norwegian
    {10, "he"}, //Hebrew
    {11, "ja"}, //Japanese
    {12, "ar"}, //Arabic
    {13, "fi"}, //Finnish
    {14, "el"}, //Greek
    {15, "is"}, //Icelandic
    {16, "mt"}, //Maltese
    {17, "tr"}, //Turkish
    {18, "hr"}, //Croatian
    {19, "zh-Hant"}, //Chinese (Traditional)
    {20, "ur"}, //Urdu
    {21, "hi"}, //Hindi
    {22, "th"}, //Thai
    {23, "ko"}, //Korean
    {24, "lt"}, //Lithuanian
    {25, "pl"}, //Polish
    {26, "hu"}, //Hungarian
    {27, "et"}, //Estonian
    {28, "lv"}, //Latvian
    {29, "se"}, //Sami
    {30, "fo"}, //Faroese
    {31, "fa"}, //Farsi (Persian)
    {32, "ru"}, //Russian
    {33, "zh-Hans"}, //Chinese (Simplified)
    {34, "nl"}, //Dutch
    {35, "ga"}, //Irish(Gaelic)
    {36, "sq"}, //Albanian
    {37, "ro"}, //Romanian
    {38, "cs"}, //Czech
    {39, "sk"}, //Slovak
    {40, "sl"}, //Slovenian
    {41, "yi"}, //Yiddish
    {42, "sr"}, //Serbian
    {43, "mk"}, //Macedonian
    {44, "bg"}, //Bulgarian
    {45, "uk"}, //Ukrainian
    {46, "be"}, //Byelorussian
    {47, "uz"}, //Uzbek
    {48, "kk"}, //Kazakh
    {49, "az-Cyrl"}, //Azerbaijani (Cyrillic)
    {50, "az-Arab"}, //Azerbaijani (Arabic)
    {51, "hy"}, //Armenian
    {52, "ka"}, //Georgian
    {53, "mo"}, //Moldavian
    {54, "ky"}, //Kirghiz
    {55, "tg"}, //Tajiki
    {56, "tk"}, //Turkmen
    {57, "mn-Mong"}, //Mongolian (Traditional)
    {58, "mn-Cyrl"}, //Mongolian (Cyrillic)
    {59, "ps"}, //Pashto
    {60, "ku"}, //Kurdish
    {61, "ks"}, //Kashmiri
    {62, "sd"}, //Sindhi
    {63, "bo"}, //Tibetan
    {64, "ne"}, //Nepali
    {65, "sa"}, //Sanskrit
    {66, "mr"}, //Marathi
    {67, "bn"}, //Bengali
    {68, "as"}, //Assamese
    {69, "gu"}, //Gujarati
    {70, "pa"}, //Punjabi
    {71, "or"}, //Oriya
    {72, "ml"}, //Malayalam
    {73, "kn"}, //Kannada
    {74, "ta"}, //Tamil
    {75, "te"}, //Telugu
    {76, "si"}, //Sinhalese
    {77, "my"}, //Burmese
    {78, "km"}, //Khmer
    {79, "lo"}, //Lao
    {80, "vi"}, //Vietnamese
    {81, "id"}, //Indonesian
    {82, "tl"}, //Tagalog
    {83, "ms-Latn"}, //Malay (Roman)
    {84, "ms-Arab"}, //Malay (Arabic)
    {85, "am"}, //Amharic
    {86, "ti"}, //Tigrinya
    {87, "om"}, //Oromo
    {88, "so"}, //Somali
    {89, "sw"}, //Swahili
    {90, "rw"}, //Kinyarwanda/Ruanda
    {91, "rn"}, //Rundi
    {92, "ny"}, //Nyanja/Chewa
    {93, "mg"}, //Malagasy
    {94, "eo"}, //Esperanto
    {128, "cy"}, //Welsh
    {129, "eu"}, //Basque
    {130, "ca"}, //Catalan
    {131, "la"}, //Latin
    {132, "qu"}, //Quechua
    {133, "gn"}, //Guarani
    {134, "ay"}, //Aymara
    {135, "tt"}, //Tatar
    {136, "ug"}, //Uighur
    {137, "dz"}, //Dzongkha
    {138, "jv-Latn"}, //Javanese (Roman)
    {139, "su-Latn"}, //Sundanese (Roman)
    {140, "gl"}, //Galician
    {141, "af"}, //Afrikaans
    {142, "br"}, //Breton
    {143, "iu"}, //Inuktitut
    {144, "gd"}, //Scottish (Gaelic)
    {145, "gv"}, //Manx (Gaelic)
    {146, "ga"}, //Irish (Gaelic with Lenition)
    {147, "to"}, //Tongan
    {148, "el"}, //Greek (Polytonic) Note: ISO 15924 does not have an equivalent script name.
    {149, "kl"}, //Greenlandic
    {150, "az-Latn"}, //Azerbaijani (Roman)
    {151, "nn"}, //Nynorsk

    /** A mapping from Windows LCID to BCP 47 codes.
     *  This list is the sorted, curated output of tools/win_lcid.cpp.
     *  Note that these are sorted by value for quick binary lookup, and not logically by lsb.
     *  The 'bare' language ids (e.g. 0x0001 for Arabic) are omitted
     *  as they do not appear as valid language ids in the OpenType specification.
     */
    { 0x0401, "ar-SA" }, //Arabic
    { 0x0402, "bg-BG" }, //Bulgarian
    { 0x0403, "ca-ES" }, //Catalan
    { 0x0404, "zh-TW" }, //Chinese (Traditional)
    { 0x0405, "cs-CZ" }, //Czech
    { 0x0406, "da-DK" }, //Danish
    { 0x0407, "de-DE" }, //German
    { 0x0408, "el-GR" }, //Greek
    { 0x0409, "en-US" }, //English
    { 0x040a, "es-ES_tradnl" }, //Spanish
    { 0x040b, "fi-FI" }, //Finnish
    { 0x040c, "fr-FR" }, //French
    { 0x040d, "he-IL" }, //Hebrew
    { 0x040d, "he" }, //Hebrew
    { 0x040e, "hu-HU" }, //Hungarian
    { 0x040e, "hu" }, //Hungarian
    { 0x040f, "is-IS" }, //Icelandic
    { 0x0410, "it-IT" }, //Italian
    { 0x0411, "ja-JP" }, //Japanese
    { 0x0412, "ko-KR" }, //Korean
    { 0x0413, "nl-NL" }, //Dutch
    { 0x0414, "nb-NO" }, //Norwegian (Bokmål)
    { 0x0415, "pl-PL" }, //Polish
    { 0x0416, "pt-BR" }, //Portuguese
    { 0x0417, "rm-CH" }, //Romansh
    { 0x0418, "ro-RO" }, //Romanian
    { 0x0419, "ru-RU" }, //Russian
    { 0x041a, "hr-HR" }, //Croatian
    { 0x041b, "sk-SK" }, //Slovak
    { 0x041c, "sq-AL" }, //Albanian
    { 0x041d, "sv-SE" }, //Swedish
    { 0x041e, "th-TH" }, //Thai
    { 0x041f, "tr-TR" }, //Turkish
    { 0x0420, "ur-PK" }, //Urdu
    { 0x0421, "id-ID" }, //Indonesian
    { 0x0422, "uk-UA" }, //Ukrainian
    { 0x0423, "be-BY" }, //Belarusian
    { 0x0424, "sl-SI" }, //Slovenian
    { 0x0425, "et-EE" }, //Estonian
    { 0x0426, "lv-LV" }, //Latvian
    { 0x0427, "lt-LT" }, //Lithuanian
    { 0x0428, "tg-Cyrl-TJ" }, //Tajik (Cyrillic)
    { 0x0429, "fa-IR" }, //Persian
    { 0x042a, "vi-VN" }, //Vietnamese
    { 0x042b, "hy-AM" }, //Armenian
    { 0x042c, "az-Latn-AZ" }, //Azeri (Latin)
    { 0x042d, "eu-ES" }, //Basque
    { 0x042e, "hsb-DE" }, //Upper Sorbian
    { 0x042f, "mk-MK" }, //Macedonian (FYROM)
    { 0x0432, "tn-ZA" }, //Setswana
    { 0x0434, "xh-ZA" }, //isiXhosa
    { 0x0435, "zu-ZA" }, //isiZulu
    { 0x0436, "af-ZA" }, //Afrikaans
    { 0x0437, "ka-GE" }, //Georgian
    { 0x0438, "fo-FO" }, //Faroese
    { 0x0439, "hi-IN" }, //Hindi
    { 0x043a, "mt-MT" }, //Maltese
    { 0x043b, "se-NO" }, //Sami (Northern)
    { 0x043e, "ms-MY" }, //Malay
    { 0x043f, "kk-KZ" }, //Kazakh
    { 0x0440, "ky-KG" }, //Kyrgyz
    { 0x0441, "sw-KE" }, //Kiswahili
    { 0x0442, "tk-TM" }, //Turkmen
    { 0x0443, "uz-Latn-UZ" }, //Uzbek (Latin)
    { 0x0443, "uz" }, //Uzbek
    { 0x0444, "tt-RU" }, //Tatar
    { 0x0445, "bn-IN" }, //Bengali
    { 0x0446, "pa-IN" }, //Punjabi
    { 0x0447, "gu-IN" }, //Gujarati
    { 0x0448, "or-IN" }, //Oriya
    { 0x0449, "ta-IN" }, //Tamil
    { 0x044a, "te-IN" }, //Telugu
    { 0x044b, "kn-IN" }, //Kannada
    { 0x044c, "ml-IN" }, //Malayalam
    { 0x044d, "as-IN" }, //Assamese
    { 0x044e, "mr-IN" }, //Marathi
    { 0x044f, "sa-IN" }, //Sanskrit
    { 0x0450, "mn-Cyrl" }, //Mongolian (Cyrillic)
    { 0x0451, "bo-CN" }, //Tibetan
    { 0x0452, "cy-GB" }, //Welsh
    { 0x0453, "km-KH" }, //Khmer
    { 0x0454, "lo-LA" }, //Lao
    { 0x0456, "gl-ES" }, //Galician
    { 0x0457, "kok-IN" }, //Konkani
    { 0x045a, "syr-SY" }, //Syriac
    { 0x045b, "si-LK" }, //Sinhala
    { 0x045d, "iu-Cans-CA" }, //Inuktitut (Syllabics)
    { 0x045e, "am-ET" }, //Amharic
    { 0x0461, "ne-NP" }, //Nepali
    { 0x0462, "fy-NL" }, //Frisian
    { 0x0463, "ps-AF" }, //Pashto
    { 0x0464, "fil-PH" }, //Filipino
    { 0x0465, "dv-MV" }, //Divehi
    { 0x0468, "ha-Latn-NG" }, //Hausa (Latin)
    { 0x046a, "yo-NG" }, //Yoruba
    { 0x046b, "quz-BO" }, //Quechua
    { 0x046c, "nso-ZA" }, //Sesotho sa Leboa
    { 0x046d, "ba-RU" }, //Bashkir
    { 0x046e, "lb-LU" }, //Luxembourgish
    { 0x046f, "kl-GL" }, //Greenlandic
    { 0x0470, "ig-NG" }, //Igbo
    { 0x0478, "ii-CN" }, //Yi
    { 0x047a, "arn-CL" }, //Mapudungun
    { 0x047c, "moh-CA" }, //Mohawk
    { 0x047e, "br-FR" }, //Breton
    { 0x0480, "ug-CN" }, //Uyghur
    { 0x0481, "mi-NZ" }, //Maori
    { 0x0482, "oc-FR" }, //Occitan
    { 0x0483, "co-FR" }, //Corsican
    { 0x0484, "gsw-FR" }, //Alsatian
    { 0x0485, "sah-RU" }, //Yakut
    { 0x0486, "qut-GT" }, //K'iche
    { 0x0487, "rw-RW" }, //Kinyarwanda
    { 0x0488, "wo-SN" }, //Wolof
    { 0x048c, "prs-AF" }, //Dari
    { 0x0491, "gd-GB" }, //Scottish Gaelic
    { 0x0801, "ar-IQ" }, //Arabic
    { 0x0804, "zh-Hans" }, //Chinese (Simplified)
    { 0x0807, "de-CH" }, //German
    { 0x0809, "en-GB" }, //English
    { 0x080a, "es-MX" }, //Spanish
    { 0x080c, "fr-BE" }, //French
    { 0x0810, "it-CH" }, //Italian
    { 0x0813, "nl-BE" }, //Dutch
    { 0x0814, "nn-NO" }, //Norwegian (Nynorsk)
    { 0x0816, "pt-PT" }, //Portuguese
    { 0x081a, "sr-Latn-CS" }, //Serbian (Latin)
    { 0x081d, "sv-FI" }, //Swedish
    { 0x082c, "az-Cyrl-AZ" }, //Azeri (Cyrillic)
    { 0x082e, "dsb-DE" }, //Lower Sorbian
    { 0x082e, "dsb" }, //Lower Sorbian
    { 0x083b, "se-SE" }, //Sami (Northern)
    { 0x083c, "ga-IE" }, //Irish
    { 0x083e, "ms-BN" }, //Malay
    { 0x0843, "uz-Cyrl-UZ" }, //Uzbek (Cyrillic)
    { 0x0845, "bn-BD" }, //Bengali
    { 0x0850, "mn-Mong-CN" }, //Mongolian (Traditional Mongolian)
    { 0x085d, "iu-Latn-CA" }, //Inuktitut (Latin)
    { 0x085f, "tzm-Latn-DZ" }, //Tamazight (Latin)
    { 0x086b, "quz-EC" }, //Quechua
    { 0x0c01, "ar-EG" }, //Arabic
    { 0x0c04, "zh-Hant" }, //Chinese (Traditional)
    { 0x0c07, "de-AT" }, //German
    { 0x0c09, "en-AU" }, //English
    { 0x0c0a, "es-ES" }, //Spanish
    { 0x0c0c, "fr-CA" }, //French
    { 0x0c1a, "sr-Cyrl-CS" }, //Serbian (Cyrillic)
    { 0x0c3b, "se-FI" }, //Sami (Northern)
    { 0x0c6b, "quz-PE" }, //Quechua
    { 0x1001, "ar-LY" }, //Arabic
    { 0x1004, "zh-SG" }, //Chinese (Simplified)
    { 0x1007, "de-LU" }, //German
    { 0x1009, "en-CA" }, //English
    { 0x100a, "es-GT" }, //Spanish
    { 0x100c, "fr-CH" }, //French
    { 0x101a, "hr-BA" }, //Croatian (Latin)
    { 0x103b, "smj-NO" }, //Sami (Lule)
    { 0x1401, "ar-DZ" }, //Arabic
    { 0x1404, "zh-MO" }, //Chinese (Traditional)
    { 0x1407, "de-LI" }, //German
    { 0x1409, "en-NZ" }, //English
    { 0x140a, "es-CR" }, //Spanish
    { 0x140c, "fr-LU" }, //French
    { 0x141a, "bs-Latn-BA" }, //Bosnian (Latin)
    { 0x141a, "bs" }, //Bosnian
    { 0x143b, "smj-SE" }, //Sami (Lule)
    { 0x143b, "smj" }, //Sami (Lule)
    { 0x1801, "ar-MA" }, //Arabic
    { 0x1809, "en-IE" }, //English
    { 0x180a, "es-PA" }, //Spanish
    { 0x180c, "fr-MC" }, //French
    { 0x181a, "sr-Latn-BA" }, //Serbian (Latin)
    { 0x183b, "sma-NO" }, //Sami (Southern)
    { 0x1c01, "ar-TN" }, //Arabic
    { 0x1c09, "en-ZA" }, //English
    { 0x1c0a, "es-DO" }, //Spanish
    { 0x1c1a, "sr-Cyrl-BA" }, //Serbian (Cyrillic)
    { 0x1c3b, "sma-SE" }, //Sami (Southern)
    { 0x1c3b, "sma" }, //Sami (Southern)
    { 0x2001, "ar-OM" }, //Arabic
    { 0x2009, "en-JM" }, //English
    { 0x200a, "es-VE" }, //Spanish
    { 0x201a, "bs-Cyrl-BA" }, //Bosnian (Cyrillic)
    { 0x201a, "bs-Cyrl" }, //Bosnian (Cyrillic)
    { 0x203b, "sms-FI" }, //Sami (Skolt)
    { 0x203b, "sms" }, //Sami (Skolt)
    { 0x2401, "ar-YE" }, //Arabic
    { 0x2409, "en-029" }, //English
    { 0x240a, "es-CO" }, //Spanish
    { 0x241a, "sr-Latn-RS" }, //Serbian (Latin)
    { 0x243b, "smn-FI" }, //Sami (Inari)
    { 0x2801, "ar-SY" }, //Arabic
    { 0x2809, "en-BZ" }, //English
    { 0x280a, "es-PE" }, //Spanish
    { 0x281a, "sr-Cyrl-RS" }, //Serbian (Cyrillic)
    { 0x2c01, "ar-JO" }, //Arabic
    { 0x2c09, "en-TT" }, //English
    { 0x2c0a, "es-AR" }, //Spanish
    { 0x2c1a, "sr-Latn-ME" }, //Serbian (Latin)
    { 0x3001, "ar-LB" }, //Arabic
    { 0x3009, "en-ZW" }, //English
    { 0x300a, "es-EC" }, //Spanish
    { 0x301a, "sr-Cyrl-ME" }, //Serbian (Cyrillic)
    { 0x3401, "ar-KW" }, //Arabic
    { 0x3409, "en-PH" }, //English
    { 0x340a, "es-CL" }, //Spanish
    { 0x3801, "ar-AE" }, //Arabic
    { 0x380a, "es-UY" }, //Spanish
    { 0x3c01, "ar-BH" }, //Arabic
    { 0x3c0a, "es-PY" }, //Spanish
    { 0x4001, "ar-QA" }, //Arabic
    { 0x4009, "en-IN" }, //English
    { 0x400a, "es-BO" }, //Spanish
    { 0x4409, "en-MY" }, //English
    { 0x440a, "es-SV" }, //Spanish
    { 0x4809, "en-SG" }, //English
    { 0x480a, "es-HN" }, //Spanish
    { 0x4c0a, "es-NI" }, //Spanish
    { 0x500a, "es-PR" }, //Spanish
    { 0x540a, "es-US" }, //Spanish
};
441
bungeman@google.com5df74342013-04-02 14:40:44 +0000442namespace {
bsalomon@google.com20f7f172013-05-17 19:05:03 +0000443bool BCP47FromLanguageIdLess(const BCP47FromLanguageId& a, const BCP47FromLanguageId& b) {
444 return a.languageID < b.languageID;
bungeman@google.com07a69f82013-04-02 14:12:38 +0000445}
bungeman@google.com5df74342013-04-02 14:40:44 +0000446}
bungeman@google.com07a69f82013-04-02 14:12:38 +0000447
bungeman@google.com07a69f82013-04-02 14:12:38 +0000448bool SkOTTableName::Iterator::next(SkOTTableName::Iterator::Record& record) {
449 const size_t nameRecordsCount = SkEndian_SwapBE16(fName.count);
450 const SkOTTableName::Record* nameRecords = SkTAfter<const SkOTTableName::Record>(&fName);
451 const SkOTTableName::Record* nameRecord;
452
453 // Find the next record which matches the requested type.
454 do {
455 if (fIndex >= nameRecordsCount) {
456 return false;
457 }
458
459 nameRecord = &nameRecords[fIndex];
460 ++fIndex;
461 } while (fType != -1 && nameRecord->nameID.fontSpecific != fType);
462
bungeman@google.coma9802692013-08-07 02:45:25 +0000463 record.type = nameRecord->nameID.fontSpecific;
464
bungeman@google.com07a69f82013-04-02 14:12:38 +0000465 const uint16_t stringTableOffset = SkEndian_SwapBE16(fName.stringOffset);
commit-bot@chromium.orgb5e34e22013-05-07 15:28:15 +0000466 const char* stringTable = SkTAddOffset<const char>(&fName, stringTableOffset);
bungeman@google.com07a69f82013-04-02 14:12:38 +0000467
468 // Decode the name into UTF-8.
469 const uint16_t nameOffset = SkEndian_SwapBE16(nameRecord->offset);
470 const uint16_t nameLength = SkEndian_SwapBE16(nameRecord->length);
commit-bot@chromium.orgb5e34e22013-05-07 15:28:15 +0000471 const char* nameString = SkTAddOffset<const char>(stringTable, nameOffset);
bungeman@google.com07a69f82013-04-02 14:12:38 +0000472 switch (nameRecord->platformID.value) {
473 case SkOTTableName::Record::PlatformID::Windows:
bungeman@google.coma9802692013-08-07 02:45:25 +0000474 if (SkOTTableName::Record::EncodingID::Windows::UnicodeBMPUCS2
475 != nameRecord->encodingID.windows.value
476 && SkOTTableName::Record::EncodingID::Windows::UnicodeUCS4
477 != nameRecord->encodingID.windows.value
478 && SkOTTableName::Record::EncodingID::Windows::Symbol
479 != nameRecord->encodingID.windows.value)
480 {
481 record.name.reset();
482 break;
483 }
bungeman@google.com07a69f82013-04-02 14:12:38 +0000484 case SkOTTableName::Record::PlatformID::Unicode:
485 case SkOTTableName::Record::PlatformID::ISO:
Mike Klein475c5e92018-08-08 10:23:17 -0400486 SkString_from_UTF16BE(nameString, nameLength, record.name);
bungeman@google.com07a69f82013-04-02 14:12:38 +0000487 break;
488
489 case SkOTTableName::Record::PlatformID::Macintosh:
bungeman@google.coma9802692013-08-07 02:45:25 +0000490 // TODO: need better decoding, especially on Mac.
491 if (SkOTTableName::Record::EncodingID::Macintosh::Roman
492 != nameRecord->encodingID.macintosh.value)
493 {
494 record.name.reset();
495 break;
496 }
bungeman@google.com07a69f82013-04-02 14:12:38 +0000497 SkStringFromMacRoman((const uint8_t*)nameString, nameLength, record.name);
498 break;
499
500 case SkOTTableName::Record::PlatformID::Custom:
501 // These should never appear in a 'name' table.
502 default:
503 SkASSERT(false);
504 record.name.reset();
505 break;
506 }
507
508 // Determine the language.
509 const uint16_t languageID = SkEndian_SwapBE16(nameRecord->languageID.languageTagID);
510
511 // Handle format 1 languages.
512 if (SkOTTableName::format_1 == fName.format && languageID >= 0x8000) {
513 const uint16_t languageTagRecordIndex = languageID - 0x8000;
514
515 const SkOTTableName::Format1Ext* format1ext =
516 SkTAfter<const SkOTTableName::Format1Ext>(nameRecords, nameRecordsCount);
517
bungeman@google.coma3aaf162013-07-29 22:25:02 +0000518 if (languageTagRecordIndex < SkEndian_SwapBE16(format1ext->langTagCount)) {
bungeman@google.com07a69f82013-04-02 14:12:38 +0000519 const SkOTTableName::Format1Ext::LangTagRecord* languageTagRecord =
520 SkTAfter<const SkOTTableName::Format1Ext::LangTagRecord>(format1ext);
521
522 uint16_t offset = SkEndian_SwapBE16(languageTagRecord[languageTagRecordIndex].offset);
523 uint16_t length = SkEndian_SwapBE16(languageTagRecord[languageTagRecordIndex].length);
Mike Klein475c5e92018-08-08 10:23:17 -0400524 const char* string = SkTAddOffset<const char>(stringTable, offset);
525 SkString_from_UTF16BE(string, length, record.language);
bungeman@google.com07a69f82013-04-02 14:12:38 +0000526 return true;
527 }
528 }
529
530 // Handle format 0 languages, translating them into BCP 47.
531 const BCP47FromLanguageId target = { languageID, "" };
bsalomon@google.com20f7f172013-05-17 19:05:03 +0000532 int languageIndex = SkTSearch<BCP47FromLanguageId, BCP47FromLanguageIdLess>(
bungeman@google.com07a69f82013-04-02 14:12:38 +0000533 BCP47FromLanguageID, SK_ARRAY_COUNT(BCP47FromLanguageID), target, sizeof(target));
534 if (languageIndex >= 0) {
535 record.language = BCP47FromLanguageID[languageIndex].bcp47;
536 return true;
537 }
538
539 // Unknown language, return the BCP 47 code 'und' for 'undetermined'.
bungeman@google.com07a69f82013-04-02 14:12:38 +0000540 record.language = "und";
541 return true;
542}