/*
 * Copyright 2013 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/sfnt/SkOTTable_name.h"

#include "include/private/SkTemplates.h"
#include "src/core/SkEndian.h"
#include "src/core/SkStringUtils.h"
#include "src/core/SkTSearch.h"
#include "src/core/SkUtils.h"

#include <algorithm>
#include <cstring>

Ben Wagnerad031f52018-08-20 13:45:57 -040016static SkUnichar next_unichar_UTF16BE(const uint8_t** srcPtr, size_t* length) {
17 SkASSERT(srcPtr && *srcPtr && length);
18 SkASSERT(*length > 0);
bungeman@google.com07a69f82013-04-02 14:12:38 +000019
Ben Wagnerad031f52018-08-20 13:45:57 -040020 uint16_t leading;
21 if (*length < sizeof(leading)) {
22 *length = 0;
23 return 0xFFFD;
24 }
25 memcpy(&leading, *srcPtr, sizeof(leading));
26 *srcPtr += sizeof(leading);
27 *length -= sizeof(leading);
28 SkUnichar c = SkEndian_SwapBE16(leading);
Mike Klein475c5e92018-08-08 10:23:17 -040029
Ben Wagnerad031f52018-08-20 13:45:57 -040030 if (SkUTF16_IsTrailingSurrogate(c)) {
31 return 0xFFFD;
32 }
33 if (SkUTF16_IsLeadingSurrogate(c)) {
34 uint16_t trailing;
35 if (*length < sizeof(trailing)) {
36 *length = 0;
37 return 0xFFFD;
38 }
39 memcpy(&trailing, *srcPtr, sizeof(trailing));
40 SkUnichar c2 = SkEndian_SwapBE16(trailing);
41 if (!SkUTF16_IsTrailingSurrogate(c2)) {
42 return 0xFFFD;
43 }
44 *srcPtr += sizeof(trailing);
45 *length -= sizeof(trailing);
bungeman@google.com07a69f82013-04-02 14:12:38 +000046
47 c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00);
48 }
bungeman@google.com07a69f82013-04-02 14:12:38 +000049 return c;
50}
51
Ben Wagnerad031f52018-08-20 13:45:57 -040052static void SkString_from_UTF16BE(const uint8_t* utf16be, size_t length, SkString& utf8) {
Mike Klein475c5e92018-08-08 10:23:17 -040053 // Note that utf16be may not be 2-byte aligned.
halcanary96fcdcc2015-08-27 07:41:13 -070054 SkASSERT(utf16be != nullptr);
bungeman@google.com07a69f82013-04-02 14:12:38 +000055
56 utf8.reset();
Ben Wagnerad031f52018-08-20 13:45:57 -040057 while (length) {
58 utf8.appendUnichar(next_unichar_UTF16BE(&utf16be, &length));
bungeman@google.com07a69f82013-04-02 14:12:38 +000059 }
60}
61
/** UnicodeFromMacRoman[macRomanPoint - 0x80] -> unicodeCodePoint.
 *  Derived from http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT .
 *  In MacRoman the first 128 code points match ASCII code points.
 *  This maps the second 128 MacRoman code points to unicode code points.
 *  Each row below covers eight consecutive MacRoman code points, starting at the
 *  code point named in the row's leading comment.
 */
static const uint16_t UnicodeFromMacRoman[0x80] = {
    /* 0x80 */ 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
    /* 0x88 */ 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
    /* 0x90 */ 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
    /* 0x98 */ 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
    /* 0xA0 */ 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
    /* 0xA8 */ 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
    /* 0xB0 */ 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
    /* 0xB8 */ 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
    /* 0xC0 */ 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
    /* 0xC8 */ 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
    /* 0xD0 */ 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
    /* 0xD8 */ 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
    /* 0xE0 */ 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
    /* 0xE8 */ 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
    /* 0xF0 */ 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
    /* 0xF8 */ 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7,
};

86static void SkStringFromMacRoman(const uint8_t* macRoman, size_t length, SkString& utf8) {
87 utf8.reset();
88 for (size_t i = 0; i < length; ++i) {
mtklein@google.com1e4c4fe2013-11-04 21:22:45 +000089 utf8.appendUnichar(macRoman[i] < 0x80 ? macRoman[i]
90 : UnicodeFromMacRoman[macRoman[i] - 0x80]);
bungeman@google.com07a69f82013-04-02 14:12:38 +000091 }
92}
93
/** One row of the language table: a 'name' table language id and the BCP 47 tag reported
 *  for it.
 */
static const struct BCP47FromLanguageId {
    uint16_t languageID;
    const char* bcp47;
}
/** The Mac and Windows values do not conflict, so this is currently one single table.
 *  The table is kept in non-decreasing languageID order because it is binary searched.
 *  NOTE(review): the LCIDs 0x040d, 0x040e, 0x0443, 0x082e, 0x141a, 0x143b, 0x1c3b, 0x201a
 *  and 0x203b each appear twice; which of the two rows a lookup returns depends on how
 *  the search breaks ties -- confirm before relying on either tag.
 */
BCP47FromLanguageID[] = {
    /** A mapping from Mac Language Designators to BCP 47 codes.
     *  The following list was constructed more or less manually.
     *  Apple now uses BCP 47 (post OSX10.4), so there will be no new entries.
     */
    {0, "en"}, //English
    {1, "fr"}, //French
    {2, "de"}, //German
    {3, "it"}, //Italian
    {4, "nl"}, //Dutch
    {5, "sv"}, //Swedish
    {6, "es"}, //Spanish
    {7, "da"}, //Danish
    {8, "pt"}, //Portuguese
    {9, "nb"}, //Norwegian
    {10, "he"}, //Hebrew
    {11, "ja"}, //Japanese
    {12, "ar"}, //Arabic
    {13, "fi"}, //Finnish
    {14, "el"}, //Greek
    {15, "is"}, //Icelandic
    {16, "mt"}, //Maltese
    {17, "tr"}, //Turkish
    {18, "hr"}, //Croatian
    {19, "zh-Hant"}, //Chinese (Traditional)
    {20, "ur"}, //Urdu
    {21, "hi"}, //Hindi
    {22, "th"}, //Thai
    {23, "ko"}, //Korean
    {24, "lt"}, //Lithuanian
    {25, "pl"}, //Polish
    {26, "hu"}, //Hungarian
    {27, "et"}, //Estonian
    {28, "lv"}, //Latvian
    {29, "se"}, //Sami
    {30, "fo"}, //Faroese
    {31, "fa"}, //Farsi (Persian)
    {32, "ru"}, //Russian
    {33, "zh-Hans"}, //Chinese (Simplified)
    {34, "nl"}, //Dutch
    {35, "ga"}, //Irish(Gaelic)
    {36, "sq"}, //Albanian
    {37, "ro"}, //Romanian
    {38, "cs"}, //Czech
    {39, "sk"}, //Slovak
    {40, "sl"}, //Slovenian
    {41, "yi"}, //Yiddish
    {42, "sr"}, //Serbian
    {43, "mk"}, //Macedonian
    {44, "bg"}, //Bulgarian
    {45, "uk"}, //Ukrainian
    {46, "be"}, //Byelorussian
    {47, "uz"}, //Uzbek
    {48, "kk"}, //Kazakh
    {49, "az-Cyrl"}, //Azerbaijani (Cyrillic)
    {50, "az-Arab"}, //Azerbaijani (Arabic)
    {51, "hy"}, //Armenian
    {52, "ka"}, //Georgian
    {53, "mo"}, //Moldavian
    {54, "ky"}, //Kirghiz
    {55, "tg"}, //Tajiki
    {56, "tk"}, //Turkmen
    {57, "mn-Mong"}, //Mongolian (Traditional)
    {58, "mn-Cyrl"}, //Mongolian (Cyrillic)
    {59, "ps"}, //Pashto
    {60, "ku"}, //Kurdish
    {61, "ks"}, //Kashmiri
    {62, "sd"}, //Sindhi
    {63, "bo"}, //Tibetan
    {64, "ne"}, //Nepali
    {65, "sa"}, //Sanskrit
    {66, "mr"}, //Marathi
    {67, "bn"}, //Bengali
    {68, "as"}, //Assamese
    {69, "gu"}, //Gujarati
    {70, "pa"}, //Punjabi
    {71, "or"}, //Oriya
    {72, "ml"}, //Malayalam
    {73, "kn"}, //Kannada
    {74, "ta"}, //Tamil
    {75, "te"}, //Telugu
    {76, "si"}, //Sinhalese
    {77, "my"}, //Burmese
    {78, "km"}, //Khmer
    {79, "lo"}, //Lao
    {80, "vi"}, //Vietnamese
    {81, "id"}, //Indonesian
    {82, "tl"}, //Tagalog
    {83, "ms-Latn"}, //Malay (Roman)
    {84, "ms-Arab"}, //Malay (Arabic)
    {85, "am"}, //Amharic
    {86, "ti"}, //Tigrinya
    {87, "om"}, //Oromo
    {88, "so"}, //Somali
    {89, "sw"}, //Swahili
    {90, "rw"}, //Kinyarwanda/Ruanda
    {91, "rn"}, //Rundi
    {92, "ny"}, //Nyanja/Chewa
    {93, "mg"}, //Malagasy
    {94, "eo"}, //Esperanto
    {128, "cy"}, //Welsh
    {129, "eu"}, //Basque
    {130, "ca"}, //Catalan
    {131, "la"}, //Latin
    {132, "qu"}, //Quechua
    {133, "gn"}, //Guarani
    {134, "ay"}, //Aymara
    {135, "tt"}, //Tatar
    {136, "ug"}, //Uighur
    {137, "dz"}, //Dzongkha
    {138, "jv-Latn"}, //Javanese (Roman)
    {139, "su-Latn"}, //Sundanese (Roman)
    {140, "gl"}, //Galician
    {141, "af"}, //Afrikaans
    {142, "br"}, //Breton
    {143, "iu"}, //Inuktitut
    {144, "gd"}, //Scottish (Gaelic)
    {145, "gv"}, //Manx (Gaelic)
    {146, "ga"}, //Irish (Gaelic with Lenition)
    {147, "to"}, //Tongan
    {148, "el"}, //Greek (Polytonic) Note: ISO 15924 does not have an equivalent script name.
    {149, "kl"}, //Greenlandic
    {150, "az-Latn"}, //Azerbaijani (Roman)
    {151, "nn"}, //Nynorsk

    /** A mapping from Windows LCID to BCP 47 codes.
     *  This list is the sorted, curated output of tools/win_lcid.cpp.
     *  Note that these are sorted by value for quick binary lookup, and not logically by lsb.
     *  The 'bare' language ids (e.g. 0x0001 for Arabic) are omitted
     *  as they do not appear as valid language ids in the OpenType specification.
     */
    { 0x0401, "ar-SA" }, //Arabic
    { 0x0402, "bg-BG" }, //Bulgarian
    { 0x0403, "ca-ES" }, //Catalan
    { 0x0404, "zh-TW" }, //Chinese (Traditional)
    { 0x0405, "cs-CZ" }, //Czech
    { 0x0406, "da-DK" }, //Danish
    { 0x0407, "de-DE" }, //German
    { 0x0408, "el-GR" }, //Greek
    { 0x0409, "en-US" }, //English
    { 0x040a, "es-ES_tradnl" }, //Spanish
    { 0x040b, "fi-FI" }, //Finnish
    { 0x040c, "fr-FR" }, //French
    { 0x040d, "he-IL" }, //Hebrew
    { 0x040d, "he" }, //Hebrew
    { 0x040e, "hu-HU" }, //Hungarian
    { 0x040e, "hu" }, //Hungarian
    { 0x040f, "is-IS" }, //Icelandic
    { 0x0410, "it-IT" }, //Italian
    { 0x0411, "ja-JP" }, //Japanese
    { 0x0412, "ko-KR" }, //Korean
    { 0x0413, "nl-NL" }, //Dutch
    { 0x0414, "nb-NO" }, //Norwegian (Bokmål)
    { 0x0415, "pl-PL" }, //Polish
    { 0x0416, "pt-BR" }, //Portuguese
    { 0x0417, "rm-CH" }, //Romansh
    { 0x0418, "ro-RO" }, //Romanian
    { 0x0419, "ru-RU" }, //Russian
    { 0x041a, "hr-HR" }, //Croatian
    { 0x041b, "sk-SK" }, //Slovak
    { 0x041c, "sq-AL" }, //Albanian
    { 0x041d, "sv-SE" }, //Swedish
    { 0x041e, "th-TH" }, //Thai
    { 0x041f, "tr-TR" }, //Turkish
    { 0x0420, "ur-PK" }, //Urdu
    { 0x0421, "id-ID" }, //Indonesian
    { 0x0422, "uk-UA" }, //Ukrainian
    { 0x0423, "be-BY" }, //Belarusian
    { 0x0424, "sl-SI" }, //Slovenian
    { 0x0425, "et-EE" }, //Estonian
    { 0x0426, "lv-LV" }, //Latvian
    { 0x0427, "lt-LT" }, //Lithuanian
    { 0x0428, "tg-Cyrl-TJ" }, //Tajik (Cyrillic)
    { 0x0429, "fa-IR" }, //Persian
    { 0x042a, "vi-VN" }, //Vietnamese
    { 0x042b, "hy-AM" }, //Armenian
    { 0x042c, "az-Latn-AZ" }, //Azeri (Latin)
    { 0x042d, "eu-ES" }, //Basque
    { 0x042e, "hsb-DE" }, //Upper Sorbian
    { 0x042f, "mk-MK" }, //Macedonian (FYROM)
    { 0x0432, "tn-ZA" }, //Setswana
    { 0x0434, "xh-ZA" }, //isiXhosa
    { 0x0435, "zu-ZA" }, //isiZulu
    { 0x0436, "af-ZA" }, //Afrikaans
    { 0x0437, "ka-GE" }, //Georgian
    { 0x0438, "fo-FO" }, //Faroese
    { 0x0439, "hi-IN" }, //Hindi
    { 0x043a, "mt-MT" }, //Maltese
    { 0x043b, "se-NO" }, //Sami (Northern)
    { 0x043e, "ms-MY" }, //Malay
    { 0x043f, "kk-KZ" }, //Kazakh
    { 0x0440, "ky-KG" }, //Kyrgyz
    { 0x0441, "sw-KE" }, //Kiswahili
    { 0x0442, "tk-TM" }, //Turkmen
    { 0x0443, "uz-Latn-UZ" }, //Uzbek (Latin)
    { 0x0443, "uz" }, //Uzbek
    { 0x0444, "tt-RU" }, //Tatar
    { 0x0445, "bn-IN" }, //Bengali
    { 0x0446, "pa-IN" }, //Punjabi
    { 0x0447, "gu-IN" }, //Gujarati
    { 0x0448, "or-IN" }, //Oriya
    { 0x0449, "ta-IN" }, //Tamil
    { 0x044a, "te-IN" }, //Telugu
    { 0x044b, "kn-IN" }, //Kannada
    { 0x044c, "ml-IN" }, //Malayalam
    { 0x044d, "as-IN" }, //Assamese
    { 0x044e, "mr-IN" }, //Marathi
    { 0x044f, "sa-IN" }, //Sanskrit
    { 0x0450, "mn-Cyrl" }, //Mongolian (Cyrillic)
    { 0x0451, "bo-CN" }, //Tibetan
    { 0x0452, "cy-GB" }, //Welsh
    { 0x0453, "km-KH" }, //Khmer
    { 0x0454, "lo-LA" }, //Lao
    { 0x0456, "gl-ES" }, //Galician
    { 0x0457, "kok-IN" }, //Konkani
    { 0x045a, "syr-SY" }, //Syriac
    { 0x045b, "si-LK" }, //Sinhala
    { 0x045d, "iu-Cans-CA" }, //Inuktitut (Syllabics)
    { 0x045e, "am-ET" }, //Amharic
    { 0x0461, "ne-NP" }, //Nepali
    { 0x0462, "fy-NL" }, //Frisian
    { 0x0463, "ps-AF" }, //Pashto
    { 0x0464, "fil-PH" }, //Filipino
    { 0x0465, "dv-MV" }, //Divehi
    { 0x0468, "ha-Latn-NG" }, //Hausa (Latin)
    { 0x046a, "yo-NG" }, //Yoruba
    { 0x046b, "quz-BO" }, //Quechua
    { 0x046c, "nso-ZA" }, //Sesotho sa Leboa
    { 0x046d, "ba-RU" }, //Bashkir
    { 0x046e, "lb-LU" }, //Luxembourgish
    { 0x046f, "kl-GL" }, //Greenlandic
    { 0x0470, "ig-NG" }, //Igbo
    { 0x0478, "ii-CN" }, //Yi
    { 0x047a, "arn-CL" }, //Mapudungun
    { 0x047c, "moh-CA" }, //Mohawk
    { 0x047e, "br-FR" }, //Breton
    { 0x0480, "ug-CN" }, //Uyghur
    { 0x0481, "mi-NZ" }, //Maori
    { 0x0482, "oc-FR" }, //Occitan
    { 0x0483, "co-FR" }, //Corsican
    { 0x0484, "gsw-FR" }, //Alsatian
    { 0x0485, "sah-RU" }, //Yakut
    { 0x0486, "qut-GT" }, //K'iche
    { 0x0487, "rw-RW" }, //Kinyarwanda
    { 0x0488, "wo-SN" }, //Wolof
    { 0x048c, "prs-AF" }, //Dari
    { 0x0491, "gd-GB" }, //Scottish Gaelic
    { 0x0801, "ar-IQ" }, //Arabic
    { 0x0804, "zh-Hans" }, //Chinese (Simplified)
    { 0x0807, "de-CH" }, //German
    { 0x0809, "en-GB" }, //English
    { 0x080a, "es-MX" }, //Spanish
    { 0x080c, "fr-BE" }, //French
    { 0x0810, "it-CH" }, //Italian
    { 0x0813, "nl-BE" }, //Dutch
    { 0x0814, "nn-NO" }, //Norwegian (Nynorsk)
    { 0x0816, "pt-PT" }, //Portuguese
    { 0x081a, "sr-Latn-CS" }, //Serbian (Latin)
    { 0x081d, "sv-FI" }, //Swedish
    { 0x082c, "az-Cyrl-AZ" }, //Azeri (Cyrillic)
    { 0x082e, "dsb-DE" }, //Lower Sorbian
    { 0x082e, "dsb" }, //Lower Sorbian
    { 0x083b, "se-SE" }, //Sami (Northern)
    { 0x083c, "ga-IE" }, //Irish
    { 0x083e, "ms-BN" }, //Malay
    { 0x0843, "uz-Cyrl-UZ" }, //Uzbek (Cyrillic)
    { 0x0845, "bn-BD" }, //Bengali
    { 0x0850, "mn-Mong-CN" }, //Mongolian (Traditional Mongolian)
    { 0x085d, "iu-Latn-CA" }, //Inuktitut (Latin)
    { 0x085f, "tzm-Latn-DZ" }, //Tamazight (Latin)
    { 0x086b, "quz-EC" }, //Quechua
    { 0x0c01, "ar-EG" }, //Arabic
    { 0x0c04, "zh-Hant" }, //Chinese (Traditional)
    { 0x0c07, "de-AT" }, //German
    { 0x0c09, "en-AU" }, //English
    { 0x0c0a, "es-ES" }, //Spanish
    { 0x0c0c, "fr-CA" }, //French
    { 0x0c1a, "sr-Cyrl-CS" }, //Serbian (Cyrillic)
    { 0x0c3b, "se-FI" }, //Sami (Northern)
    { 0x0c6b, "quz-PE" }, //Quechua
    { 0x1001, "ar-LY" }, //Arabic
    { 0x1004, "zh-SG" }, //Chinese (Simplified)
    { 0x1007, "de-LU" }, //German
    { 0x1009, "en-CA" }, //English
    { 0x100a, "es-GT" }, //Spanish
    { 0x100c, "fr-CH" }, //French
    { 0x101a, "hr-BA" }, //Croatian (Latin)
    { 0x103b, "smj-NO" }, //Sami (Lule)
    { 0x1401, "ar-DZ" }, //Arabic
    { 0x1404, "zh-MO" }, //Chinese (Traditional)
    { 0x1407, "de-LI" }, //German
    { 0x1409, "en-NZ" }, //English
    { 0x140a, "es-CR" }, //Spanish
    { 0x140c, "fr-LU" }, //French
    { 0x141a, "bs-Latn-BA" }, //Bosnian (Latin)
    { 0x141a, "bs" }, //Bosnian
    { 0x143b, "smj-SE" }, //Sami (Lule)
    { 0x143b, "smj" }, //Sami (Lule)
    { 0x1801, "ar-MA" }, //Arabic
    { 0x1809, "en-IE" }, //English
    { 0x180a, "es-PA" }, //Spanish
    { 0x180c, "fr-MC" }, //French
    { 0x181a, "sr-Latn-BA" }, //Serbian (Latin)
    { 0x183b, "sma-NO" }, //Sami (Southern)
    { 0x1c01, "ar-TN" }, //Arabic
    { 0x1c09, "en-ZA" }, //English
    { 0x1c0a, "es-DO" }, //Spanish
    { 0x1c1a, "sr-Cyrl-BA" }, //Serbian (Cyrillic)
    { 0x1c3b, "sma-SE" }, //Sami (Southern)
    { 0x1c3b, "sma" }, //Sami (Southern)
    { 0x2001, "ar-OM" }, //Arabic
    { 0x2009, "en-JM" }, //English
    { 0x200a, "es-VE" }, //Spanish
    { 0x201a, "bs-Cyrl-BA" }, //Bosnian (Cyrillic)
    { 0x201a, "bs-Cyrl" }, //Bosnian (Cyrillic)
    { 0x203b, "sms-FI" }, //Sami (Skolt)
    { 0x203b, "sms" }, //Sami (Skolt)
    { 0x2401, "ar-YE" }, //Arabic
    { 0x2409, "en-029" }, //English
    { 0x240a, "es-CO" }, //Spanish
    { 0x241a, "sr-Latn-RS" }, //Serbian (Latin)
    { 0x243b, "smn-FI" }, //Sami (Inari)
    { 0x2801, "ar-SY" }, //Arabic
    { 0x2809, "en-BZ" }, //English
    { 0x280a, "es-PE" }, //Spanish
    { 0x281a, "sr-Cyrl-RS" }, //Serbian (Cyrillic)
    { 0x2c01, "ar-JO" }, //Arabic
    { 0x2c09, "en-TT" }, //English
    { 0x2c0a, "es-AR" }, //Spanish
    { 0x2c1a, "sr-Latn-ME" }, //Serbian (Latin)
    { 0x3001, "ar-LB" }, //Arabic
    { 0x3009, "en-ZW" }, //English
    { 0x300a, "es-EC" }, //Spanish
    { 0x301a, "sr-Cyrl-ME" }, //Serbian (Cyrillic)
    { 0x3401, "ar-KW" }, //Arabic
    { 0x3409, "en-PH" }, //English
    { 0x340a, "es-CL" }, //Spanish
    { 0x3801, "ar-AE" }, //Arabic
    { 0x380a, "es-UY" }, //Spanish
    { 0x3c01, "ar-BH" }, //Arabic
    { 0x3c0a, "es-PY" }, //Spanish
    { 0x4001, "ar-QA" }, //Arabic
    { 0x4009, "en-IN" }, //English
    { 0x400a, "es-BO" }, //Spanish
    { 0x4409, "en-MY" }, //English
    { 0x440a, "es-SV" }, //Spanish
    { 0x4809, "en-SG" }, //English
    { 0x480a, "es-HN" }, //Spanish
    { 0x4c0a, "es-NI" }, //Spanish
    { 0x500a, "es-PR" }, //Spanish
    { 0x540a, "es-US" }, //Spanish
};

bungeman@google.com5df74342013-04-02 14:40:44 +0000452namespace {
bsalomon@google.com20f7f172013-05-17 19:05:03 +0000453bool BCP47FromLanguageIdLess(const BCP47FromLanguageId& a, const BCP47FromLanguageId& b) {
454 return a.languageID < b.languageID;
bungeman@google.com07a69f82013-04-02 14:12:38 +0000455}
John Stilesa6841be2020-08-06 14:11:56 -0400456} // namespace
bungeman@google.com07a69f82013-04-02 14:12:38 +0000457
bungeman@google.com07a69f82013-04-02 14:12:38 +0000458bool SkOTTableName::Iterator::next(SkOTTableName::Iterator::Record& record) {
Ben Wagnerad031f52018-08-20 13:45:57 -0400459 SkOTTableName nameTable;
460 if (fNameTableSize < sizeof(nameTable)) {
461 return false;
462 }
463 memcpy(&nameTable, fNameTable, sizeof(nameTable));
464
465 const uint8_t* nameRecords = fNameTable + sizeof(nameTable);
466 const size_t nameRecordsSize = fNameTableSize - sizeof(nameTable);
467
468 const size_t stringTableOffset = SkEndian_SwapBE16(nameTable.stringOffset);
469 if (fNameTableSize < stringTableOffset) {
470 return false;
471 }
472 const uint8_t* stringTable = fNameTable + stringTableOffset;
473 const size_t stringTableSize = fNameTableSize - stringTableOffset;
bungeman@google.com07a69f82013-04-02 14:12:38 +0000474
475 // Find the next record which matches the requested type.
Ben Wagnerad031f52018-08-20 13:45:57 -0400476 SkOTTableName::Record nameRecord;
477 const size_t nameRecordsCount = SkEndian_SwapBE16(nameTable.count);
Brian Osman788b9162020-02-07 10:36:46 -0500478 const size_t nameRecordsMax = std::min(nameRecordsCount, nameRecordsSize / sizeof(nameRecord));
bungeman@google.com07a69f82013-04-02 14:12:38 +0000479 do {
Ben Wagnerad031f52018-08-20 13:45:57 -0400480 if (fIndex >= nameRecordsMax) {
bungeman@google.com07a69f82013-04-02 14:12:38 +0000481 return false;
482 }
483
Ben Wagnerad031f52018-08-20 13:45:57 -0400484 memcpy(&nameRecord, nameRecords + sizeof(nameRecord)*fIndex, sizeof(nameRecord));
bungeman@google.com07a69f82013-04-02 14:12:38 +0000485 ++fIndex;
Ben Wagnerad031f52018-08-20 13:45:57 -0400486 } while (fType != -1 && nameRecord.nameID.fontSpecific != fType);
bungeman@google.com07a69f82013-04-02 14:12:38 +0000487
Ben Wagnerad031f52018-08-20 13:45:57 -0400488 record.type = nameRecord.nameID.fontSpecific;
bungeman@google.com07a69f82013-04-02 14:12:38 +0000489
490 // Decode the name into UTF-8.
Ben Wagnerad031f52018-08-20 13:45:57 -0400491 const size_t nameOffset = SkEndian_SwapBE16(nameRecord.offset);
492 const size_t nameLength = SkEndian_SwapBE16(nameRecord.length);
493 if (stringTableSize < nameOffset + nameLength) {
494 return false; // continue?
495 }
496 const uint8_t* nameString = stringTable + nameOffset;
497 switch (nameRecord.platformID.value) {
bungeman@google.com07a69f82013-04-02 14:12:38 +0000498 case SkOTTableName::Record::PlatformID::Windows:
bungeman@google.coma9802692013-08-07 02:45:25 +0000499 if (SkOTTableName::Record::EncodingID::Windows::UnicodeBMPUCS2
Ben Wagnerad031f52018-08-20 13:45:57 -0400500 != nameRecord.encodingID.windows.value
bungeman@google.coma9802692013-08-07 02:45:25 +0000501 && SkOTTableName::Record::EncodingID::Windows::UnicodeUCS4
Ben Wagnerad031f52018-08-20 13:45:57 -0400502 != nameRecord.encodingID.windows.value
bungeman@google.coma9802692013-08-07 02:45:25 +0000503 && SkOTTableName::Record::EncodingID::Windows::Symbol
Ben Wagnerad031f52018-08-20 13:45:57 -0400504 != nameRecord.encodingID.windows.value)
bungeman@google.coma9802692013-08-07 02:45:25 +0000505 {
506 record.name.reset();
Ben Wagnerad031f52018-08-20 13:45:57 -0400507 break; // continue?
bungeman@google.coma9802692013-08-07 02:45:25 +0000508 }
John Stiles30212b72020-06-11 17:55:07 -0400509 [[fallthrough]];
bungeman@google.com07a69f82013-04-02 14:12:38 +0000510 case SkOTTableName::Record::PlatformID::Unicode:
511 case SkOTTableName::Record::PlatformID::ISO:
Mike Klein475c5e92018-08-08 10:23:17 -0400512 SkString_from_UTF16BE(nameString, nameLength, record.name);
bungeman@google.com07a69f82013-04-02 14:12:38 +0000513 break;
514
515 case SkOTTableName::Record::PlatformID::Macintosh:
bungeman@google.coma9802692013-08-07 02:45:25 +0000516 // TODO: need better decoding, especially on Mac.
517 if (SkOTTableName::Record::EncodingID::Macintosh::Roman
Ben Wagnerad031f52018-08-20 13:45:57 -0400518 != nameRecord.encodingID.macintosh.value)
bungeman@google.coma9802692013-08-07 02:45:25 +0000519 {
520 record.name.reset();
Ben Wagnerad031f52018-08-20 13:45:57 -0400521 break; // continue?
bungeman@google.coma9802692013-08-07 02:45:25 +0000522 }
Ben Wagnerad031f52018-08-20 13:45:57 -0400523 SkStringFromMacRoman(nameString, nameLength, record.name);
bungeman@google.com07a69f82013-04-02 14:12:38 +0000524 break;
525
526 case SkOTTableName::Record::PlatformID::Custom:
527 // These should never appear in a 'name' table.
528 default:
529 SkASSERT(false);
530 record.name.reset();
Ben Wagnerad031f52018-08-20 13:45:57 -0400531 break; // continue?
bungeman@google.com07a69f82013-04-02 14:12:38 +0000532 }
533
534 // Determine the language.
Ben Wagnerad031f52018-08-20 13:45:57 -0400535 const uint16_t languageID = SkEndian_SwapBE16(nameRecord.languageID.languageTagID);
bungeman@google.com07a69f82013-04-02 14:12:38 +0000536
537 // Handle format 1 languages.
Ben Wagnerad031f52018-08-20 13:45:57 -0400538 if (SkOTTableName::format_1 == nameTable.format && languageID >= 0x8000) {
bungeman@google.com07a69f82013-04-02 14:12:38 +0000539 const uint16_t languageTagRecordIndex = languageID - 0x8000;
540
Ben Wagnerad031f52018-08-20 13:45:57 -0400541 if (nameRecordsSize < sizeof(nameRecord)*nameRecordsCount) {
542 return false; //"und" or break?
543 }
544 const uint8_t* format1extData = nameRecords + sizeof(nameRecord)*nameRecordsCount;
545 size_t format1extSize = nameRecordsSize - sizeof(nameRecord)*nameRecordsCount;
546 SkOTTableName::Format1Ext format1ext;
547 if (format1extSize < sizeof(format1ext)) {
548 return false; // "und" or break?
549 }
550 memcpy(&format1ext, format1extData, sizeof(format1ext));
bungeman@google.com07a69f82013-04-02 14:12:38 +0000551
Ben Wagnerad031f52018-08-20 13:45:57 -0400552 const uint8_t* languageTagRecords = format1extData + sizeof(format1ext);
553 size_t languageTagRecordsSize = format1extSize - sizeof(format1ext);
554 if (languageTagRecordIndex < SkEndian_SwapBE16(format1ext.langTagCount)) {
555 SkOTTableName::Format1Ext::LangTagRecord languageTagRecord;
556 if (languageTagRecordsSize < sizeof(languageTagRecord)*(languageTagRecordIndex+1)) {
557 return false; // "und"?
558 }
559 const uint8_t* languageTagData = languageTagRecords
560 + sizeof(languageTagRecord)*languageTagRecordIndex;
561 memcpy(&languageTagRecord, languageTagData, sizeof(languageTagRecord));
bungeman@google.com07a69f82013-04-02 14:12:38 +0000562
Ben Wagnerad031f52018-08-20 13:45:57 -0400563 uint16_t languageOffset = SkEndian_SwapBE16(languageTagRecord.offset);
564 uint16_t languageLength = SkEndian_SwapBE16(languageTagRecord.length);
565
566 if (fNameTableSize < stringTableOffset + languageOffset + languageLength) {
567 return false; // "und"?
568 }
569 const uint8_t* languageString = stringTable + languageOffset;
570 SkString_from_UTF16BE(languageString, languageLength, record.language);
bungeman@google.com07a69f82013-04-02 14:12:38 +0000571 return true;
572 }
573 }
574
575 // Handle format 0 languages, translating them into BCP 47.
576 const BCP47FromLanguageId target = { languageID, "" };
bsalomon@google.com20f7f172013-05-17 19:05:03 +0000577 int languageIndex = SkTSearch<BCP47FromLanguageId, BCP47FromLanguageIdLess>(
bungeman@google.com07a69f82013-04-02 14:12:38 +0000578 BCP47FromLanguageID, SK_ARRAY_COUNT(BCP47FromLanguageID), target, sizeof(target));
579 if (languageIndex >= 0) {
580 record.language = BCP47FromLanguageID[languageIndex].bcp47;
581 return true;
582 }
583
584 // Unknown language, return the BCP 47 code 'und' for 'undetermined'.
bungeman@google.com07a69f82013-04-02 14:12:38 +0000585 record.language = "und";
586 return true;
587}