blob: 5f83839de84971676f467e7e34c6ed36961039cc [file] [log] [blame]
// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "cmap.h"
6
bashi@chromium.org79602bd2011-04-05 20:20:15 +00007#include <algorithm>
yusukes@chromium.orga4099a32009-11-12 01:43:51 +00008#include <set>
9#include <utility>
10#include <vector>
11
yusukes@chromium.orgd257d182009-11-04 04:56:32 +000012#include "maxp.h"
yusukes@chromium.orga4099a32009-11-12 01:43:51 +000013#include "os2.h"
yusukes@chromium.orgd257d182009-11-04 04:56:32 +000014
// cmap - Character To Glyph Index Mapping Table
// http://www.microsoft.com/opentype/otspec/cmap.htm
17
18namespace {
19
// One cmap encoding record (platform, encoding, offset) together with the
// format and length that ots_cmap_parse() reads from the subtable the record
// points at.
struct CMAPSubtableHeader {
  uint16_t platform;  // platform ID of the encoding record
  uint16_t encoding;  // encoding ID of the encoding record
  uint32_t offset;    // byte offset of the subtable within the cmap table
  uint16_t format;    // subtable format, read from the subtable itself
  uint32_t length;    // subtable length in bytes; 0 for unhandled formats
};
27
// One segment of a format 4 subtable as collected by ParseFormat4().
struct Subtable314Range {
  uint16_t start_range;      // first code point of the segment
  uint16_t end_range;        // last code point of the segment (inclusive)
  int16_t id_delta;          // added to the code point when id_range_offset is 0
  uint16_t id_range_offset;  // byte offset to the glyph array, or 0
  // Position of the id_range_offset field inside the subtable; the offset
  // above is relative to this position.
  uint32_t id_range_offset_offset;
};
35
// The maximum number of groups in format 12, 13 or 14 subtables.
// Note: 0xFFFF is the maximum number of glyphs in a single font file.
const unsigned kMaxCMAPGroups = 0xFFFF;

// Glyph array size for the Mac Roman (format 0) table.
const size_t kFormat0ArraySize = 256;

// The upper limit of the Unicode code point.
const uint32_t kUnicodeUpperLimit = 0x10FFFF;

// The ranges of Unicode variation selectors (UVSes) are:
//   0x180B-0x180D   (3 code points)
//   0xFE00-0xFE0F   (16 code points)
//   0xE0100-0xE01EF (240 code points)
const uint32_t kMongolianVSStart = 0x180B;
const uint32_t kMongolianVSEnd = 0x180D;
const uint32_t kVSStart = 0xFE00;
const uint32_t kVSEnd = 0xFE0F;
const uint32_t kIVSStart = 0xE0100;
const uint32_t kIVSEnd = 0xE01EF;

// The maximum number of UVS records: one per variation selector in the three
// ranges above (3 + 16 + 240 = 259). Derived from the range constants so the
// two can never disagree.
const uint32_t kMaxCMAPSelectorRecords =
    (kMongolianVSEnd - kMongolianVSStart + 1) +
    (kVSEnd - kVSStart + 1) +
    (kIVSEnd - kIVSStart + 1);

// Largest value a 24-bit (UINT24) field can hold.
const uint32_t kUVSUpperLimit = 0xFFFFFF;
59
60// Parses Format 4 tables
61bool ParseFormat4(ots::OpenTypeFile *file, int platform, int encoding,
yusukes@chromium.orgd257d182009-11-04 04:56:32 +000062 const uint8_t *data, size_t length, uint16_t num_glyphs) {
63 ots::Buffer subtable(data, length);
64
bashi@google.com93aedf72010-12-24 00:29:23 +000065 // 0.3.4, 3.0.4 or 3.1.4 subtables are complex and, rather than expanding the
yusukes@chromium.orga4099a32009-11-12 01:43:51 +000066 // whole thing and recompacting it, we validate it and include it verbatim
67 // in the output.
68
69 if (!file->os2) {
70 return OTS_FAILURE();
71 }
yusukes@chromium.orgd257d182009-11-04 04:56:32 +000072
73 if (!subtable.Skip(4)) {
74 return OTS_FAILURE();
75 }
yusukes@chromium.org8ad0a172009-11-04 06:07:58 +000076 uint16_t language = 0;
yusukes@chromium.orgd257d182009-11-04 04:56:32 +000077 if (!subtable.ReadU16(&language)) {
78 return OTS_FAILURE();
79 }
80 if (language) {
yusukes@chromium.orga4099a32009-11-12 01:43:51 +000081 // Platform ID 3 (windows) subtables should have language '0'.
yusukes@chromium.orgd257d182009-11-04 04:56:32 +000082 return OTS_FAILURE();
83 }
84
85 uint16_t segcountx2, search_range, entry_selector, range_shift;
yusukes@chromium.org8ad0a172009-11-04 06:07:58 +000086 segcountx2 = search_range = entry_selector = range_shift = 0;
yusukes@chromium.orgd257d182009-11-04 04:56:32 +000087 if (!subtable.ReadU16(&segcountx2) ||
88 !subtable.ReadU16(&search_range) ||
89 !subtable.ReadU16(&entry_selector) ||
90 !subtable.ReadU16(&range_shift)) {
91 return OTS_FAILURE();
92 }
93
94 if (segcountx2 & 1 || search_range & 1) {
95 return OTS_FAILURE();
96 }
97 const uint16_t segcount = segcountx2 >> 1;
98 // There must be at least one segment according the spec.
99 if (segcount < 1) {
100 return OTS_FAILURE();
101 }
102
103 // log2segcount is the maximal x s.t. 2^x < segcount
104 unsigned log2segcount = 0;
105 while (1u << (log2segcount + 1) <= segcount) {
106 log2segcount++;
107 }
108
109 const uint16_t expected_search_range = 2 * 1u << log2segcount;
110 if (expected_search_range != search_range) {
111 return OTS_FAILURE();
112 }
113
114 if (entry_selector != log2segcount) {
115 return OTS_FAILURE();
116 }
117
118 const uint16_t expected_range_shift = segcountx2 - search_range;
119 if (range_shift != expected_range_shift) {
120 return OTS_FAILURE();
121 }
122
123 std::vector<Subtable314Range> ranges(segcount);
124
125 for (unsigned i = 0; i < segcount; ++i) {
126 if (!subtable.ReadU16(&ranges[i].end_range)) {
127 return OTS_FAILURE();
128 }
129 }
130
131 uint16_t padding;
132 if (!subtable.ReadU16(&padding)) {
133 return OTS_FAILURE();
134 }
135 if (padding) {
136 return OTS_FAILURE();
137 }
138
139 for (unsigned i = 0; i < segcount; ++i) {
140 if (!subtable.ReadU16(&ranges[i].start_range)) {
141 return OTS_FAILURE();
142 }
143 }
144 for (unsigned i = 0; i < segcount; ++i) {
145 if (!subtable.ReadS16(&ranges[i].id_delta)) {
146 return OTS_FAILURE();
147 }
148 }
149 for (unsigned i = 0; i < segcount; ++i) {
150 ranges[i].id_range_offset_offset = subtable.offset();
151 if (!subtable.ReadU16(&ranges[i].id_range_offset)) {
152 return OTS_FAILURE();
153 }
154
155 if (ranges[i].id_range_offset & 1) {
156 // Some font generators seem to put 65535 on id_range_offset
157 // for 0xFFFF-0xFFFF range.
158 // (e.g., many fonts in http://www.princexml.com/fonts/)
159 if (i == segcount - 1u) {
160 OTS_WARNING("bad id_range_offset");
161 ranges[i].id_range_offset = 0;
162 // The id_range_offset value in the transcoded font will not change
163 // since this table is not actually "transcoded" yet.
164 } else {
165 return OTS_FAILURE();
166 }
167 }
168 }
169
170 // ranges must be ascending order, based on the end_code. Ranges may not
171 // overlap.
172 for (unsigned i = 1; i < segcount; ++i) {
173 if ((i == segcount - 1u) &&
174 (ranges[i - 1].start_range == 0xffff) &&
175 (ranges[i - 1].end_range == 0xffff) &&
176 (ranges[i].start_range == 0xffff) &&
177 (ranges[i].end_range == 0xffff)) {
178 // Some fonts (e.g., Germania.ttf) have multiple 0xffff terminators.
179 // We'll accept them as an exception.
180 OTS_WARNING("multiple 0xffff terminators found");
181 continue;
182 }
183
184 // Note: some Linux fonts (e.g., LucidaSansOblique.ttf, bsmi00lp.ttf) have
185 // unsorted table...
186 if (ranges[i].end_range <= ranges[i - 1].end_range) {
187 return OTS_FAILURE();
188 }
189 if (ranges[i].start_range <= ranges[i - 1].end_range) {
190 return OTS_FAILURE();
191 }
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000192
193 // On many fonts, the value of {first, last}_char_index are incorrect.
194 // Fix them.
195 if (file->os2->first_char_index != 0xFFFF &&
196 ranges[i].start_range != 0xFFFF &&
197 file->os2->first_char_index > ranges[i].start_range) {
198 file->os2->first_char_index = ranges[i].start_range;
199 }
200 if (file->os2->last_char_index != 0xFFFF &&
201 ranges[i].end_range != 0xFFFF &&
202 file->os2->last_char_index < ranges[i].end_range) {
203 file->os2->last_char_index = ranges[i].end_range;
204 }
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000205 }
206
207 // The last range must end at 0xffff
208 if (ranges[segcount - 1].end_range != 0xffff) {
209 return OTS_FAILURE();
210 }
211
212 // A format 4 CMAP subtable is complex. To be safe we simulate a lookup of
213 // each code-point defined in the table and make sure that they are all valid
214 // glyphs and that we don't access anything out-of-bounds.
bashi@chromium.org54380c82012-09-04 04:36:11 +0000215 for (unsigned i = 0; i < segcount; ++i) {
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000216 for (unsigned cp = ranges[i].start_range; cp <= ranges[i].end_range; ++cp) {
217 const uint16_t code_point = cp;
218 if (ranges[i].id_range_offset == 0) {
219 // this is explictly allowed to overflow in the spec
220 const uint16_t glyph = code_point + ranges[i].id_delta;
221 if (glyph >= num_glyphs) {
222 return OTS_FAILURE();
223 }
224 } else {
225 const uint16_t range_delta = code_point - ranges[i].start_range;
226 // this might seem odd, but it's true. The offset is relative to the
227 // location of the offset value itself.
228 const uint32_t glyph_id_offset = ranges[i].id_range_offset_offset +
229 ranges[i].id_range_offset +
230 range_delta * 2;
231 // We need to be able to access a 16-bit value from this offset
232 if (glyph_id_offset + 1 >= length) {
233 return OTS_FAILURE();
234 }
235 uint16_t glyph;
ksakamoto@chromium.org59f812a2013-11-01 05:47:23 +0000236 std::memcpy(&glyph, data + glyph_id_offset, 2);
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000237 glyph = ntohs(glyph);
238 if (glyph >= num_glyphs) {
239 return OTS_FAILURE();
240 }
241 }
242 }
243 }
244
245 // We accept the table.
246 // TODO(yusukes): transcode the subtable.
bashi@google.com93aedf72010-12-24 00:29:23 +0000247 if (platform == 3 && encoding == 0) {
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000248 file->cmap->subtable_3_0_4_data = data;
249 file->cmap->subtable_3_0_4_length = length;
bashi@google.com93aedf72010-12-24 00:29:23 +0000250 } else if (platform == 3 && encoding == 1) {
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000251 file->cmap->subtable_3_1_4_data = data;
252 file->cmap->subtable_3_1_4_length = length;
bashi@google.com93aedf72010-12-24 00:29:23 +0000253 } else if (platform == 0 && encoding == 3) {
254 file->cmap->subtable_0_3_4_data = data;
255 file->cmap->subtable_0_3_4_length = length;
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000256 } else {
257 return OTS_FAILURE();
258 }
259
260 return true;
261}
262
263bool Parse31012(ots::OpenTypeFile *file,
264 const uint8_t *data, size_t length, uint16_t num_glyphs) {
265 ots::Buffer subtable(data, length);
266
267 // Format 12 tables are simple. We parse these and fully serialise them
268 // later.
269
270 if (!subtable.Skip(8)) {
271 return OTS_FAILURE();
272 }
yusukes@chromium.org8ad0a172009-11-04 06:07:58 +0000273 uint32_t language = 0;
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000274 if (!subtable.ReadU32(&language)) {
275 return OTS_FAILURE();
276 }
277 if (language) {
278 return OTS_FAILURE();
279 }
280
yusukes@chromium.org8ad0a172009-11-04 06:07:58 +0000281 uint32_t num_groups = 0;
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000282 if (!subtable.ReadU32(&num_groups)) {
283 return OTS_FAILURE();
284 }
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000285 if (num_groups == 0 || num_groups > kMaxCMAPGroups) {
286 return OTS_FAILURE();
287 }
288
289 std::vector<ots::OpenTypeCMAPSubtableRange> &groups
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000290 = file->cmap->subtable_3_10_12;
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000291 groups.resize(num_groups);
292
293 for (unsigned i = 0; i < num_groups; ++i) {
294 if (!subtable.ReadU32(&groups[i].start_range) ||
295 !subtable.ReadU32(&groups[i].end_range) ||
296 !subtable.ReadU32(&groups[i].start_glyph_id)) {
297 return OTS_FAILURE();
298 }
299
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000300 if (groups[i].start_range > kUnicodeUpperLimit ||
301 groups[i].end_range > kUnicodeUpperLimit ||
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000302 groups[i].start_glyph_id > 0xFFFF) {
303 return OTS_FAILURE();
304 }
305
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000306 // [0xD800, 0xDFFF] are surrogate code points.
307 if (groups[i].start_range >= 0xD800 &&
308 groups[i].start_range <= 0xDFFF) {
309 return OTS_FAILURE();
310 }
311 if (groups[i].end_range >= 0xD800 &&
312 groups[i].end_range <= 0xDFFF) {
313 return OTS_FAILURE();
314 }
315 if (groups[i].start_range < 0xD800 &&
316 groups[i].end_range > 0xDFFF) {
317 return OTS_FAILURE();
318 }
319
320 // We assert that the glyph value is within range. Because of the range
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000321 // limits, above, we don't need to worry about overflow.
322 if (groups[i].end_range < groups[i].start_range) {
323 return OTS_FAILURE();
324 }
325 if ((groups[i].end_range - groups[i].start_range) +
326 groups[i].start_glyph_id > num_glyphs) {
327 return OTS_FAILURE();
328 }
329 }
330
331 // the groups must be sorted by start code and may not overlap
332 for (unsigned i = 1; i < num_groups; ++i) {
333 if (groups[i].start_range <= groups[i - 1].start_range) {
334 return OTS_FAILURE();
335 }
336 if (groups[i].start_range <= groups[i - 1].end_range) {
337 return OTS_FAILURE();
338 }
339 }
340
341 return true;
342}
343
344bool Parse31013(ots::OpenTypeFile *file,
345 const uint8_t *data, size_t length, uint16_t num_glyphs) {
346 ots::Buffer subtable(data, length);
347
348 // Format 13 tables are simple. We parse these and fully serialise them
349 // later.
350
351 if (!subtable.Skip(8)) {
352 return OTS_FAILURE();
353 }
yusukes@chromium.org8ad0a172009-11-04 06:07:58 +0000354 uint16_t language = 0;
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000355 if (!subtable.ReadU16(&language)) {
356 return OTS_FAILURE();
357 }
358 if (language) {
359 return OTS_FAILURE();
360 }
361
yusukes@chromium.org8ad0a172009-11-04 06:07:58 +0000362 uint32_t num_groups = 0;
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000363 if (!subtable.ReadU32(&num_groups)) {
364 return OTS_FAILURE();
365 }
366
367 // We limit the number of groups in the same way as in 3.10.12 tables. See
368 // the comment there in
369 if (num_groups == 0 || num_groups > kMaxCMAPGroups) {
370 return OTS_FAILURE();
371 }
372
373 std::vector<ots::OpenTypeCMAPSubtableRange> &groups
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000374 = file->cmap->subtable_3_10_13;
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000375 groups.resize(num_groups);
376
377 for (unsigned i = 0; i < num_groups; ++i) {
378 if (!subtable.ReadU32(&groups[i].start_range) ||
379 !subtable.ReadU32(&groups[i].end_range) ||
380 !subtable.ReadU32(&groups[i].start_glyph_id)) {
381 return OTS_FAILURE();
382 }
383
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000384 // We conservatively limit all of the values to protect some parsers from
385 // overflows
386 if (groups[i].start_range > kUnicodeUpperLimit ||
387 groups[i].end_range > kUnicodeUpperLimit ||
388 groups[i].start_glyph_id > 0xFFFF) {
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000389 return OTS_FAILURE();
390 }
391
392 if (groups[i].start_glyph_id >= num_glyphs) {
393 return OTS_FAILURE();
394 }
395 }
396
397 // the groups must be sorted by start code and may not overlap
398 for (unsigned i = 1; i < num_groups; ++i) {
399 if (groups[i].start_range <= groups[i - 1].start_range) {
400 return OTS_FAILURE();
401 }
402 if (groups[i].start_range <= groups[i - 1].end_range) {
403 return OTS_FAILURE();
404 }
405 }
406
407 return true;
408}
409
bashi@google.com93aedf72010-12-24 00:29:23 +0000410bool Parse0514(ots::OpenTypeFile *file,
411 const uint8_t *data, size_t length, uint16_t num_glyphs) {
412 // Unicode Variation Selector table
413 ots::Buffer subtable(data, length);
414
415 // Format 14 tables are simple. We parse these and fully serialise them
416 // later.
417
418 // Skip format (USHORT) and length (ULONG)
419 if (!subtable.Skip(6)) {
420 return OTS_FAILURE();
421 }
422
423 uint32_t num_records = 0;
424 if (!subtable.ReadU32(&num_records)) {
425 return OTS_FAILURE();
426 }
427 if (num_records == 0 || num_records > kMaxCMAPSelectorRecords) {
428 return OTS_FAILURE();
429 }
430
431 std::vector<ots::OpenTypeCMAPSubtableVSRecord>& records
432 = file->cmap->subtable_0_5_14;
433 records.resize(num_records);
434
435 for (unsigned i = 0; i < num_records; ++i) {
436 if (!subtable.ReadU24(&records[i].var_selector) ||
437 !subtable.ReadU32(&records[i].default_offset) ||
438 !subtable.ReadU32(&records[i].non_default_offset)) {
439 return OTS_FAILURE();
440 }
441 // Checks the value of variation selector
442 if (!((records[i].var_selector >= kMongolianVSStart &&
443 records[i].var_selector <= kMongolianVSEnd) ||
444 (records[i].var_selector >= kVSStart &&
445 records[i].var_selector <= kVSEnd) ||
446 (records[i].var_selector >= kIVSStart &&
447 records[i].var_selector <= kIVSEnd))) {
448 return OTS_FAILURE();
449 }
450 if (i > 0 &&
451 records[i-1].var_selector >= records[i].var_selector) {
452 return OTS_FAILURE();
453 }
454
455 // Checks offsets
456 if (!records[i].default_offset && !records[i].non_default_offset) {
457 return OTS_FAILURE();
458 }
459 if (records[i].default_offset &&
460 records[i].default_offset >= length) {
461 return OTS_FAILURE();
462 }
463 if (records[i].non_default_offset &&
464 records[i].non_default_offset >= length) {
465 return OTS_FAILURE();
466 }
467 }
468
469 for (unsigned i = 0; i < num_records; ++i) {
470 // Checks default UVS table
471 if (records[i].default_offset) {
472 subtable.set_offset(records[i].default_offset);
473 uint32_t num_ranges = 0;
474 if (!subtable.ReadU32(&num_ranges)) {
475 return OTS_FAILURE();
476 }
477 if (!num_ranges || num_ranges > kMaxCMAPGroups) {
478 return OTS_FAILURE();
479 }
480
481 uint32_t last_unicode_value = 0;
482 std::vector<ots::OpenTypeCMAPSubtableVSRange>& ranges
483 = records[i].ranges;
484 ranges.resize(num_ranges);
485
486 for (unsigned j = 0; j < num_ranges; ++j) {
487 if (!subtable.ReadU24(&ranges[j].unicode_value) ||
488 !subtable.ReadU8(&ranges[j].additional_count)) {
489 return OTS_FAILURE();
490 }
491 const uint32_t check_value =
bashi@chromium.org26afae82011-08-25 05:38:57 +0000492 ranges[j].unicode_value + ranges[j].additional_count;
bashi@google.com93aedf72010-12-24 00:29:23 +0000493 if (ranges[j].unicode_value == 0 ||
494 ranges[j].unicode_value > kUnicodeUpperLimit ||
495 check_value > kUVSUpperLimit ||
496 (last_unicode_value &&
497 ranges[j].unicode_value <= last_unicode_value)) {
498 return OTS_FAILURE();
499 }
500 last_unicode_value = check_value;
501 }
502 }
503
504 // Checks non default UVS table
505 if (records[i].non_default_offset) {
506 subtable.set_offset(records[i].non_default_offset);
507 uint32_t num_mappings = 0;
508 if (!subtable.ReadU32(&num_mappings)) {
509 return OTS_FAILURE();
510 }
511 if (!num_mappings || num_mappings > kMaxCMAPGroups) {
512 return OTS_FAILURE();
513 }
514
515 uint32_t last_unicode_value = 0;
516 std::vector<ots::OpenTypeCMAPSubtableVSMapping>& mappings
517 = records[i].mappings;
518 mappings.resize(num_mappings);
519
520 for (unsigned j = 0; j < num_mappings; ++j) {
521 if (!subtable.ReadU24(&mappings[j].unicode_value) ||
522 !subtable.ReadU16(&mappings[j].glyph_id)) {
523 return OTS_FAILURE();
524 }
525 if (mappings[j].glyph_id == 0 ||
526 mappings[j].unicode_value == 0 ||
527 mappings[j].unicode_value > kUnicodeUpperLimit ||
528 (last_unicode_value &&
529 mappings[j].unicode_value <= last_unicode_value)) {
530 return OTS_FAILURE();
531 }
532 last_unicode_value = mappings[j].unicode_value;
533 }
534 }
535 }
536
537 if (subtable.offset() != length) {
538 return OTS_FAILURE();
539 }
540 file->cmap->subtable_0_5_14_length = subtable.offset();
541 return true;
542}
543
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000544bool Parse100(ots::OpenTypeFile *file, const uint8_t *data, size_t length) {
545 // Mac Roman table
546 ots::Buffer subtable(data, length);
547
548 if (!subtable.Skip(4)) {
549 return OTS_FAILURE();
550 }
yusukes@chromium.org8ad0a172009-11-04 06:07:58 +0000551 uint16_t language = 0;
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000552 if (!subtable.ReadU16(&language)) {
553 return OTS_FAILURE();
554 }
555 if (language) {
556 // simsun.ttf has non-zero language id.
557 OTS_WARNING("language id should be zero: %u", language);
558 }
559
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000560 file->cmap->subtable_1_0_0.reserve(kFormat0ArraySize);
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000561 for (size_t i = 0; i < kFormat0ArraySize; ++i) {
562 uint8_t glyph_id = 0;
563 if (!subtable.ReadU8(&glyph_id)) {
564 return OTS_FAILURE();
565 }
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000566 file->cmap->subtable_1_0_0.push_back(glyph_id);
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000567 }
568
569 return true;
570}
571
572} // namespace
573
574namespace ots {
575
576bool ots_cmap_parse(OpenTypeFile *file, const uint8_t *data, size_t length) {
577 Buffer table(data, length);
578 file->cmap = new OpenTypeCMAP;
579
yusukes@chromium.org8ad0a172009-11-04 06:07:58 +0000580 uint16_t version = 0;
581 uint16_t num_tables = 0;
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000582 if (!table.ReadU16(&version) ||
583 !table.ReadU16(&num_tables)) {
584 return OTS_FAILURE();
585 }
586
587 if (version != 0) {
588 return OTS_FAILURE();
589 }
590 if (!num_tables) {
591 return OTS_FAILURE();
592 }
593
594 std::vector<CMAPSubtableHeader> subtable_headers;
595
596 // read the subtable headers
597 subtable_headers.reserve(num_tables);
598 for (unsigned i = 0; i < num_tables; ++i) {
599 CMAPSubtableHeader subt;
600
601 if (!table.ReadU16(&subt.platform) ||
602 !table.ReadU16(&subt.encoding) ||
603 !table.ReadU32(&subt.offset)) {
604 return OTS_FAILURE();
605 }
606
607 subtable_headers.push_back(subt);
608 }
609
610 const size_t data_offset = table.offset();
611
612 // make sure that all the offsets are valid.
613 uint32_t last_id = 0;
614 for (unsigned i = 0; i < num_tables; ++i) {
615 if (subtable_headers[i].offset > 1024 * 1024 * 1024) {
616 return OTS_FAILURE();
617 }
618 if (subtable_headers[i].offset < data_offset ||
619 subtable_headers[i].offset >= length) {
620 return OTS_FAILURE();
621 }
622
623 // check if the table is sorted first by platform ID, then by encoding ID.
624 uint32_t current_id
625 = (subtable_headers[i].platform << 16) + subtable_headers[i].encoding;
626 if ((i != 0) && (last_id >= current_id)) {
627 return OTS_FAILURE();
628 }
629 last_id = current_id;
630 }
631
632 // the format of the table is the first couple of bytes in the table. The
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000633 // length of the table is stored in a format-specific way.
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000634 for (unsigned i = 0; i < num_tables; ++i) {
635 table.set_offset(subtable_headers[i].offset);
636 if (!table.ReadU16(&subtable_headers[i].format)) {
637 return OTS_FAILURE();
638 }
639
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000640 uint16_t len = 0;
641 switch (subtable_headers[i].format) {
642 case 0:
643 case 4:
644 if (!table.ReadU16(&len)) {
645 return OTS_FAILURE();
646 }
647 subtable_headers[i].length = len;
648 break;
649 case 12:
650 case 13:
651 if (!table.Skip(2)) {
652 return OTS_FAILURE();
653 }
654 if (!table.ReadU32(&subtable_headers[i].length)) {
655 return OTS_FAILURE();
656 }
657 break;
bashi@google.com93aedf72010-12-24 00:29:23 +0000658 case 14:
659 if (!table.ReadU32(&subtable_headers[i].length)) {
660 return OTS_FAILURE();
661 }
662 break;
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000663 default:
664 subtable_headers[i].length = 0;
665 break;
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000666 }
667 }
668
669 // Now, verify that all the lengths are sane
670 for (unsigned i = 0; i < num_tables; ++i) {
671 if (!subtable_headers[i].length) continue;
672 if (subtable_headers[i].length > 1024 * 1024 * 1024) {
673 return OTS_FAILURE();
674 }
675 // We know that both the offset and length are < 1GB, so the following
676 // addition doesn't overflow
677 const uint32_t end_byte
678 = subtable_headers[i].offset + subtable_headers[i].length;
679 if (end_byte > length) {
680 return OTS_FAILURE();
681 }
682 }
683
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000684 // check that the cmap subtables are not overlapping.
685 std::set<std::pair<uint32_t, uint32_t> > uniq_checker;
686 std::vector<std::pair<uint32_t, uint8_t> > overlap_checker;
687 for (unsigned i = 0; i < num_tables; ++i) {
688 const uint32_t end_byte
689 = subtable_headers[i].offset + subtable_headers[i].length;
690
691 if (!uniq_checker.insert(std::make_pair(subtable_headers[i].offset,
692 end_byte)).second) {
693 // Sometimes Unicode table and MS table share exactly the same data.
694 // We'll allow this.
695 continue;
696 }
697 overlap_checker.push_back(
bashi@chromium.org79602bd2011-04-05 20:20:15 +0000698 std::make_pair(subtable_headers[i].offset,
699 static_cast<uint8_t>(1) /* start */));
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000700 overlap_checker.push_back(
bashi@chromium.org79602bd2011-04-05 20:20:15 +0000701 std::make_pair(end_byte, static_cast<uint8_t>(0) /* end */));
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000702 }
703 std::sort(overlap_checker.begin(), overlap_checker.end());
704 int overlap_count = 0;
705 for (unsigned i = 0; i < overlap_checker.size(); ++i) {
706 overlap_count += (overlap_checker[i].second ? 1 : -1);
707 if (overlap_count > 1) {
708 return OTS_FAILURE();
709 }
710 }
711
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000712 // we grab the number of glyphs in the file from the maxp table to make sure
713 // that the character map isn't referencing anything beyound this range.
714 if (!file->maxp) {
715 return OTS_FAILURE();
716 }
717 const uint16_t num_glyphs = file->maxp->num_glyphs;
718
719 // We only support a subset of the possible character map tables. Microsoft
720 // 'strongly recommends' that everyone supports the Unicode BMP table with
721 // the UCS-4 table for non-BMP glyphs. We'll pass the following subtables:
722 // Platform ID Encoding ID Format
723 // 0 0 4 (Unicode Default)
724 // 0 3 4 (Unicode BMP)
725 // 0 3 12 (Unicode UCS-4)
bashi@google.com93aedf72010-12-24 00:29:23 +0000726 // 0 5 14 (Unicode Variation Sequences)
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000727 // 1 0 0 (Mac Roman)
728 // 3 0 4 (MS Symbol)
729 // 3 1 4 (MS Unicode BMP)
730 // 3 10 12 (MS Unicode UCS-4)
731 // 3 10 13 (MS UCS-4 Fallback mapping)
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000732 //
733 // Note:
734 // * 0-0-4 table is (usually) written as a 3-1-4 table. If 3-1-4 table
735 // also exists, the 0-0-4 table is ignored.
bashi@google.com93aedf72010-12-24 00:29:23 +0000736 // * Unlike 0-0-4 table, 0-3-4 table is written as a 0-3-4 table.
737 // Some fonts which include 0-5-14 table seems to be required 0-3-4
738 // table. The 0-3-4 table will be wriiten even if 3-1-4 table also exists.
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000739 // * 0-3-12 table is written as a 3-10-12 table. If 3-10-12 table also
740 // exists, the 0-3-12 table is ignored.
741 //
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000742
743 for (unsigned i = 0; i < num_tables; ++i) {
744 if (subtable_headers[i].platform == 0) {
745 // Unicode platform
746
747 if ((subtable_headers[i].encoding == 0) &&
748 (subtable_headers[i].format == 4)) {
749 // parse and output the 0-0-4 table as 3-1-4 table. Sometimes the 0-0-4
750 // table actually points to MS symbol data and thus should be parsed as
751 // 3-0-4 table (e.g., marqueem.ttf and quixotic.ttf). This error will be
752 // recovered in ots_cmap_serialise().
bashi@google.com93aedf72010-12-24 00:29:23 +0000753 if (!ParseFormat4(file, 3, 1, data + subtable_headers[i].offset,
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000754 subtable_headers[i].length, num_glyphs)) {
755 return OTS_FAILURE();
756 }
757 } else if ((subtable_headers[i].encoding == 3) &&
758 (subtable_headers[i].format == 4)) {
bashi@google.com93aedf72010-12-24 00:29:23 +0000759 // parse and output the 0-3-4 table as 0-3-4 table.
760 if (!ParseFormat4(file, 0, 3, data + subtable_headers[i].offset,
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000761 subtable_headers[i].length, num_glyphs)) {
762 return OTS_FAILURE();
763 }
764 } else if ((subtable_headers[i].encoding == 3) &&
765 (subtable_headers[i].format == 12)) {
766 // parse and output the 0-3-12 table as 3-10-12 table.
767 if (!Parse31012(file, data + subtable_headers[i].offset,
768 subtable_headers[i].length, num_glyphs)) {
769 return OTS_FAILURE();
770 }
bashi@google.com93aedf72010-12-24 00:29:23 +0000771 } else if ((subtable_headers[i].encoding == 5) &&
772 (subtable_headers[i].format == 14)) {
773 if (!Parse0514(file, data + subtable_headers[i].offset,
774 subtable_headers[i].length, num_glyphs)) {
775 return OTS_FAILURE();
776 }
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000777 }
778 } else if (subtable_headers[i].platform == 1) {
779 // Mac platform
780
781 if ((subtable_headers[i].encoding == 0) &&
782 (subtable_headers[i].format == 0)) {
783 // parse and output the 1-0-0 table.
784 if (!Parse100(file, data + subtable_headers[i].offset,
785 subtable_headers[i].length)) {
786 return OTS_FAILURE();
787 }
788 }
789 } else if (subtable_headers[i].platform == 3) {
790 // MS platform
791
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000792 switch (subtable_headers[i].encoding) {
793 case 0:
794 case 1:
795 if (subtable_headers[i].format == 4) {
796 // parse 3-0-4 or 3-1-4 table.
bashi@google.com93aedf72010-12-24 00:29:23 +0000797 if (!ParseFormat4(file, subtable_headers[i].platform,
798 subtable_headers[i].encoding,
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000799 data + subtable_headers[i].offset,
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000800 subtable_headers[i].length, num_glyphs)) {
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000801 return OTS_FAILURE();
802 }
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000803 }
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000804 break;
805 case 10:
806 if (subtable_headers[i].format == 12) {
807 file->cmap->subtable_3_10_12.clear();
808 if (!Parse31012(file, data + subtable_headers[i].offset,
809 subtable_headers[i].length, num_glyphs)) {
810 return OTS_FAILURE();
811 }
812 } else if (subtable_headers[i].format == 13) {
813 file->cmap->subtable_3_10_13.clear();
814 if (!Parse31013(file, data + subtable_headers[i].offset,
815 subtable_headers[i].length, num_glyphs)) {
816 return OTS_FAILURE();
817 }
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000818 }
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000819 break;
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000820 }
821 }
822 }
823
824 return true;
825}
826
827bool ots_cmap_should_serialise(OpenTypeFile *file) {
agl@chromium.org2beaf1d2011-01-21 17:19:34 +0000828 return file->cmap != NULL;
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000829}
830
831bool ots_cmap_serialise(OTSStream *out, OpenTypeFile *file) {
agl@chromium.org2beaf1d2011-01-21 17:19:34 +0000832 const bool have_034 = file->cmap->subtable_0_3_4_data != NULL;
833 const bool have_0514 = file->cmap->subtable_0_5_14.size() != 0;
834 const bool have_100 = file->cmap->subtable_1_0_0.size() != 0;
835 const bool have_304 = file->cmap->subtable_3_0_4_data != NULL;
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000836 // MS Symbol and MS Unicode tables should not co-exist.
837 // See the comment above in 0-0-4 parser.
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000838 const bool have_314 = (!have_304) && file->cmap->subtable_3_1_4_data;
agl@chromium.org2beaf1d2011-01-21 17:19:34 +0000839 const bool have_31012 = file->cmap->subtable_3_10_12.size() != 0;
840 const bool have_31013 = file->cmap->subtable_3_10_13.size() != 0;
bashi@google.com93aedf72010-12-24 00:29:23 +0000841 const unsigned num_subtables = static_cast<unsigned>(have_034) +
842 static_cast<unsigned>(have_0514) +
843 static_cast<unsigned>(have_100) +
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000844 static_cast<unsigned>(have_304) +
845 static_cast<unsigned>(have_314) +
846 static_cast<unsigned>(have_31012) +
847 static_cast<unsigned>(have_31013);
848 const off_t table_start = out->Tell();
849
850 // Some fonts don't have 3-0-4 MS Symbol nor 3-1-4 Unicode BMP tables
bashi@chromium.org6462c582014-06-27 06:24:49 +0000851 // (e.g., old fonts for Mac). We don't support them except for color bitmap
852 // fonts.
bashi@google.com93aedf72010-12-24 00:29:23 +0000853 if (!have_304 && !have_314 && !have_034) {
bashi@chromium.org6462c582014-06-27 06:24:49 +0000854 if (!(file->cbdt && file->cblc)) {
855 return OTS_FAILURE();
856 }
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000857 }
858
859 if (!out->WriteU16(0) ||
860 !out->WriteU16(num_subtables)) {
861 return OTS_FAILURE();
862 }
863
864 const off_t record_offset = out->Tell();
865 if (!out->Pad(num_subtables * 8)) {
866 return OTS_FAILURE();
867 }
868
bashi@google.com93aedf72010-12-24 00:29:23 +0000869 const off_t offset_034 = out->Tell();
870 if (have_034) {
871 if (!out->Write(file->cmap->subtable_0_3_4_data,
872 file->cmap->subtable_0_3_4_length)) {
873 return OTS_FAILURE();
874 }
875 }
876
877 const off_t offset_0514 = out->Tell();
878 if (have_0514) {
879 const std::vector<ots::OpenTypeCMAPSubtableVSRecord> &records
880 = file->cmap->subtable_0_5_14;
881 const unsigned num_records = records.size();
882 if (!out->WriteU16(14) ||
883 !out->WriteU32(file->cmap->subtable_0_5_14_length) ||
884 !out->WriteU32(num_records)) {
885 return OTS_FAILURE();
886 }
887 for (unsigned i = 0; i < num_records; ++i) {
888 if (!out->WriteU24(records[i].var_selector) ||
889 !out->WriteU32(records[i].default_offset) ||
890 !out->WriteU32(records[i].non_default_offset)) {
891 return OTS_FAILURE();
892 }
893 }
894 for (unsigned i = 0; i < num_records; ++i) {
895 if (records[i].default_offset) {
896 const std::vector<ots::OpenTypeCMAPSubtableVSRange> &ranges
897 = records[i].ranges;
898 const unsigned num_ranges = ranges.size();
899 if (!out->Seek(records[i].default_offset + offset_0514) ||
900 !out->WriteU32(num_ranges)) {
901 return OTS_FAILURE();
902 }
903 for (unsigned j = 0; j < num_ranges; ++j) {
904 if (!out->WriteU24(ranges[j].unicode_value) ||
905 !out->WriteU8(ranges[j].additional_count)) {
906 return OTS_FAILURE();
907 }
908 }
909 }
910 if (records[i].non_default_offset) {
911 const std::vector<ots::OpenTypeCMAPSubtableVSMapping> &mappings
912 = records[i].mappings;
913 const unsigned num_mappings = mappings.size();
914 if (!out->Seek(records[i].non_default_offset + offset_0514) ||
915 !out->WriteU32(num_mappings)) {
916 return OTS_FAILURE();
917 }
918 for (unsigned j = 0; j < num_mappings; ++j) {
919 if (!out->WriteU24(mappings[j].unicode_value) ||
920 !out->WriteU16(mappings[j].glyph_id)) {
921 return OTS_FAILURE();
922 }
923 }
924 }
925 }
926 }
927
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000928 const off_t offset_100 = out->Tell();
929 if (have_100) {
930 if (!out->WriteU16(0) || // format
931 !out->WriteU16(6 + kFormat0ArraySize) || // length
932 !out->WriteU16(0)) { // language
933 return OTS_FAILURE();
934 }
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000935 if (!out->Write(&(file->cmap->subtable_1_0_0[0]), kFormat0ArraySize)) {
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000936 return OTS_FAILURE();
937 }
938 }
939
940 const off_t offset_304 = out->Tell();
941 if (have_304) {
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000942 if (!out->Write(file->cmap->subtable_3_0_4_data,
943 file->cmap->subtable_3_0_4_length)) {
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000944 return OTS_FAILURE();
945 }
946 }
947
948 const off_t offset_314 = out->Tell();
949 if (have_314) {
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000950 if (!out->Write(file->cmap->subtable_3_1_4_data,
951 file->cmap->subtable_3_1_4_length)) {
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000952 return OTS_FAILURE();
953 }
954 }
955
956 const off_t offset_31012 = out->Tell();
957 if (have_31012) {
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000958 std::vector<OpenTypeCMAPSubtableRange> &groups
959 = file->cmap->subtable_3_10_12;
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000960 const unsigned num_groups = groups.size();
961 if (!out->WriteU16(12) ||
962 !out->WriteU16(0) ||
963 !out->WriteU32(num_groups * 12 + 16) ||
964 !out->WriteU32(0) ||
965 !out->WriteU32(num_groups)) {
966 return OTS_FAILURE();
967 }
968
969 for (unsigned i = 0; i < num_groups; ++i) {
970 if (!out->WriteU32(groups[i].start_range) ||
971 !out->WriteU32(groups[i].end_range) ||
972 !out->WriteU32(groups[i].start_glyph_id)) {
973 return OTS_FAILURE();
974 }
975 }
976 }
977
978 const off_t offset_31013 = out->Tell();
979 if (have_31013) {
yusukes@chromium.orga4099a32009-11-12 01:43:51 +0000980 std::vector<OpenTypeCMAPSubtableRange> &groups
981 = file->cmap->subtable_3_10_13;
yusukes@chromium.orgd257d182009-11-04 04:56:32 +0000982 const unsigned num_groups = groups.size();
983 if (!out->WriteU16(13) ||
984 !out->WriteU16(0) ||
985 !out->WriteU32(num_groups * 12 + 14) ||
986 !out->WriteU32(0) ||
987 !out->WriteU32(num_groups)) {
988 return OTS_FAILURE();
989 }
990
991 for (unsigned i = 0; i < num_groups; ++i) {
992 if (!out->WriteU32(groups[i].start_range) ||
993 !out->WriteU32(groups[i].end_range) ||
994 !out->WriteU32(groups[i].start_glyph_id)) {
995 return OTS_FAILURE();
996 }
997 }
998 }
999
1000 const off_t table_end = out->Tell();
1001 // We might have hanging bytes from the above's checksum which the OTSStream
1002 // then merges into the table of offsets.
1003 OTSStream::ChecksumState saved_checksum = out->SaveChecksumState();
1004 out->ResetChecksum();
1005
1006 // Now seek back and write the table of offsets
1007 if (!out->Seek(record_offset)) {
1008 return OTS_FAILURE();
1009 }
1010
bashi@google.com93aedf72010-12-24 00:29:23 +00001011 if (have_034) {
1012 if (!out->WriteU16(0) ||
1013 !out->WriteU16(3) ||
1014 !out->WriteU32(offset_034 - table_start)) {
1015 return OTS_FAILURE();
1016 }
1017 }
1018
1019 if (have_0514) {
1020 if (!out->WriteU16(0) ||
1021 !out->WriteU16(5) ||
1022 !out->WriteU32(offset_0514 - table_start)) {
1023 return OTS_FAILURE();
1024 }
1025 }
1026
yusukes@chromium.orgd257d182009-11-04 04:56:32 +00001027 if (have_100) {
1028 if (!out->WriteU16(1) ||
1029 !out->WriteU16(0) ||
1030 !out->WriteU32(offset_100 - table_start)) {
1031 return OTS_FAILURE();
1032 }
1033 }
1034
1035 if (have_304) {
1036 if (!out->WriteU16(3) ||
1037 !out->WriteU16(0) ||
1038 !out->WriteU32(offset_304 - table_start)) {
1039 return OTS_FAILURE();
1040 }
1041 }
1042
1043 if (have_314) {
1044 if (!out->WriteU16(3) ||
1045 !out->WriteU16(1) ||
1046 !out->WriteU32(offset_314 - table_start)) {
1047 return OTS_FAILURE();
1048 }
1049 }
1050
1051 if (have_31012) {
1052 if (!out->WriteU16(3) ||
1053 !out->WriteU16(10) ||
1054 !out->WriteU32(offset_31012 - table_start)) {
1055 return OTS_FAILURE();
1056 }
1057 }
1058
1059 if (have_31013) {
1060 if (!out->WriteU16(3) ||
1061 !out->WriteU16(10) ||
1062 !out->WriteU32(offset_31013 - table_start)) {
1063 return OTS_FAILURE();
1064 }
1065 }
1066
1067 if (!out->Seek(table_end)) {
1068 return OTS_FAILURE();
1069 }
1070 out->RestoreChecksum(saved_checksum);
1071
1072 return true;
1073}
1074
1075void ots_cmap_free(OpenTypeFile *file) {
1076 delete file->cmap;
1077}
1078
1079} // namespace ots