blob: 219fa2de7e50f8b50d503f1823c2757ebf3c1db2 [file] [log] [blame]
Roozbeh Pournader0e969e22016-03-09 23:08:45 -08001#!/usr/bin/env python
2
3import collections
Roozbeh Pournader5dde0872016-03-31 13:54:56 -07004import copy
Roozbeh Pournader0e969e22016-03-09 23:08:45 -08005import glob
Roozbeh Pournader5dde0872016-03-31 13:54:56 -07006import itertools
Roozbeh Pournader0e969e22016-03-09 23:08:45 -08007from os import path
8import sys
9from xml.etree import ElementTree
10
11from fontTools import ttLib
12
# Code point of the emoji variation selector (U+FE0F).
EMOJI_VS = 0xFE0F

# Script code (four letters, title-cased) assumed for each bare language code.
# Consulted by lang_to_script() when a language tag names no explicit script.
LANG_TO_SCRIPT = {
    'as': 'Beng',
    'bn': 'Beng',
    'cy': 'Latn',
    'da': 'Latn',
    'de': 'Latn',
    'en': 'Latn',
    'es': 'Latn',
    'et': 'Latn',
    'eu': 'Latn',
    'fr': 'Latn',
    'ga': 'Latn',
    'gu': 'Gujr',
    'hi': 'Deva',
    'hr': 'Latn',
    'hu': 'Latn',
    'hy': 'Armn',
    'ja': 'Jpan',
    'kn': 'Knda',
    'ko': 'Kore',
    'ml': 'Mlym',
    'mn': 'Cyrl',
    'mr': 'Deva',
    'nb': 'Latn',
    'nn': 'Latn',
    'or': 'Orya',
    'pa': 'Guru',
    'pt': 'Latn',
    'sl': 'Latn',
    'ta': 'Taml',
    'te': 'Telu',
    'tk': 'Latn',
}
48
def lang_to_script(lang_code):
    """Return the script code a language tag is written in.

    The tag is lower-cased and looked up in LANG_TO_SCRIPT.  While it is not
    found, the last hyphen-separated subtag is removed and the lookup is
    retried; if the removed subtag is four alphabetic characters it is taken
    to be the script itself and returned title-cased.

    Fails an assertion when no hyphen is left to strip and the language is
    still unknown.
    """
    remaining = lang_code.lower()
    while True:
        if remaining in LANG_TO_SCRIPT:
            return LANG_TO_SCRIPT[remaining]
        head, hyphen, subtag = remaining.rpartition('-')
        assert hyphen, (
            'We do not know what script the "%s" language is written in.'
            % lang_code)
        if len(subtag) == 4 and subtag.isalpha():
            # This is actually the script
            return subtag.title()
        remaining = head
62
63
def printable(inp):
    """Format code points for use in diagnostic messages.

    Args:
        inp: a single code point (int), a character sequence (tuple of code
            points), or a set/frozenset of either.

    Returns:
        'U+XXXX' for a code point, '<U+XXXX, ...>' for a sequence and
        '{...}' for a set, with the members formatted recursively.
    """
    # isinstance (rather than an exact type check) so that frozensets, which
    # this file uses for several emoji tables, are formatted like sets.
    if isinstance(inp, (set, frozenset)):  # set of character sequences
        return '{' + ', '.join(printable(seq) for seq in inp) + '}'
    if isinstance(inp, tuple):  # character sequence
        return '<' + ', '.join(printable(ch) for ch in inp) + '>'
    return 'U+%04X' % inp  # single character
71
72
def open_font(font):
    """Open a font from the fonts directory with fontTools.

    Args:
        font: a (file name, index) pair.  The file name is resolved against
            the module-level _fonts_dir; a non-None index is passed to
            TTFont as fontNumber.
    """
    file_name, font_number = font
    full_path = path.join(_fonts_dir, file_name)
    if font_number is None:
        return ttLib.TTFont(full_path)
    return ttLib.TTFont(full_path, fontNumber=font_number)
80
81
def get_best_cmap(font):
    """Return the character-to-glyph mapping of the font's best Unicode cmap.

    The UCS-4 subtable (format 12, platform 3, encoding 10) is preferred;
    the BMP subtable (format 4, platform 3, encoding 1) is the fallback.

    Args:
        font: a (file name, index) pair as accepted by open_font().

    Fails an assertion if either kind of subtable occurs more than once, or
    if the font has neither of them.
    """
    ttfont = open_font(font)
    all_unicode_cmap = None
    bmp_cmap = None
    for cmap in ttfont['cmap'].tables:
        specifier = (cmap.format, cmap.platformID, cmap.platEncID)
        if specifier == (4, 3, 1):
            assert bmp_cmap is None, 'More than one BMP cmap in %s' % (font, )
            bmp_cmap = cmap
        elif specifier == (12, 3, 10):
            assert all_unicode_cmap is None, (
                'More than one UCS-4 cmap in %s' % (font, ))
            all_unicode_cmap = cmap

    best_cmap = all_unicode_cmap if all_unicode_cmap else bmp_cmap
    # Report a missing cmap by font name instead of failing with an opaque
    # AttributeError on None.
    assert best_cmap is not None, (
        'No BMP or UCS-4 cmap found in %s' % (font, ))
    return best_cmap.cmap
97
98
def get_variation_sequences_cmap(font):
    """Return the font's variation-sequence cmap subtable, or None.

    The subtable looked for is format 14, platform 0, encoding 5.  Fails an
    assertion if the font has more than one of them.
    """
    vs_cmap = None
    for subtable in open_font(font)['cmap'].tables:
        specifier = (subtable.format, subtable.platformID, subtable.platEncID)
        if specifier != (14, 0, 5):
            continue
        assert vs_cmap is None, 'More than one VS cmap in %s' % (font, )
        vs_cmap = subtable
    return vs_cmap
108
109
def get_emoji_map(font):
    """Build a map from everything the font supports to the glyph it yields.

    Keys are single code points (taken from the best cmap), (base, variation
    selector) pairs (taken from the variation-sequence cmap) and tuples of
    code points (taken from the GSUB ligature lookups).
    """
    # Add normal characters
    emoji_map = copy.copy(get_best_cmap(font))
    glyph_to_code = {glyph: code for code, glyph in emoji_map.items()}

    # Add variation sequences; an entry without a glyph of its own reuses
    # the glyph of its base character.
    for vs, entries in get_variation_sequences_cmap(font).uvsDict.items():
        for base, glyph in entries:
            emoji_map[(base, vs)] = emoji_map[base] if glyph is None else glyph

    # Add GSUB rules
    for lookup in open_font(font)['GSUB'].table.LookupList.Lookup:
        assert lookup.LookupType == 4, 'We only understand type 4 lookups'
        for subtable in lookup.SubTable:
            for first_glyph, ligature_set in subtable.ligatures.items():
                for ligature in ligature_set:
                    glyphs = [first_glyph] + ligature.Component
                    sequence = tuple(glyph_to_code[glyph] for glyph in glyphs)
                    # Make sure no starting subsequence of 'sequence' has been
                    # seen before.
                    for prefix_len in range(2, len(sequence) + 1):
                        assert sequence[:prefix_len] not in emoji_map
                    emoji_map[sequence] = ligature.LigGlyph

    return emoji_map
143
144
def assert_font_supports_any_of_chars(font, chars):
    """Exit the program unless the font's cmap has at least one of chars."""
    best_cmap = get_best_cmap(font)
    if not any(char in best_cmap for char in chars):
        sys.exit('None of characters in %s were found in %s' % (chars, font))
151
152
def assert_font_supports_all_of_chars(font, chars):
    """Assert that every code point in chars is present in the font's cmap."""
    supported = get_best_cmap(font)
    for code_point in chars:
        assert code_point in supported, (
            'U+%04X was not found in %s' % (code_point, font))
158
159
def assert_font_supports_none_of_chars(font, chars):
    """Assert that no code point in chars is present in the font's cmap."""
    supported = get_best_cmap(font)
    for code_point in chars:
        assert code_point not in supported, (
            'U+%04X was found in %s' % (code_point, font))
165
166
def assert_font_supports_all_sequences(font, sequences):
    """Assert that each (base, vs) pair is listed in the font's VS cmap.

    A pair counts as supported when the variation selector has an entry in
    the subtable's uvsDict and that entry contains (base, None).
    """
    uvs = get_variation_sequences_cmap(font).uvsDict
    for base, selector in sorted(sequences):
        assert selector in uvs and (base, None) in uvs[selector], (
            '<U+%04X, U+%04X> was not found in %s' % (base, selector, font))
172
173
def check_hyphens(hyphens_dir):
    """Check that fonts for hyphenated scripts contain a hyphen character.

    Every 'hyph-<lang>.hyb' file in hyphens_dir contributes the script of
    <lang>; each font mapped to one of those scripts must support U+002D or
    U+2010.
    """
    HYPHENS = {0x002D, 0x2010}

    # Find all the scripts that need automatic hyphenation
    scripts = set()
    for hyb_path in glob.iglob(path.join(hyphens_dir, '*.hyb')):
        file_name = path.basename(hyb_path)
        assert file_name.startswith('hyph-'), (
            'Unknown hyphenation file %s' % file_name)
        # The language code sits between the first '-' and the first '.'.
        code_start = file_name.index('-') + 1
        code_end = file_name.index('.')
        scripts.add(lang_to_script(file_name[code_start:code_end]))

    for script in scripts:
        fonts = _script_to_font_map[script]
        assert fonts, 'No fonts found for the "%s" script' % script
        for font in fonts:
            assert_font_supports_any_of_chars(font, HYPHENS)
190
191
class FontRecord(object):
    """One <font> entry of fonts.xml together with its family's attributes."""

    def __init__(self, name, scripts, variant, weight, style, font):
        # name/scripts/variant describe the enclosing family; weight/style
        # describe this font; font is the (file name, index) pair.
        (self.name, self.scripts, self.variant,
         self.weight, self.style, self.font) = (
            name, scripts, variant, weight, style, font)
200
201
def parse_fonts_xml(fonts_xml_path):
    """Parse fonts.xml into the module-level font tables.

    Populates two globals:
      _fallback_chain: list of FontRecord, in document order.
      _script_to_font_map: script code -> set of (file name, index) pairs.
          Fonts of named families are registered for Latn, Grek and Cyrl;
          other fonts are registered for the scripts of the family's 'lang'
          attribute.

    Fails an assertion on unexpected attributes, tags, weights or styles.
    """
    global _script_to_font_map, _fallback_chain
    _script_to_font_map = collections.defaultdict(set)
    _fallback_chain = []
    tree = ElementTree.parse(fonts_xml_path)
    for family in tree.findall('family'):
        name = family.get('name')
        variant = family.get('variant')
        langs = family.get('lang')
        if name:
            assert variant is None, (
                'No variant expected for LGC font %s.' % name)
            assert langs is None, (
                'No language expected for LGC fonts %s.' % name)
        else:
            assert variant in {None, 'elegant', 'compact'}, (
                'Unexpected value for variant: %s' % variant)

        if langs:
            scripts = {lang_to_script(lang) for lang in langs.split()}
        else:
            scripts = set()

        for child in family:
            assert child.tag == 'font', (
                'Unknown tag <%s>' % child.tag)
            # The element text may carry whitespace from XML formatting;
            # strip it so it does not end up in the font path.
            font_file = (child.text or '').strip()
            assert font_file, (
                'Empty <font> element in %s' % fonts_xml_path)
            weight = int(child.get('weight'))
            assert weight % 100 == 0, (
                'Font weight "%d" is not a multiple of 100.' % weight)

            style = child.get('style')
            assert style in {'normal', 'italic'}, (
                'Unknown style "%s"' % style)

            # A missing or empty index attribute means "no index" (None),
            # which is what open_font() tests for.
            index_attr = child.get('index')
            index = int(index_attr) if index_attr else None

            _fallback_chain.append(FontRecord(
                name,
                frozenset(scripts),
                variant,
                weight,
                style,
                (font_file, index)))

            if name:  # non-empty names are used for default LGC fonts
                map_scripts = {'Latn', 'Grek', 'Cyrl'}
            else:
                map_scripts = scripts
            for script in map_scripts:
                _script_to_font_map[script].add((font_file, index))
256
257
def check_emoji_coverage(all_emoji, equivalent_emoji):
    """Run the coverage check against the emoji font of the fallback chain."""
    check_emoji_font_coverage(get_emoji_font(), all_emoji, equivalent_emoji)
Doug Feltf874a192016-07-08 17:42:15 -0700261
262
def get_emoji_font():
    """Return the (file name, index) of the single font tagged 'Zsye'.

    Fails an assertion unless exactly one record in the fallback chain has
    'Zsye' among its scripts.
    """
    emoji_fonts = []
    for record in _fallback_chain:
        if 'Zsye' in record.scripts:
            emoji_fonts.append(record.font)
    assert len(emoji_fonts) == 1, 'There are %d emoji fonts.' % len(emoji_fonts)
    return emoji_fonts[0]
Roozbeh Pournader5dde0872016-03-31 13:54:56 -0700269
Doug Feltf874a192016-07-08 17:42:15 -0700270
def check_emoji_font_coverage(emoji_font, all_emoji, equivalent_emoji):
    """Check that the emoji font covers exactly the expected emoji.

    Args:
        emoji_font: (file name, index) pair of the emoji font.
        all_emoji: set of code points and sequences the font must support.
        equivalent_emoji: dict mapping a code point or sequence to the one
            it must share a glyph with.

    Fails an assertion when an expected emoji is missing, when the font
    supports something unexpected, when declared equivalents map to
    different glyphs, or when sequences that are not declared equivalent
    share a glyph.
    """
    def sort_key(item):
        # Keys are code points (ints) or sequences (tuples).  Wrapping ints
        # makes every pair of keys comparable, which plain sorting of mixed
        # ints and tuples is not on Python 3.
        key = item[0]
        return key if isinstance(key, tuple) else (key,)

    coverage = get_emoji_map(emoji_font)
    for sequence in all_emoji:
        assert sequence in coverage, (
            '%s is not supported in the emoji font.' % printable(sequence))

    # The font needs to support a few extra characters, which is OK
    allowed_extras = {0x0000, 0x000D, 0x0020}
    for sequence in coverage:
        if sequence in allowed_extras:
            continue
        assert sequence in all_emoji, (
            'Emoji font should not support %s.' % printable(sequence))

    for first, second in sorted(equivalent_emoji.items(), key=sort_key):
        assert coverage[first] == coverage[second], (
            '%s and %s should map to the same glyph.' % (
                printable(first),
                printable(second)))

    # Group the sequences by glyph in one pass instead of rescanning the
    # whole coverage map for every glyph.
    sequences_by_glyph = {}
    for seq, glyph in coverage.items():
        sequences_by_glyph.setdefault(glyph, []).append(seq)

    for glyph, maps_to_glyph in sequences_by_glyph.items():
        if len(maps_to_glyph) <= 1:
            continue
        # There are more than one sequences mapping to the same glyph. We
        # need to make sure they were expected to be equivalent.
        equivalent_seqs = set()
        for seq in maps_to_glyph:
            equivalent_seq = seq
            # Track visited entries so a self-mapping or cycle in
            # equivalent_emoji ends the walk instead of looping forever.
            visited = set()
            while (equivalent_seq in equivalent_emoji
                   and equivalent_seq not in visited):
                visited.add(equivalent_seq)
                equivalent_seq = equivalent_emoji[equivalent_seq]
            equivalent_seqs.add(equivalent_seq)
        assert len(equivalent_seqs) == 1, (
            'The sequences %s should not result in the same glyph %s' % (
                printable(equivalent_seqs),
                glyph))
Roozbeh Pournader3b3c78e2016-07-25 14:04:34 -0700305
Roozbeh Pournaderfa1facc2016-03-16 13:53:47 -0700306
def check_emoji_defaults(default_emoji):
    """Check which fonts in the fallback chain may cover emoji characters.

    Fonts other than the emoji font must not support any default
    emoji-style entry (with the exemptions noted below), and every
    character with the Emoji property that is not emoji-style by default
    must be found in a font that precedes the emoji font.

    Args:
        default_emoji: set of code points and sequences displayed in emoji
            style by default.
    """
    def sort_key(entry):
        # Entries are code points (ints) or sequences (tuples).  Wrapping
        # ints makes them mutually comparable, which plain sorting of mixed
        # ints and tuples is not on Python 3.
        return entry if isinstance(entry, tuple) else (entry,)

    # Sorted once, outside the loop; the order only makes it deterministic
    # which offending character is reported first.
    sorted_default_emoji = sorted(default_emoji, key=sort_key)

    missing_text_chars = _emoji_properties['Emoji'] - default_emoji
    emoji_font_seen = False
    for record in _fallback_chain:
        if 'Zsye' in record.scripts:
            emoji_font_seen = True
            # No need to check the emoji font
            continue
        # For later fonts, we only check them if they have a script
        # defined, since the defined script may get them to a higher
        # score even if they appear after the emoji font. However,
        # we should skip checking the text symbols font, since
        # symbol fonts should be able to override the emoji display
        # style when 'Zsym' is explicitly specified by the user.
        if emoji_font_seen and (not record.scripts or 'Zsym' in record.scripts):
            continue

        # Check default emoji-style characters
        assert_font_supports_none_of_chars(record.font, sorted_default_emoji)

        # Mark default text-style characters appearing in fonts above the emoji
        # font as seen
        if not emoji_font_seen:
            missing_text_chars -= set(get_best_cmap(record.font))

    # Noto does not have monochrome glyphs for Unicode 7.0 wingdings and
    # webdings yet.
    missing_text_chars -= _chars_by_age['7.0']
    assert missing_text_chars == set(), (
        'Text style version of some emoji characters are missing: ' +
        repr(missing_text_chars))
Roozbeh Pournaderfa1facc2016-03-16 13:53:47 -0700338
339
# Setting reverse to true returns a dictionary that maps the values to sets of
# characters, useful for some binary properties. Otherwise, we get a
# dictionary that maps characters to the property values, assuming there's only
# one property in the file.
def parse_unicode_datafile(file_path, reverse=False):
    """Parse a semicolon-separated Unicode data file.

    Each non-empty line, after removing '#' comments, is read as
    '<chars> ; <property> [; ...]', where <chars> is a single code point, a
    'start..end' range, or a space-separated sequence of code points (all
    hexadecimal).  Single code points and range members become ints;
    sequences become tuples of ints.

    Args:
        file_path: path of the data file.
        reverse: if true, return {property: set of characters}; otherwise
            return {character: property}.

    Fails an assertion if, with reverse false, a character is listed twice.
    """
    if reverse:
        output_dict = collections.defaultdict(set)
    else:
        output_dict = {}
    with open(file_path) as datafile:
        for line in datafile:
            if '#' in line:
                line = line[:line.index('#')]
            line = line.strip()
            if not line:
                continue

            chars, prop = line.split(';')[:2]
            chars = chars.strip()
            prop = prop.strip()

            if ' ' in chars:  # character sequence
                # split() without an argument tolerates repeated spaces.
                sequence = [int(ch, 16) for ch in chars.split()]
                additions = [tuple(sequence)]
            elif '..' in chars:  # character range
                char_start, char_end = chars.split('..')
                char_start = int(char_start, 16)
                char_end = int(char_end, 16)
                # range() exists on both Python 2 and 3; xrange does not.
                additions = range(char_start, char_end + 1)
            else:  # single character
                additions = [int(chars, 16)]
            if reverse:
                output_dict[prop].update(additions)
            else:
                for addition in additions:
                    assert addition not in output_dict
                    output_dict[addition] = prop
    return output_dict
378
379
def parse_standardized_variants(file_path):
    """Collect the text-style and emoji-style variation sequences of a file.

    Each non-empty line, after removing '#' comments, must consist of three
    ';'-separated fields; the first holds a space-separated base and
    variation selector in hexadecimal, the second a description.

    Returns:
        (text_set, emoji_set): sets of (base, vs) pairs whose description is
        'text style' and 'emoji style' respectively.  Other descriptions are
        ignored.
    """
    emoji_set = set()
    text_set = set()
    set_for_description = {
        'text style': text_set,
        'emoji style': emoji_set,
    }
    with open(file_path) as datafile:
        for raw_line in datafile:
            content = raw_line.partition('#')[0].strip()
            if not content:
                continue
            sequence, description, _ = content.split(';')
            code_points = sequence.strip().split(' ')
            base = int(code_points[0], 16)
            vs = int(code_points[1], 16)
            # Compare against None: the target sets start out empty (falsy).
            target = set_for_description.get(description.strip())
            if target is not None:
                target.add((base, vs))
    return text_set, emoji_set
400
401
def parse_ucd(ucd_path):
    """Load the Unicode data files under ucd_path into module-level globals.

    Reads emoji-data.txt and DerivedAge.txt (as property -> characters),
    StandardizedVariants.txt, emoji-sequences.txt and
    emoji-zwj-sequences.txt (the last two as character -> property).
    """
    global _emoji_properties, _chars_by_age
    global _text_variation_sequences, _emoji_variation_sequences
    global _emoji_sequences, _emoji_zwj_sequences

    def ucd_file(file_name):
        return path.join(ucd_path, file_name)

    _emoji_properties = parse_unicode_datafile(
        ucd_file('emoji-data.txt'), reverse=True)
    _chars_by_age = parse_unicode_datafile(
        ucd_file('DerivedAge.txt'), reverse=True)
    _text_variation_sequences, _emoji_variation_sequences = (
        parse_standardized_variants(ucd_file('StandardizedVariants.txt')))
    _emoji_sequences = parse_unicode_datafile(
        ucd_file('emoji-sequences.txt'))
    _emoji_zwj_sequences = parse_unicode_datafile(
        ucd_file('emoji-zwj-sequences.txt'))
417
418
def flag_sequence(territory_code):
    """Return the flag sequence for a territory code as a tuple of code points.

    Each letter is mapped to a code point counted from U+1F1E6 for 'A'.
    """
    offset = 0x1F1E6 - ord('A')
    return tuple(ord(letter) + offset for letter in territory_code)
421
422
# Flag sequences listed as unsupported; compute_expected_emoji() adds them to
# the set of flags that should all resolve to the same glyph.
UNSUPPORTED_FLAGS = frozenset(
    flag_sequence(territory) for territory in (
        'BL', 'BQ', 'DG',
        'EA', 'EH', 'FK',
        'GF', 'GP', 'GS',
        'MF', 'MQ', 'NC',
        'PM', 'RE', 'TF',
        'UN', 'WF', 'XK',
        'YT',
    ))

# Flags that should map to the same glyph as another territory's flag.
EQUIVALENT_FLAGS = {
    flag_sequence(alias): flag_sequence(target)
    for alias, target in (
        ('BV', 'NO'),
        ('CP', 'FR'),
        ('HM', 'AU'),
        ('SJ', 'NO'),
        ('UM', 'US'),
    )
}

COMBINING_KEYCAP = 0x20E3
442
# Characters that Android defaults to emoji style, different from the
# recommendations in UTR #51
ANDROID_DEFAULT_EMOJI = frozenset((
    0x2600,  # BLACK SUN WITH RAYS
    0x2601,  # CLOUD
    0x260E,  # BLACK TELEPHONE
    0x261D,  # WHITE UP POINTING INDEX
    0x263A,  # WHITE SMILING FACE
    0x2660,  # BLACK SPADE SUIT
    0x2663,  # BLACK CLUB SUIT
    0x2665,  # BLACK HEART SUIT
    0x2666,  # BLACK DIAMOND SUIT
    0x270C,  # VICTORY HAND
    0x2744,  # SNOWFLAKE
    0x2764,  # HEAVY BLACK HEART
))
458
# Legacy code points and the standard sequences they are equivalent to;
# compute_expected_emoji() expects each pair to share a glyph.
LEGACY_ANDROID_EMOJI = {
    # Flags
    0xFE4E5: flag_sequence('JP'),
    0xFE4E6: flag_sequence('US'),
    0xFE4E7: flag_sequence('FR'),
    0xFE4E8: flag_sequence('DE'),
    0xFE4E9: flag_sequence('IT'),
    0xFE4EA: flag_sequence('GB'),
    0xFE4EB: flag_sequence('ES'),
    0xFE4EC: flag_sequence('RU'),
    0xFE4ED: flag_sequence('CN'),
    0xFE4EE: flag_sequence('KR'),
    # Keycaps
    0xFE82C: (ord('#'), COMBINING_KEYCAP),
    0xFE82E: (ord('1'), COMBINING_KEYCAP),
    0xFE82F: (ord('2'), COMBINING_KEYCAP),
    0xFE830: (ord('3'), COMBINING_KEYCAP),
    0xFE831: (ord('4'), COMBINING_KEYCAP),
    0xFE832: (ord('5'), COMBINING_KEYCAP),
    0xFE833: (ord('6'), COMBINING_KEYCAP),
    0xFE834: (ord('7'), COMBINING_KEYCAP),
    0xFE835: (ord('8'), COMBINING_KEYCAP),
    0xFE836: (ord('9'), COMBINING_KEYCAP),
    0xFE837: (ord('0'), COMBINING_KEYCAP),
}
482
# ZWJ sequences that should share a glyph with a single code point.
ZWJ_IDENTICALS = {
    # KISS
    (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F48B, 0x200D, 0x1F468): 0x1F48F,
    # COUPLE WITH HEART
    (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F468): 0x1F491,
    # FAMILY
    (0x1F468, 0x200D, 0x1F469, 0x200D, 0x1F466): 0x1F46A,
}
491
Doug Feltf874a192016-07-08 17:42:15 -0700492
def is_fitzpatrick_modifier(cp):
    """Return whether cp lies in the range U+1F3FB..U+1F3FF."""
    return 0x1F3FB <= cp <= 0x1F3FF


def reverse_emoji(seq):
    """Return seq reversed, with each modifier kept after its emoji.

    Args:
        seq: tuple of code points.

    Returns:
        A tuple with the code points in reverse order, except that a
        Fitzpatrick modifier still directly follows the code point it
        followed in seq.
    """
    rev = list(reversed(seq))
    # if there are fitzpatrick modifiers in the sequence, keep them after
    # the emoji they modify
    i = 1
    while i < len(rev):
        if is_fitzpatrick_modifier(rev[i - 1]):
            rev[i - 1], rev[i] = rev[i], rev[i - 1]
            # The modifier now sits at position i.  Step past it so it is
            # not swapped again and carried further down the sequence.
            i += 2
        else:
            i += 1
    return tuple(rev)
Doug Feltf874a192016-07-08 17:42:15 -0700505
506
def compute_expected_emoji():
    """Derive the expected emoji sets from the parsed Unicode data.

    Returns:
        (all_emoji, default_emoji, equivalent_emoji): the set of everything
        the emoji font should support, the set of entries that default to
        emoji style, and a dict mapping a code point or sequence to the one
        it should share a glyph with.
    """
    def without_vs(seq):
        return tuple(ch for ch in seq if ch != EMOJI_VS)

    equivalent_emoji = {}
    sequence_pieces = set()
    all_sequences = set(_emoji_variation_sequences)

    # add zwj sequences not in the current emoji-zwj-sequences.txt
    adjusted_emoji_zwj_sequences = dict(_emoji_zwj_sequences)
    # single parent families
    additional_emoji_zwj = (
        (0x1F468, 0x200D, 0x1F466),
        (0x1F468, 0x200D, 0x1F467),
        (0x1F468, 0x200D, 0x1F466, 0x200D, 0x1F466),
        (0x1F468, 0x200D, 0x1F467, 0x200D, 0x1F466),
        (0x1F468, 0x200D, 0x1F467, 0x200D, 0x1F467),
        (0x1F469, 0x200D, 0x1F466),
        (0x1F469, 0x200D, 0x1F467),
        (0x1F469, 0x200D, 0x1F466, 0x200D, 0x1F466),
        (0x1F469, 0x200D, 0x1F467, 0x200D, 0x1F466),
        (0x1F469, 0x200D, 0x1F467, 0x200D, 0x1F467),
    )
    # sequences formed from man and woman and optional fitzpatrick modifier
    modified_extensions = (
        0x2696,
        0x2708,
        0x1F3A8,
        0x1F680,
        0x1F692,
    )
    for seq in additional_emoji_zwj:
        adjusted_emoji_zwj_sequences[seq] = 'Emoji_ZWJ_Sequence'
    for ext in modified_extensions:
        for base in (0x1F468, 0x1F469):
            adjusted_emoji_zwj_sequences[(base, 0x200D, ext)] = (
                'Emoji_ZWJ_Sequence')
            for modifier in range(0x1F3FB, 0x1F400):
                adjusted_emoji_zwj_sequences[(base, modifier, 0x200D, ext)] = (
                    'Emoji_ZWJ_Sequence')

    for raw_sequence in _emoji_sequences:
        sequence = without_vs(raw_sequence)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)

    for raw_sequence in adjusted_emoji_zwj_sequences:
        sequence = without_vs(raw_sequence)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)
        # Add reverse of all emoji ZWJ sequences, which are added to the fonts
        # as a workaround to get the sequences work in RTL text.
        reversed_seq = reverse_emoji(sequence)
        all_sequences.add(reversed_seq)
        equivalent_emoji[reversed_seq] = sequence

    # Add all two-letter flag sequences, as even the unsupported ones should
    # resolve to a flag tofu.
    all_letters = [chr(code) for code in range(ord('A'), ord('Z') + 1)]
    all_flags = {
        flag_sequence(code)
        for code in itertools.product(all_letters, repeat=2)
    }
    all_sequences.update(all_flags)
    tofu_flags = UNSUPPORTED_FLAGS | all_flags.difference(_emoji_sequences)

    legacy_code_points = set(LEGACY_ANDROID_EMOJI)
    all_emoji = (
        _emoji_properties['Emoji'] |
        all_sequences |
        sequence_pieces |
        legacy_code_points)
    default_emoji = (
        _emoji_properties['Emoji_Presentation'] |
        ANDROID_DEFAULT_EMOJI |
        all_sequences |
        legacy_code_points)

    # Every tofu flag is declared equivalent to the first one in sort order.
    first_tofu_flag = sorted(tofu_flags)[0]
    for flag in tofu_flags:
        if flag != first_tofu_flag:
            equivalent_emoji[flag] = first_tofu_flag
    equivalent_emoji.update(EQUIVALENT_FLAGS)
    equivalent_emoji.update(LEGACY_ANDROID_EMOJI)
    equivalent_emoji.update(ZWJ_IDENTICALS)
    for seq in _emoji_variation_sequences:
        equivalent_emoji[seq] = seq[0]

    return all_emoji, default_emoji, equivalent_emoji
Roozbeh Pournaderfa1facc2016-03-16 13:53:47 -0700592
593
def check_vertical_metrics():
    """Check the vertical metrics of the named default font families.

    For 'sans-serif' and 'sans-serif-condensed' fonts the head table must
    have yMax 2163 and yMin -555; for those two plus 'serif' and 'monospace'
    the hhea table must have ascent 1900 and descent -500.
    """
    head_checked = ('sans-serif', 'sans-serif-condensed')
    hhea_checked = head_checked + ('serif', 'monospace')
    for record in _fallback_chain:
        if record.name in head_checked:
            head = open_font(record.font)['head']
            assert (head.yMax, head.yMin) == (2163, -555), (
                'yMax and yMin of %s do not match expected values.' % (record.font,))

        if record.name in hhea_checked:
            hhea = open_font(record.font)['hhea']
            assert (hhea.ascent, hhea.descent) == (1900, -500), (
                'ascent and descent of %s do not match expected values.' % (record.font,))
Roozbeh Pournaderbac1aec2016-07-27 13:08:37 -0700605
606
def main():
    """Run the font checks.

    Command line: <target_out> <check_emoji> [<ucd_path>].  Fonts are read
    from <target_out>/fonts, the configuration from
    <target_out>/etc/fonts.xml and hyphenation data from
    <target_out>/usr/hyphen-data.  The emoji checks run only when
    <check_emoji> is the string 'true', in which case <ucd_path> is read.
    """
    global _fonts_dir
    argv = sys.argv
    target_out = argv[1]
    _fonts_dir = path.join(target_out, 'fonts')

    parse_fonts_xml(path.join(target_out, 'etc', 'fonts.xml'))

    check_vertical_metrics()

    check_hyphens(path.join(target_out, 'usr', 'hyphen-data'))

    if argv[2] != 'true':
        return
    parse_ucd(argv[3])
    all_emoji, default_emoji, equivalent_emoji = compute_expected_emoji()
    check_emoji_coverage(all_emoji, equivalent_emoji)
    check_emoji_defaults(default_emoji)


if __name__ == '__main__':
    main()