blob: 944da3156e4f8d2cca3bb5743372d31ce5d4d29e [file] [log] [blame]
Hal Canaryb5680ca2018-04-03 17:25:15 -04001#! /usr/bin/env python
halcanary627ad6d2016-07-01 08:48:12 -07002
3# Copyright 2016 Google Inc.
4#
5# Use of this source code is governed by a BSD-style license that can be
6# found in the LICENSE file.
7
8import array, zlib, base64, sys
9
# The table below was produced by the following (Python 2) script:
#
# import array, zlib, base64, sys, unicodedata
# def codepoints():
#     for i in range(0x110000):
#         try:
#             u = ('\\U%08x' % i).decode('unicode-escape')
#             n = unicodedata.name(u)
#             c = unicodedata.category(u)
#             if c[0] == 'C' or n.startswith('VARIATION '):
#                 continue
#             yield i
#         except ValueError:
#             pass
# def make_unicode_data():
#     last = 0
#     a = array.array('I')
#     for i in codepoints():
#         a.append(i - last - 1)
#         last = i
#     return base64.b64encode(zlib.compress(a.tostring(), 9))
# if __name__ == '__main__':
#     b = make_unicode_data()
#     for i in range(0, len(b), 76):
#         print ' %s' % b[i:i+76]
halcanary627ad6d2016-07-01 08:48:12 -070033valid_codepoint_data = '''
Hal Canarya0a6fe42018-10-31 12:59:26 -040034 eNrt3LmOHEUYAOCt7p5jL89iAbZJwJY4ExKTIiJChAh4CgSvgIgREjEJEa+ARAQxEW/gR4GRbDGa
35 7Z4+pq+q/j5p5fVMd3XVX3fPTr99wVSedjgnCBsAANyTlby2qllX9722tlYH4DqieTKG+etV3tYJ
36 t5ntiNcqWxu9NnCbW+r6yLpQnaewn1rS3pFpPVhQWVcL7Dd/GVt6m1PGHL/WDfMXGsZlqLyHmvfy
37 DjGsyvN+3V6cuG42wrhRVd5i4jiX/ZtVnFeW1309bUrS3fS4Pw8tyxlmEMsu7auqDPvzd2ekuYuo
38 358Tw7K8b4/abh/9/66mLPmJ8uYHr4ej1/KKNJooWuYjezlXDLGOW7dob2P02zZph5o4ti3D5uj1
39 dce2t23Y9lOId9byvKqYbyrafZt87SKN60WDsaltOS5L2mE2YH76vi+ZXwyr6Tolq8ljceLcums8
40 PPj9ZiF7uGKCa75bUp/hYF5tu+Y5rN9z2n2o6Zuhh33qq+OeXTDkWB1zPykii00Y6dz3dQu45yrC
41 POeqDaDx2qju79u63q+JpYxLiHvQrhetmOi6Y63H1qq4d9dCAABAIvuFlO162DuPtQ+47Dm9mzPL
42 Hs5IZ86yRPK37vm4tn2hr2tQbxtRXleqq9QUnyH38XfadcfHsk7JRp7PhzDWvev1gG3MOiIdYeZx
43 H3qN9JUmMMvxa6n78LZ5/Tzx9pYvpN7rfDmjvNwaBmGW3hQCYIL9W5ZYfsPRz9zyH7MwwbnZAHk5
44 dY3Qc1nGui+fwv3/uwn6TlldjnF/+tEM4r27gHo+ZwYASHM/UOVJi2PfUJUAAFEIPadVjJzfqZ+9
45 EmZWl1XPmD8ltc8s85nWeWh5rO+kAgAApKvpnu+3iMsY5J2F20SY56tE62L/LLC7gdPvMp6k8B7N
46 fWEOS8aNEEDv3mp5fLHweBnrh+F5ZPMVw3difbZvPCwzt2fP3qhrQN8DAAAAAAAAAAAAAAAAAAAA
47 AAAAAAAAAAAAAKCDKyEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
48 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACIwnMhAAAAAOBInnDZLlUvwChe
49 n/j6WU/pbEfO91bTEV9m5dsEylA0PG6t/y9O273ReqZtl2auJ7puLvSdhA595R3jVec9QTZx+Yx3
50 /3tc8tqLSMvStF2tVTsAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABA
51 JzcDpl0kEJ/iqTYCxCVTbmbsvZZrktVI+QoVr4Wj94MqjIplHEA87qzhk/RBx/MeNjxufeK9yxnF
52 IQyYVjHw9UhLIQQsUN5y7m3yet5xzL/tOO+PNa4PMV+dk2bX9dEzzX4xVpHPcbkqHKw9NPE80nL+
53 oK128vGM9oM3I177kwT2wFPvhT4yvDKBpuvgr/Pq82O7NxQO5pxshOuc8mvC8+x+vfRnRUxWLWI3
54 ZvsqWo7l2wbHbCZu798nOnZNsWbMRu4/S6zXPj2a8IOwsMA/KDseqz/daYMAAABz97MQzMbt0f9/
55 vBYTAAAAYL7++UUMIDVXM86bZ3s1sxWC2fBd03J/CMFg+nrGyU9C2TvPYKmPTXYw12dnxi/UnKM+
56 7scgO6MNh4nzXvX8zlwbALBPB4xHsXkcV3Y9U3Jevmlx7Gri/nr9cl+Wvdy7le3f9ve5989leHJ0
57 7+CqJL3v/vuZy9eYHzQ45vdb7RUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
58 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
59 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
60 AAAAAAAAAAAAAAAAAIB2PhQCAAAAAAAAAAAAAAAAAAAAYAB/fyYGAMA4/gXQcRfM
halcanary627ad6d2016-07-01 08:48:12 -070061'''
def codepoints(data=None):
    """Yield the code points stored in a delta-encoded table, in order.

    The table is base64 text wrapping a zlib stream; the decompressed bytes
    are read as an array of unsigned ints (type code 'I').  Each stored
    value is the gap to the previous code point minus one, with the running
    position starting at 0, so the first value yielded is at least 1.

    Args:
        data: base64-encoded table to decode.  Defaults to the module-level
            ``valid_codepoint_data`` when omitted, which is the behaviour
            existing callers rely on.

    Yields:
        Integer code points in strictly increasing order.

    Raises:
        zlib.error: if the decoded payload is not a valid zlib stream.
        ValueError: if the decompressed length is not a multiple of the
            array item size.
    """
    if data is None:
        data = valid_codepoint_data
    packed = zlib.decompress(base64.b64decode(data))
    # NOTE(review): type code 'I' uses the host's native byte order and item
    # size; this assumes the table was generated on a platform with the same
    # layout -- confirm before relying on it on unusual architectures.
    current = 0
    # Iterate the array directly; no intermediate list copy is required.
    for delta in array.array('I', packed):
        current += delta + 1
        yield current
if sys.version_info[0] >= 3:
    def to_unicode(i):
        """Return the one-character string for code point ``i``."""
        return chr(i)
else:
    def to_unicode(i):
        """Return code point ``i`` as a UTF-8 encoded byte string."""
        # Go through a \UXXXXXXXX escape rather than building the character
        # directly, then encode the resulting unicode object as UTF-8.
        escaped = '\\U%08x' % i
        character = escaped.decode('unicode-escape')
        return character.encode('UTF-8')
if __name__ == '__main__':
    # Print every code point from codepoints() as a table: a byte-order mark
    # first, then one line per block of 64 code points, each line prefixed
    # with the block's starting value; an extra blank line is inserted
    # whenever the block start is a multiple of 1024.
    if sys.version_info[0] < 3:
        # Python 2: to_unicode() already returns UTF-8 bytes, so the raw
        # stdout stream can be used as-is.
        o = sys.stdout
    else:
        # Python 3: to_unicode() returns str, and sys.stdout's encoding
        # depends on locale/platform.  Wrap the same file descriptor in an
        # explicit UTF-8 text stream so the output matches the Python 2
        # bytes and cannot fail with UnicodeEncodeError.
        import io
        try:
            sys.stdout.flush()
            o = io.open(sys.stdout.fileno(), 'w', encoding='UTF-8',
                        closefd=False)
        except (AttributeError, OSError, ValueError):
            # stdout has no usable file descriptor (e.g. it was replaced by
            # an in-memory object); fall back to writing through it directly.
            o = sys.stdout
    o.write(to_unicode(0xFEFF))
    last_row = -1
    for i in codepoints():
        row = i - (i & 63)  # start of the 64-code-point block containing i
        if last_row != row:
            if row:
                # No separator before the very first block (row 0).
                o.write('\n' if row % 1024 else '\n\n')
            o.write('U+%06x ' % row)
            last_row = row
        o.write(' ' + to_unicode(i))
    o.write('\n')
    # The wrapper is not closed (closefd=False), so flush explicitly.
    o.flush()