Hal Canary | b5680ca | 2018-04-03 17:25:15 -0400 | [diff] [blame] | 1 | #! /usr/bin/env python |
halcanary | 627ad6d | 2016-07-01 08:48:12 -0700 | [diff] [blame] | 2 | |
| 3 | # Copyright 2016 Google Inc. |
| 4 | # |
| 5 | # Use of this source code is governed by a BSD-style license that can be |
| 6 | # found in the LICENSE file. |
| 7 | |
| 8 | import array, zlib, base64, sys |
| 9 | |
Hal Canary | a0a6fe4 | 2018-10-31 12:59:26 -0400 | [diff] [blame] | 10 | # import array, zlib, base64, sys, unicodedata |
| 11 | # def codepoints(): |
| 12 | # for i in range(0x110000): |
| 13 | # try: |
| 14 | # u = ('\\U%08x' % i).decode('unicode-escape') |
| 15 | # n = unicodedata.name(u) |
| 16 | # c = unicodedata.category(u) |
| 17 | # if c[0] == 'C' or n.startswith('VARIATION '): |
| 18 | # continue |
| 19 | # yield i |
| 20 | # except ValueError: |
| 21 | # pass |
| 22 | # def make_unicode_data(): |
| 23 | # last = 0 |
| 24 | # a = array.array('I') |
| 25 | # for i in codepoints(): |
| 26 | # a.append(i - last - 1) |
| 27 | # last = i |
| 28 | # return base64.b64encode(zlib.compress(a.tostring(), 9)) |
| 29 | # if __name__ == '__main__': |
| 30 | # b = make_unicode_data() |
| 31 | # for i in range(0, len(b), 76): |
| 32 | # print ' %s' % b[i:i+76] |
halcanary | 627ad6d | 2016-07-01 08:48:12 -0700 | [diff] [blame] | 33 | valid_codepoint_data = ''' |
Hal Canary | a0a6fe4 | 2018-10-31 12:59:26 -0400 | [diff] [blame] | 34 | eNrt3LmOHEUYAOCt7p5jL89iAbZJwJY4ExKTIiJChAh4CgSvgIgREjEJEa+ARAQxEW/gR4GRbDGa |
| 35 | 7Z4+pq+q/j5p5fVMd3XVX3fPTr99wVSedjgnCBsAANyTlby2qllX9722tlYH4DqieTKG+etV3tYJ |
| 36 | t5ntiNcqWxu9NnCbW+r6yLpQnaewn1rS3pFpPVhQWVcL7Dd/GVt6m1PGHL/WDfMXGsZlqLyHmvfy |
| 37 | DjGsyvN+3V6cuG42wrhRVd5i4jiX/ZtVnFeW1309bUrS3fS4Pw8tyxlmEMsu7auqDPvzd2ekuYuo |
| 38 | 358Tw7K8b4/abh/9/66mLPmJ8uYHr4ej1/KKNJooWuYjezlXDLGOW7dob2P02zZph5o4ti3D5uj1 |
| 39 | dce2t23Y9lOId9byvKqYbyrafZt87SKN60WDsaltOS5L2mE2YH76vi+ZXwyr6Tolq8ljceLcums8 |
| 40 | PPj9ZiF7uGKCa75bUp/hYF5tu+Y5rN9z2n2o6Zuhh33qq+OeXTDkWB1zPykii00Y6dz3dQu45yrC |
| 41 | POeqDaDx2qju79u63q+JpYxLiHvQrhetmOi6Y63H1qq4d9dCAABAIvuFlO162DuPtQ+47Dm9mzPL |
| 42 | Hs5IZ86yRPK37vm4tn2hr2tQbxtRXleqq9QUnyH38XfadcfHsk7JRp7PhzDWvev1gG3MOiIdYeZx |
| 43 | H3qN9JUmMMvxa6n78LZ5/Tzx9pYvpN7rfDmjvNwaBmGW3hQCYIL9W5ZYfsPRz9zyH7MwwbnZAHk5 |
| 44 | dY3Qc1nGui+fwv3/uwn6TlldjnF/+tEM4r27gHo+ZwYASHM/UOVJi2PfUJUAAFEIPadVjJzfqZ+9 |
| 45 | EmZWl1XPmD8ltc8s85nWeWh5rO+kAgAApKvpnu+3iMsY5J2F20SY56tE62L/LLC7gdPvMp6k8B7N |
| 46 | fWEOS8aNEEDv3mp5fLHweBnrh+F5ZPMVw3difbZvPCwzt2fP3qhrQN8DAAAAAAAAAAAAAAAAAAAA |
| 47 | AAAAAAAAAAAAAKCDKyEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA |
| 48 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACIwnMhAAAAAOBInnDZLlUvwChe |
| 49 | n/j6WU/pbEfO91bTEV9m5dsEylA0PG6t/y9O273ReqZtl2auJ7puLvSdhA595R3jVec9QTZx+Yx3 |
| 50 | /3tc8tqLSMvStF2tVTsAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABA |
| 51 | JzcDpl0kEJ/iqTYCxCVTbmbsvZZrktVI+QoVr4Wj94MqjIplHEA87qzhk/RBx/MeNjxufeK9yxnF |
| 52 | IQyYVjHw9UhLIQQsUN5y7m3yet5xzL/tOO+PNa4PMV+dk2bX9dEzzX4xVpHPcbkqHKw9NPE80nL+ |
| 53 | oK128vGM9oM3I177kwT2wFPvhT4yvDKBpuvgr/Pq82O7NxQO5pxshOuc8mvC8+x+vfRnRUxWLWI3 |
| 54 | ZvsqWo7l2wbHbCZu798nOnZNsWbMRu4/S6zXPj2a8IOwsMA/KDseqz/daYMAAABz97MQzMbt0f9/ |
| 55 | vBYTAAAAYL7++UUMIDVXM86bZ3s1sxWC2fBd03J/CMFg+nrGyU9C2TvPYKmPTXYw12dnxi/UnKM+ |
| 56 | 7scgO6MNh4nzXvX8zlwbALBPB4xHsXkcV3Y9U3Jevmlx7Gri/nr9cl+Wvdy7le3f9ve5989leHJ0 |
| 57 | 7+CqJL3v/vuZy9eYHzQ45vdb7RUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA |
| 58 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA |
| 59 | AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA |
| 60 | AAAAAAAAAAAAAAAAAIB2PhQCAAAAAAAAAAAAAAAAAAAAYAB/fyYGAMA4/gXQcRfM |
halcanary | 627ad6d | 2016-07-01 08:48:12 -0700 | [diff] [blame] | 61 | ''' |
def codepoints():
    """Yield every codepoint stored in ``valid_codepoint_data``, ascending.

    The table is base64 text wrapping a zlib stream of unsigned ints.  Each
    int is the gap to the next codepoint minus one, so a running total
    (plus one per entry) reconstructs the codepoints themselves.
    """
    packed = base64.b64decode(valid_codepoint_data)
    # NOTE(review): typecode 'I' is read using the native C unsigned int
    # layout; presumably this matches the machine that generated the table
    # (4-byte, little-endian) -- confirm before relying on it elsewhere.
    gaps = array.array('I', zlib.decompress(packed))
    current = 0
    for gap in gaps.tolist():
        current = current + gap + 1
        yield current
if sys.version_info[0] >= 3:
    def to_unicode(i):
        """Return the one-character text string for codepoint ``i``."""
        return chr(i)
else:
    def to_unicode(i):
        """Return codepoint ``i`` as a UTF-8 encoded byte string.

        Python 2 only: the character is built from a ``\\UXXXXXXXX`` escape
        and then encoded so it can be written straight to a byte stream.
        """
        escaped = '\\U%08x' % i
        character = escaped.decode('unicode-escape')
        return character.encode('UTF-8')
if __name__ == '__main__':
    # Print a chart of every codepoint from codepoints(): a leading BOM,
    # then rows aligned to 64 codepoints, each prefixed with 'U+' and the
    # row's base value, with a blank line before each 1024-aligned row.
    o = sys.stdout
    if sys.version_info[0] >= 3:
        # On Python 3 to_unicode() returns text, and sys.stdout would encode
        # it with the locale encoding, which can raise UnicodeEncodeError.
        # Re-open the stdout descriptor as UTF-8 so the output matches the
        # Python 2 path; closefd=False leaves the real stdout open.
        try:
            o = open(sys.stdout.fileno(), 'w', encoding='UTF-8',
                     closefd=False)
        except (AttributeError, ValueError, OSError):
            # stdout has no usable file descriptor (e.g. it was replaced by
            # an in-memory stream); fall back to writing through it directly.
            o = sys.stdout
    o.write(to_unicode(0xFEFF))
    last_row = -1
    for i in codepoints():
        row = i - (i & 63)  # first codepoint of the 64-wide row holding i
        if last_row != row:
            if row:
                # Row 0 directly follows the BOM, so it gets no separator.
                o.write('\n' if row % 1024 else '\n\n')
            o.write('U+%06x ' % row)
            last_row = row
        o.write(' ' + to_unicode(i))
    o.write('\n')
    # Push everything out explicitly; the UTF-8 wrapper keeps its own buffer.
    o.flush()