experimental/tools/generate-unicode-test-txt: update
No-Try: true
Change-Id: Ie904a1493570e7f2d50ab2e79c30951610641428
Reviewed-on: https://skia-review.googlesource.com/c/166802
Reviewed-by: Hal Canary <halcanary@google.com>
Commit-Queue: Hal Canary <halcanary@google.com>
diff --git a/experimental/tools/generate-unicode-test-txt b/experimental/tools/generate-unicode-test-txt
index 9927753..944da31 100755
--- a/experimental/tools/generate-unicode-test-txt
+++ b/experimental/tools/generate-unicode-test-txt
@@ -7,58 +7,80 @@
import array, zlib, base64, sys
-## Source: http://www.unicode.org/Public/9.0.0/ucd/UnicodeData.txt
+# import array, zlib, base64, sys, unicodedata
+# def codepoints():
+# for i in range(0x110000):
+# try:
+# u = ('\\U%08x' % i).decode('unicode-escape')
+# n = unicodedata.name(u)
+# c = unicodedata.category(u)
+# if c[0] == 'C' or n.startswith('VARIATION '):
+# continue
+# yield i
+# except ValueError:
+# pass
+# def make_unicode_data():
+# last = 0
+# a = array.array('I')
+# for i in codepoints():
+# a.append(i - last - 1)
+# last = i
+# return base64.b64encode(zlib.compress(a.tostring(), 9))
+# if __name__ == '__main__':
+# b = make_unicode_data()
+# for i in range(0, len(b), 76):
+# print ' %s' % b[i:i+76]
valid_codepoint_data = '''
- eNrt3buPG0UcwHHPeNf22b67hPBIUpAc0iEgElAgIkSRXEkXUVOcKKj5F6BFQpSUqSgpkUAU+Rco
- +Av4PyLicD6ZzT5mZmfmtzPz/UhWFJ/X8356H/dmkHJGFgAAAADe6I73q55j1NXLJ0VRAAASHzN0
- ouOcaol/zuPyKmJYdct7G9oMANqv01gKGScFpbUusN0ogbheCPSVb9KUkfFcQAceQ9XA3+YOfVJX
- nHdrtaon3FBprA3SWwnnc9u/uuO4trjuymnZ8r3LkfHeWI4pKsIYZJOXLvWrKw27409H1tllo0ym
- 3v7H5GVb3FeNOhyyH1gPpHHekw/zg/dV4715x3eYqCzjscuLRaD5em05FsVo2z6+ez4iHcuW92vH
- +rnynO6p5b9r2zXJ/2VHW7CJ18kE8zbW2tAkPZXg+tQlHF/lMg8cX9N5kh6IY9Vz7FAYC4cy3Bay
- 5qoEwjxvKXd1ML7bzskO68GY9qEG2nNfXdOO/cR9lv1B5z85tJdU80pFPvac5gK0tqV6xm+PAFDS
- fHno3E3XfaVU0lhCvivqddGk82IeKZwFRR21/tDGAAAAkMtaomQu58H27TGEvO7zyPP3bUemXY34
- HoRbqzYtPH/Otk34CgPDVgnFtaa4Wq0Fw7bdP68cPp/KvEZHHt9DiHWOlk2/Xuq8lj1kmd/ndIB6
- vBCoAxdUn0n2faz5kcN8XLr+llb3HyZcdxTreSBb7M0AkFyf68ziqxqvqcU/ZUrgWB0gLn1hqMD5
- gGnUTT3xdiO5Tr85gfI8pUrDQkUWAAAAZLt2i+mOxWffoCgBAAAAAAAAAAAAdNCFh79ney7s0O/J
- dxv/v5FwHVHEHXipTrQNlPCspP09IE8Ch3Pk2N/k8DfI+JzxMhvcZxXw767l50u/nou+PgzuD8n6
- bQzuHUh/mFo93lLuAGh7AAw9u1N4BnxJHQAAAAAAAMD/5fz78BHFCwBRvE4WIGMrsgCJGjo35lGG
- aTY9Jz/359XSb/nJF6l6wjUAfm2EwuVc/DBjl0ufP5X4psZ1T017/hziux05vLVlHdEOdbbEexes
- qcpFMu1bf3sgvyab9LryjLoEADmOf5D1bs/f2q4Nj3X9uup4TzX+znWoZblJFgBAMm4Unv5c58Lv
- OR73muHncjpXQAX8ripweMivP6J+oERzyzHa5P25Y7+/9jBXiNWOQ4xfY77TdU71Dk2gGPXAHCnV
- vgoyfdquDn2UaFq/p/46OY0c3tT6qZj3B07pGU1ScZ3ius21zn7AEAQBpmuHRSbtrJT4NOV2Ldbu
- WpqfaL4AMhTqHkJD12D83DNPSO13EnWw9tYRwoH73OPjAOUxhTV0zH2fXXofZ16XfM+zczkfxbTO
- K6F+q7IsQ5M+Y1l4v3o/o3aa6z0TJfb9Y/ZpscbYZlvv24t/wpRr0MOM0nKcUVq6xrRLwUXGdyxw
- JjFXNXVK1l87EWyzTTne7821a5jys3a5f7fMvNP3MLMRruNSLgbSojymz+bYRUFtoXbME2XYxkKt
- 8UzuS6Uc235b3PVVHtSO/Urb+voHuuJoY//uxXnk/UzPvfiFeUFU1Fv3OZPLWH6Ldv7Sp5HiM2bv
- eezexSV7Rt7s1srPCsjPpUEaU8wG1zhL3C9V+joVPbEy+IILzwAAAAAAHn1DFiSrbW9hQ7YA1/7k
- 4U8AAAAAAGDC/uKCSgAAgGu/n5EHAAAgP5L3GTW9bm1/ve6lUDy7rtvjtJ9x+aI81A0bPu/n+S3X
- Tr4i5fNCfTy7/vGL173b1AMAAAAgRcfn//37nAvekuGyxjdZypvukw3d4+mrW5QRkJsp7wVrigcF
- e4ssaPUHWRCMr+es/EhWesftm4fzRs+6n8ehHL5TUR5WdVKPqMNKOO6q4zNz6gAAsOYHQH8EAAAA
- ZKTt9kpj9jqf9Jzg5GMPtW+e3veM7L+XlDXgc837PllozPS8z/21xE8r/32nqcN+VFuE3fY72+61
- uEqXmr36W2tbWKrls2pm9zuvMoybS7rGlr8KFMYnV/+eJPZjZUX3MClboTFFCR0biss5t5KX7Zd2
- jkOJ/c5nAmEeZ1gv9svJXxkuAAAAAACw4rJP0LdfdkSWZmVLFnj3wPLzue+Zrgw/t8gkHb59PeJY
- ZZAm7fk7fdZvX49j+fDF69Fz+iYAABDHP/Vm9jbZIOpfwWoh0w==
+ eNrt3LmOHEUYAOCt7p5jL89iAbZJwJY4ExKTIiJChAh4CgSvgIgREjEJEa+ARAQxEW/gR4GRbDGa
+ 7Z4+pq+q/j5p5fVMd3XVX3fPTr99wVSedjgnCBsAANyTlby2qllX9722tlYH4DqieTKG+etV3tYJ
+ t5ntiNcqWxu9NnCbW+r6yLpQnaewn1rS3pFpPVhQWVcL7Dd/GVt6m1PGHL/WDfMXGsZlqLyHmvfy
+ DjGsyvN+3V6cuG42wrhRVd5i4jiX/ZtVnFeW1309bUrS3fS4Pw8tyxlmEMsu7auqDPvzd2ekuYuo
+ 358Tw7K8b4/abh/9/66mLPmJ8uYHr4ej1/KKNJooWuYjezlXDLGOW7dob2P02zZph5o4ti3D5uj1
+ dce2t23Y9lOId9byvKqYbyrafZt87SKN60WDsaltOS5L2mE2YH76vi+ZXwyr6Tolq8ljceLcums8
+ PPj9ZiF7uGKCa75bUp/hYF5tu+Y5rN9z2n2o6Zuhh33qq+OeXTDkWB1zPykii00Y6dz3dQu45yrC
+ POeqDaDx2qju79u63q+JpYxLiHvQrhetmOi6Y63H1qq4d9dCAABAIvuFlO162DuPtQ+47Dm9mzPL
+ Hs5IZ86yRPK37vm4tn2hr2tQbxtRXleqq9QUnyH38XfadcfHsk7JRp7PhzDWvev1gG3MOiIdYeZx
+ H3qN9JUmMMvxa6n78LZ5/Tzx9pYvpN7rfDmjvNwaBmGW3hQCYIL9W5ZYfsPRz9zyH7MwwbnZAHk5
+ dY3Qc1nGui+fwv3/uwn6TlldjnF/+tEM4r27gHo+ZwYASHM/UOVJi2PfUJUAAFEIPadVjJzfqZ+9
+ EmZWl1XPmD8ltc8s85nWeWh5rO+kAgAApKvpnu+3iMsY5J2F20SY56tE62L/LLC7gdPvMp6k8B7N
+ fWEOS8aNEEDv3mp5fLHweBnrh+F5ZPMVw3difbZvPCwzt2fP3qhrQN8DAAAAAAAAAAAAAAAAAAAA
+ AAAAAAAAAAAAAKCDKyEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACIwnMhAAAAAOBInnDZLlUvwChe
+ n/j6WU/pbEfO91bTEV9m5dsEylA0PG6t/y9O273ReqZtl2auJ7puLvSdhA595R3jVec9QTZx+Yx3
+ /3tc8tqLSMvStF2tVTsAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABA
+ JzcDpl0kEJ/iqTYCxCVTbmbsvZZrktVI+QoVr4Wj94MqjIplHEA87qzhk/RBx/MeNjxufeK9yxnF
+ IQyYVjHw9UhLIQQsUN5y7m3yet5xzL/tOO+PNa4PMV+dk2bX9dEzzX4xVpHPcbkqHKw9NPE80nL+
+ oK128vGM9oM3I177kwT2wFPvhT4yvDKBpuvgr/Pq82O7NxQO5pxshOuc8mvC8+x+vfRnRUxWLWI3
+ ZvsqWo7l2wbHbCZu798nOnZNsWbMRu4/S6zXPj2a8IOwsMA/KDseqz/daYMAAABz97MQzMbt0f9/
+ vBYTAAAAYL7++UUMIDVXM86bZ3s1sxWC2fBd03J/CMFg+nrGyU9C2TvPYKmPTXYw12dnxi/UnKM+
+ 7scgO6MNh4nzXvX8zlwbALBPB4xHsXkcV3Y9U3Jevmlx7Gri/nr9cl+Wvdy7le3f9ve5989leHJ0
+ 7+CqJL3v/vuZy9eYHzQ45vdb7RUAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
+ AAAAAAAAAAAAAAAAAIB2PhQCAAAAAAAAAAAAAAAAAAAAYAB/fyYGAMA4/gXQcRfM
'''
+def codepoints():
+ i = 0
+ for increment in array.array('I', zlib.decompress(
+ base64.b64decode(valid_codepoint_data))).tolist():
+ i += increment + 1
+ yield i
if sys.version_info[0] >= 3:
    def to_unicode(i):
        """Return the character for code point i as a str."""
        return chr(i)
else:
    def to_unicode(i):
        """Return the character for code point i as UTF-8 encoded bytes."""
        # Build a '\UXXXXXXXX' escape and decode it, then encode as UTF-8.
        escaped = '\\U%08x' % i
        return escaped.decode('unicode-escape').encode('UTF-8')
-o = sys.stdout
-o.write(to_unicode(0xFEFF))
-pack = array.array('I', zlib.decompress(
- base64.b64decode(valid_codepoint_data)))
-i = 0
-last_row = -1
-plist = pack.tolist()
-for increment in plist:
- i += increment + 1
- row = i - (i & 63)
- if last_row != row:
- if row:
- o.write('\n' if row % 1024 else '\n\n')
- o.write('U+%06x ' % row)
- last_row = row
- o.write(' ' + to_unicode(i))
-o.write('\n')
+if __name__ == '__main__':
+ o = sys.stdout
+ o.write(to_unicode(0xFEFF))
+ last_row = -1
+ for i in codepoints():
+ row = i - (i & 63)
+ if last_row != row:
+ if row:
+ o.write('\n' if row % 1024 else '\n\n')
+ o.write('U+%06x ' % row)
+ last_row = row
+ o.write(' ' + to_unicode(i))
+ o.write('\n')