| """This script generates a Python codec module from a Windows Code Page. |
| |
| It uses the function MultiByteToWideChar to generate a decoding table. |
| """ |
| |
| import ctypes |
| from ctypes import wintypes |
| from gencodec import codegen |
| import unicodedata |
| |
def genwinmap(codepage):
    """Build a decoding map for a Windows code page, one byte at a time.

    Each byte value 0..255 is decoded on its own with the Win32 function
    MultiByteToWideChar (reached through ``ctypes.windll``, so this only
    runs on Windows).

    Args:
        codepage: Windows code page identifier, e.g. 1252.

    Returns:
        A dict mapping every byte value 0..255 to a
        ``(code point, name)`` tuple.  ``name`` is the Unicode name of the
        decoded character; when the character has no Unicode name it is
        'CONTROL CHARACTER' for bytes 0-31 and 127, and '' otherwise.

    Raises:
        AssertionError: if a byte does not decode to exactly one wide
            character (for instance because the code page is invalid).
            Raised explicitly rather than via ``assert`` so the check is
            still performed under ``python -O``.
    """
    MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
    MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
                                    wintypes.LPCSTR, ctypes.c_int,
                                    wintypes.LPWSTR, ctypes.c_int]
    MultiByteToWideChar.restype = ctypes.c_int

    # Seed the table with a fallback label for the C0 controls and DEL;
    # it is only used when unicodedata has no name for the decoded char.
    enc2uni = {i: (i, 'CONTROL CHARACTER') for i in [*range(32), 127]}

    for i in range(256):
        # Fresh two-slot buffer per byte: slot 1 must stay NUL if exactly
        # one wide character was written.
        buf = ctypes.create_unicode_buffer(2)
        ret = MultiByteToWideChar(
            codepage, 0,
            bytes([i]), 1,
            buf, 2)
        if ret != 1:
            raise AssertionError(
                "invalid code page %r: MultiByteToWideChar returned %d "
                "for byte 0x%02X" % (codepage, ret, i))
        if buf[1] != '\x00':
            raise AssertionError(
                "byte 0x%02X decoded to more than one character "
                "in code page %r" % (i, codepage))

        try:
            name = unicodedata.name(buf[0])
        except ValueError:
            # Unnamed character: fall back to the seeded label for this
            # byte value, or to an empty name if there is none.
            name = enc2uni[i][1] if i in enc2uni else ''

        enc2uni[i] = (ord(buf[0]), name)

    return enc2uni
| |
def genwincodec(codepage):
    """Print the source of a Python codec module for a Windows code page.

    The decoding map comes from genwinmap() and the module text from
    gencodec.codegen(); the leading docstring of the generated text is
    replaced by one recording the Windows version and the command used.
    """
    import platform

    decoding_map = genwinmap(codepage)
    encodingname = 'cp%d' % codepage
    generated = codegen("", decoding_map, encodingname)

    # Replace first lines with our own docstring
    header_template = (
        '"""Python Character Mapping Codec %s generated on Windows:\n'
        '%s with the command:\n'
        'python Tools/unicode/genwincodec.py %s\n'
        '"""#"\n'
    )
    windows_version = ' '.join(platform.win32_ver())
    header = header_template % (encodingname, windows_version, codepage)

    # Keep everything after the first '"""#"' marker of the generated text.
    remainder = generated.split('"""#"', 1)[1]
    code = header + remainder

    print(code)
| |
if __name__ == '__main__':
    # Usage: python genwincodec.py CODEPAGE   (CODEPAGE is an integer)
    import sys
    requested_codepage = int(sys.argv[1])
    genwincodec(requested_codepage)