| """This script generates a Python codec module from a Windows Code Page. | |
| It uses the function MultiByteToWideChar to generate a decoding table. | |
| """ | |
| import ctypes | |
| from ctypes import wintypes | |
| from gencodec import codegen | |
| import unicodedata | |
def genwinmap(codepage):
    """Build a byte -> Unicode decoding map for a Windows code page.

    Each byte value 0..255 is decoded on its own with the Win32 function
    ``MultiByteToWideChar``, reached through ``ctypes.windll`` (so this
    only works on Windows).

    Args:
        codepage: Integer code page identifier handed to
            ``MultiByteToWideChar``.

    Returns:
        A dict mapping every byte value (0-255) to a tuple
        ``(code_point, name)``.  ``name`` is the ``unicodedata`` name of
        the decoded character; when the character has no name it is
        ``'CONTROL CHARACTER'`` for bytes 0-31 and 127, and ``''``
        otherwise.

    Raises:
        ValueError: If a byte does not decode to exactly one UTF-16 code
            unit, e.g. because the code page is not valid.
    """
    MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
    MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
                                    wintypes.LPCSTR, ctypes.c_int,
                                    wintypes.LPWSTR, ctypes.c_int]
    MultiByteToWideChar.restype = ctypes.c_int

    # Seed the control codes first: unicodedata has no names for them, so
    # the loop below falls back to this placeholder name.
    enc2uni = {}
    for i in list(range(32)) + [127]:
        enc2uni[i] = (i, 'CONTROL CHARACTER')

    for i in range(256):
        # A fresh two-element buffer per byte keeps the second element
        # NUL unless the call wrote more than one code unit.
        buf = ctypes.create_unicode_buffer(2)
        ret = MultiByteToWideChar(
            codepage, 0,
            bytes([i]), 1,
            buf, 2)
        # Raise explicitly instead of asserting: asserts are removed under
        # "python -O", which would let a failed conversion slip into the
        # table as U+0000.
        if ret != 1 or buf[1] != '\x00':
            raise ValueError(
                "invalid code page %r: byte 0x%02X did not decode to a "
                "single UTF-16 code unit (MultiByteToWideChar returned %d)"
                % (codepage, i, ret))

        char = buf[0]
        name = unicodedata.name(char, None)
        if name is None:
            # Unnamed character: reuse the seeded placeholder if this byte
            # has one, otherwise leave the name empty.
            name = enc2uni[i][1] if i in enc2uni else ''
        enc2uni[i] = (ord(char), name)

    return enc2uni
def genwincodec(codepage):
    """Print the source of a Python codec module for a Windows code page.

    The decoding table comes from ``genwinmap(codepage)`` and is rendered
    by ``codegen`` under the encoding name ``cp<codepage>``.  Everything
    in the rendered text up to and including the first ``\"\"\"#"`` marker
    is replaced with a docstring that records the Windows version and the
    command used; the result is written to standard output.

    Assumes the text returned by ``codegen`` contains the ``\"\"\"#"``
    marker; if it does not, the split below raises ``IndexError``.
    """
    import platform

    decoding_map = genwinmap(codepage)
    encodingname = 'cp%d' % codepage
    generated = codegen("", decoding_map, encodingname)

    # Build our own docstring to stand in for the generated leading lines.
    windows_version = ' '.join(platform.win32_ver())
    header = '''\
"""Python Character Mapping Codec %s generated on Windows:
%s with the command:
python Tools/unicode/genwincodec.py %s
"""#"
''' % (encodingname, windows_version, codepage)

    # Keep only what follows the end-of-docstring marker in codegen's text.
    remainder = generated.split('"""#"', 1)[1]
    print(header + remainder)
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the numeric
    # code page to generate a codec for.
    import sys
    requested_codepage = int(sys.argv[1])
    genwincodec(requested_codepage)