"""This script generates a Python codec module from a Windows Code Page.

It uses the function MultiByteToWideChar to generate a decoding table.
"""
| 5 | |
| 6 | import ctypes |
| 7 | from ctypes import wintypes |
| 8 | from gencodec import codegen |
| 9 | import unicodedata |
| 10 | |
def genwinmap(codepage):
    """Build the decoding map of a single-byte Windows code page.

    Each byte value 0-255 is converted on its own with the Win32 function
    MultiByteToWideChar, reached through ``ctypes.windll``, so this only
    runs on Windows.

    Args:
        codepage: Code page identifier, passed as the first argument of
            MultiByteToWideChar.

    Returns:
        A dict mapping every byte value (0-255) to a tuple
        ``(code point, name)``.  ``name`` is the Unicode character name of
        the decoded character; when unicodedata has no name for it, the
        name is ``'CONTROL CHARACTER'`` for bytes 0-31 and 127 and ``''``
        for any other byte.

    Raises:
        AssertionError: If MultiByteToWideChar does not report exactly one
            converted character for a byte, or if the second slot of the
            output buffer is not NUL afterwards.
    """
    MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
    MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
                                    wintypes.LPCSTR, ctypes.c_int,
                                    wintypes.LPWSTR, ctypes.c_int]
    MultiByteToWideChar.restype = ctypes.c_int

    # Pre-seed the control codes.  These entries provide the fallback name
    # looked up below when unicodedata has no name for the decoded character.
    enc2uni = {i: (i, 'CONTROL CHARACTER') for i in list(range(32)) + [127]}

    for i in range(256):
        # A fresh two-slot buffer per byte keeps the buf[1] check meaningful.
        buf = ctypes.create_unicode_buffer(2)
        ret = MultiByteToWideChar(codepage, 0, bytes([i]), 1, buf, 2)

        # Raised explicitly instead of using ``assert``: assert statements
        # are removed under ``python -O``, which would let a failed
        # conversion produce a bogus table.  AssertionError is kept so the
        # exception type seen by callers does not change.
        if ret != 1:
            raise AssertionError("invalid code page")
        if buf[1] != '\x00':
            raise AssertionError(
                "byte 0x%02X decoded to more than one character" % i)

        char = buf[0]
        try:
            name = unicodedata.name(char)
        except ValueError:
            # Only the pre-seeded control codes can already have an entry
            # for byte i at this point; any other byte gets an empty name.
            name = enc2uni.get(i, (i, ''))[1]

        enc2uni[i] = (ord(char), name)

    return enc2uni
| 42 | |
def genwincodec(codepage):
    """Print the source of a codec module for Windows code page *codepage*.

    The decoding map is obtained from genwinmap() and turned into module
    source by codegen().  The part of that source up to and including the
    first docstring-terminator marker is then replaced by a docstring that
    records the encoding name, the values reported by platform.win32_ver()
    and the command used, and the result is written to standard output.
    """
    import platform

    decoding_map = genwinmap(codepage)
    encodingname = 'cp%d' % codepage
    generated = codegen("", decoding_map, encodingname)

    # Marker that ends the docstring in codegen's output; our own header
    # ends with the same marker.
    marker = '"""#"'

    # NOTE(review): leading whitespace was collapsed in the copy of the file
    # under review; confirm whether the 'python ...' line of this template
    # was indented in the original.
    header_template = (
        '"""Python Character Mapping Codec %s generated on Windows:\n'
        '%s with the command:\n'
        'python Tools/unicode/genwincodec.py %s\n'
        '"""#"\n'
    )
    windows_version = ' '.join(platform.win32_ver())
    header = header_template % (encodingname, windows_version, codepage)

    # Replace the first lines with our own docstring: keep only what follows
    # the first marker in the generated source.
    remainder = generated.split(marker, 1)[1]

    print(header + remainder)
| 58 | |
if __name__ == '__main__':
    import sys

    # The first command-line argument is the code page number.
    requested_codepage = int(sys.argv[1])
    genwincodec(requested_codepage)