Amaury Forgeot d'Arc | 8b84ea0 | 2009-07-13 20:38:21 +0000 | [diff] [blame] | 1 | """This script generates a Python codec module from a Windows Code Page.
|
| 2 |
|
| 3 | It uses the function MultiByteToWideChar to generate a decoding table.
|
| 4 | """
|
| 5 |
|
| 6 | import ctypes
|
| 7 | from ctypes import wintypes
|
| 8 | from gencodec import codegen
|
| 9 | import unicodedata
|
| 10 |
|
def genwinmap(codepage):
    """Build a byte -> Unicode decoding table for a Windows code page.

    Every byte value 0..255 is decoded on its own with the Win32 function
    MultiByteToWideChar, and the resulting character is recorded together
    with its Unicode name.

    Args:
        codepage: Numeric Windows code page identifier, passed unchanged as
            the first argument of MultiByteToWideChar.

    Returns:
        A dict mapping each byte value (0..255) to a tuple
        ``(code_point, name)``.  ``name`` is the Unicode character name;
        when the character has no name it is 'CONTROL CHARACTER' for bytes
        0..31 and 127, and '' for every other byte.

    Raises:
        AssertionError: with the message "invalid code page" if a byte does
            not decode to exactly one wide character.
    """
    MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
    MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
                                    wintypes.LPCSTR, ctypes.c_int,
                                    wintypes.LPWSTR, ctypes.c_int]
    MultiByteToWideChar.restype = ctypes.c_int

    enc2uni = {}

    # Pre-seed the control bytes: their entries supply the fallback name
    # used below when unicodedata has no name for the decoded character.
    for i in list(range(32)) + [127]:
        enc2uni[i] = (i, 'CONTROL CHARACTER')

    for i in range(256):
        # A fresh two-character buffer per byte, so buf[1] starts out as NUL.
        buf = ctypes.create_unicode_buffer(2)
        ret = MultiByteToWideChar(
            codepage, 0,
            bytes([i]), 1,
            buf, 2)
        # Raised explicitly instead of using ``assert``: the code page comes
        # from the caller, and an assert would be stripped under
        # ``python -O``, silently yielding a bogus table.  The exception
        # type and message are kept identical to the former assert.
        if ret != 1:
            raise AssertionError("invalid code page")
        # Internal sanity check: only one wide character was written.
        assert buf[1] == '\x00'

        # At this point enc2uni holds byte i only if it was pre-seeded above.
        fallback = enc2uni[i][1] if i in enc2uni else ''
        name = unicodedata.name(buf[0], fallback)

        enc2uni[i] = (ord(buf[0]), name)

    return enc2uni
|
| 42 |
|
def genwincodec(codepage):
    """Print the source of a codec module for Windows code page *codepage*.

    The decoding table comes from genwinmap() and the module text from
    codegen().  Everything in the generated text up to and including the
    first '\"\"\"#\"' marker is replaced by a docstring that records the
    encoding name, the Windows version reported by platform.win32_ver()
    and the command line used.  The result is written to standard output.
    """
    import platform

    decoding_map = genwinmap(codepage)
    encodingname = 'cp%d' % codepage
    generated = codegen("", decoding_map, encodingname)

    # Our own docstring, substituted for the first lines of the generated code
    windows_version = ' '.join(platform.win32_ver())
    header = '''\
"""Python Character Mapping Codec %s generated on Windows:
%s with the command:
  python Tools/unicode/genwincodec.py %s
"""#"
''' % (encodingname, windows_version, codepage)

    # Keep only what follows the end-of-docstring marker emitted by codegen
    remainder = generated.split('"""#"', 1)[1]

    print(header + remainder)
|
| 58 |
|
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the numeric
    # code page to generate a codec for.
    import sys
    codepage_arg = sys.argv[1]
    genwincodec(int(codepage_arg))
|