blob: 7a1ef5eda62174fc8f201fd77407ea631b8f76c1 [file] [log] [blame]
"""This script generates a Python codec module from a Windows Code Page.

It uses the function MultiByteToWideChar to generate a decoding table.
"""
5
6import ctypes
7from ctypes import wintypes
8from gencodec import codegen
9import unicodedata
10
def genwinmap(codepage):
    """Build the byte -> Unicode decoding table of a Windows code page.

    Every byte value 0..255 is decoded on its own with the Win32 function
    MultiByteToWideChar (called with flags=0).

    Args:
        codepage: Numeric Windows code page identifier, e.g. 1252.

    Returns:
        A dict mapping each byte value 0..255 to ``(code_point, name)``.
        *name* is the Unicode character name of the decoded character.  When
        the character has no name, it is ``'CONTROL CHARACTER'`` for bytes
        0..31 and 127, and ``''`` for any other byte.

    Raises:
        ValueError: If MultiByteToWideChar does not produce exactly one wide
            character for a byte (reported as "invalid code page").
    """
    MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
    MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
                                    wintypes.LPCSTR, ctypes.c_int,
                                    wintypes.LPWSTR, ctypes.c_int]
    MultiByteToWideChar.restype = ctypes.c_int

    enc2uni = {}

    # Seed a fallback name for bytes 0..31 and 127; it is used below when
    # unicodedata.name() has no name for the decoded character.
    for i in list(range(32)) + [127]:
        enc2uni[i] = (i, 'CONTROL CHARACTER')

    for i in range(256):
        # Fresh zero-filled buffer per byte so buf[1] reliably tells us
        # whether a second wide character was written.
        buf = ctypes.create_unicode_buffer(2)
        ret = MultiByteToWideChar(
            codepage, 0,
            bytes([i]), 1,
            buf, 2)
        # Explicit raises instead of assert: these checks must survive -O.
        if ret != 1:
            raise ValueError("invalid code page")
        if buf[1] != '\x00':
            raise ValueError(
                "byte 0x%02X decoded to more than one wide character" % i)
        try:
            name = unicodedata.name(buf[0])
        except ValueError:
            # Unnamed character: use the seeded fallback, if any.
            try:
                name = enc2uni[i][1]
            except KeyError:
                name = ''

        enc2uni[i] = (ord(buf[0]), name)

    return enc2uni
42
def genwincodec(codepage):
    """Print the source of a codec module for Windows code page *codepage*.

    The decoding table from genwinmap() is passed to codegen() under the
    encoding name ``cp<codepage>``.  Everything in the generated code up to
    and including the first ``\"\"\"#"`` marker is dropped and replaced by a
    docstring recording the encoding name, the Windows version reported by
    platform.win32_ver(), and the command used.  The result is written to
    standard output.
    """
    import platform

    decoding_map = genwinmap(codepage)
    encodingname = 'cp%d' % codepage
    generated = codegen("", decoding_map, encodingname)

    # Replace first lines with our own docstring
    windows_version = ' '.join(platform.win32_ver())
    header = '''\
"""Python Character Mapping Codec %s generated on Windows:
%s with the command:
 python Tools/unicode/genwincodec.py %s
"""#"
''' % (encodingname, windows_version, codepage)
    remainder = generated.split('"""#"', 1)[1]

    print(header + remainder)
58
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the numeric
    # Windows code page to generate a codec for.
    import sys
    codepage_arg = sys.argv[1]
    genwincodec(int(codepage_arg))
61 genwincodec(int(sys.argv[1]))