blob: 6c29fd189a298a5000c9aaaace987793f9ff1457 [file] [log] [blame]
Fredrik Lundhf367cac2000-09-24 23:18:31 +00001#
Fredrik Lundhe9133f72000-09-25 17:59:57 +00002# (re)generate unicode property and type databases
3#
Martin v. Löwisb5c980b2002-11-25 09:13:37 +00004# this script converts a unicode 3.2 database file to
Fredrik Lundh9e9bcda2001-01-21 17:01:31 +00005# Modules/unicodedata_db.h, Modules/unicodename_db.h,
6# and Objects/unicodetype_db.h
Fredrik Lundhcfcea492000-09-25 08:07:06 +00007#
8# history:
9# 2000-09-24 fl created (based on bits and pieces from unidb)
10# 2000-09-25 fl merged tim's splitbin fixes, separate decomposition table
Fredrik Lundhe9133f72000-09-25 17:59:57 +000011# 2000-09-25 fl added character type table
Fredrik Lundh9e9bcda2001-01-21 17:01:31 +000012# 2000-09-26 fl added LINEBREAK, DECIMAL, and DIGIT flags/fields (2.0)
Fredrik Lundhfad27ae2000-11-03 20:24:15 +000013# 2000-11-03 fl expand first/last ranges
Fredrik Lundh9e9bcda2001-01-21 17:01:31 +000014# 2001-01-19 fl added character name tables (2.1)
Fredrik Lundh7b7dd102001-01-21 22:41:08 +000015# 2001-01-21 fl added decomp compression; dynamic phrasebook threshold
Martin v. Löwis677bde22002-11-23 22:08:15 +000016# 2002-09-11 wd use string methods
17# 2002-10-18 mvl update to Unicode 3.2
18# 2002-10-22 mvl generate NFC tables
Martin v. Löwis97225da2002-11-24 23:05:09 +000019# 2002-11-24 mvl expand all ranges, sort names version-independently
Martin v. Löwisb5c980b2002-11-25 09:13:37 +000020# 2002-11-25 mvl add UNIDATA_VERSION
Hye-Shik Chang974ed7c2004-06-02 16:49:17 +000021# 2004-05-29 perky add east asian width information
Fredrik Lundhcfcea492000-09-25 08:07:06 +000022#
Fredrik Lundh7b7dd102001-01-21 22:41:08 +000023# written by Fredrik Lundh (fredrik@pythonware.com)
Fredrik Lundhf367cac2000-09-24 23:18:31 +000024#
25
import sys

# SCRIPT/VERSION are embedded in the generated C headers' banner comment
SCRIPT = sys.argv[0]
VERSION = "2.3"

# The Unicode Database
UNIDATA_VERSION = "3.2.0"
# input files, as distributed with the Unicode Character Database
UNICODE_DATA = "UnicodeData.txt"
COMPOSITION_EXCLUSIONS = "CompositionExclusions.txt"
EASTASIAN_WIDTH = "EastAsianWidth.txt"

# general-category abbreviations; a record's category is stored as an
# index into this list (CATEGORY_NAMES.index(...)), and the list itself
# is emitted as _PyUnicode_CategoryNames.
# NOTE(review): "Cn" appears at index 0 and again at index 17; .index()
# always returns 0, so slot 17 is never produced -- confirm before changing.
CATEGORY_NAMES = [ "Cn", "Lu", "Ll", "Lt", "Mn", "Mc", "Me", "Nd",
    "Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", "Cs", "Co", "Cn", "Lm",
    "Lo", "Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po", "Sm", "Sc", "Sk",
    "So" ]

# bidirectional-class abbreviations, indexed the same way and emitted
# as _PyUnicode_BidirectionalNames (index 0 = empty/unassigned)
BIDIRECTIONAL_NAMES = [ "", "L", "LRE", "LRO", "R", "AL", "RLE", "RLO",
    "PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS",
    "ON" ]

# flag bits OR-ed into each type record's "flags" field
# note: should match definitions in Objects/unicodectype.c
ALPHA_MASK = 0x01
DECIMAL_MASK = 0x02
DIGIT_MASK = 0x04
LOWER_MASK = 0x08
LINEBREAK_MASK = 0x10
SPACE_MASK = 0x20
TITLE_MASK = 0x40
UPPER_MASK = 0x80
WIDE_MASK = 0x100
Fredrik Lundhe9133f72000-09-25 17:59:57 +000056
def maketables(trace=0):
    # Top-level driver: read the Unicode database files once, then
    # regenerate the three C headers (name, data, and type tables).
    # trace is forwarded to the generators / splitbins() for progress
    # output on stderr.

    print "--- Reading", UNICODE_DATA, "..."

    unicode = UnicodeData(UNICODE_DATA, COMPOSITION_EXCLUSIONS,
                          EASTASIAN_WIDTH)

    # count assigned code points (entries with a non-None record)
    print len(filter(None, unicode.table)), "characters"

    makeunicodename(unicode, trace)
    makeunicodedata(unicode, trace)
    makeunicodetype(unicode, trace)
Fredrik Lundh9e9bcda2001-01-21 17:01:31 +000069
70# --------------------------------------------------------------------
71# unicode character properties
72
def makeunicodedata(unicode, trace):
    # Generate Modules/unicodedata_db.h: deduplicated database records
    # (category, combining class, bidi class, mirrored flag), the
    # compressed decomposition data, and the NFC first/last tables used
    # for canonical composition.  All tables are two-level-indexed via
    # splitbins() to save space.

    dummy = (0, 0, 0, 0)
    table = [dummy]
    cache = {0: dummy}
    index = [0] * len(unicode.chars)

    FILE = "Modules/unicodedata_db.h"

    print "--- Preparing", FILE, "..."

    # 1) database properties

    for char in unicode.chars:
        record = unicode.table[char]
        if record:
            # extract database properties
            category = CATEGORY_NAMES.index(record[2])
            combining = int(record[3])
            bidirectional = BIDIRECTIONAL_NAMES.index(record[4])
            mirrored = record[9] == "Y"
            item = (
                category, combining, bidirectional, mirrored
                )
            # add entry to index and item tables (deduplicate via cache)
            i = cache.get(item)
            if i is None:
                cache[item] = i = len(table)
                table.append(item)
            index[char] = i

    # 2) decomposition data

    decomp_data = [0]
    decomp_prefix = [""]
    decomp_index = [0] * len(unicode.chars)
    decomp_size = 0

    comp_pairs = []
    comp_first = [None] * len(unicode.chars)
    comp_last = [None] * len(unicode.chars)

    for char in unicode.chars:
        record = unicode.table[char]
        if record:
            if record[5]:
                decomp = record[5].split()
                # prefix: a leading "<compat>"-style tag marks a
                # compatibility (non-canonical) decomposition
                if decomp[0][0] == "<":
                    prefix = decomp.pop(0)
                else:
                    prefix = ""
                try:
                    i = decomp_prefix.index(prefix)
                except ValueError:
                    i = len(decomp_prefix)
                    decomp_prefix.append(prefix)
                prefix = i
                assert prefix < 256
                # content: first word packs prefix index (low byte) and
                # decomposition length (high bits), then the code points
                decomp = [prefix + (len(decomp)<<8)] +\
                         map(lambda s: int(s, 16), decomp)
                # Collect NFC pairs: canonical two-character
                # decompositions, not excluded, whose first character
                # has combining class 0
                if not prefix and len(decomp) == 3 and \
                   char not in unicode.exclusions and \
                   unicode.table[decomp[1]][3] == "0":
                    p, l, r = decomp
                    comp_first[l] = 1
                    comp_last[r] = 1
                    comp_pairs.append((l,r,char))
                # NOTE(review): decomp_data is a flat list of ints, so
                # .index(decomp) with a *list* argument never matches and
                # every decomposition is appended; the intended reuse of
                # identical entries appears ineffective -- confirm.
                try:
                    i = decomp_data.index(decomp)
                except ValueError:
                    i = len(decomp_data)
                    decomp_data.extend(decomp)
                    decomp_size = decomp_size + len(decomp) * 2
            else:
                i = 0
            decomp_index[char] = i

    # renumber NFC first/last characters densely and record the
    # contiguous code-point ranges they occupy
    f = l = 0
    comp_first_ranges = []
    comp_last_ranges = []
    prev_f = prev_l = None
    for i in unicode.chars:
        if comp_first[i] is not None:
            comp_first[i] = f
            f += 1
            if prev_f is None:
                prev_f = (i,i)
            elif prev_f[1]+1 == i:
                prev_f = prev_f[0],i
            else:
                comp_first_ranges.append(prev_f)
                prev_f = (i,i)
        if comp_last[i] is not None:
            comp_last[i] = l
            l += 1
            if prev_l is None:
                prev_l = (i,i)
            elif prev_l[1]+1 == i:
                prev_l = prev_l[0],i
            else:
                comp_last_ranges.append(prev_l)
                prev_l = (i,i)
    comp_first_ranges.append(prev_f)
    comp_last_ranges.append(prev_l)
    total_first = f
    total_last = l

    # dense (first x last) matrix mapping a pair to its composition
    comp_data = [0]*(total_first*total_last)
    for f,l,char in comp_pairs:
        f = comp_first[f]
        l = comp_last[l]
        comp_data[f*total_last+l] = char

    print len(table), "unique properties"
    print len(decomp_prefix), "unique decomposition prefixes"
    print len(decomp_data), "unique decomposition entries:",
    print decomp_size, "bytes"
    print total_first, "first characters in NFC"
    print total_last, "last characters in NFC"
    print len(comp_pairs), "NFC pairs"

    print "--- Writing", FILE, "..."

    fp = open(FILE, "w")
    print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
    print >>fp
    print >>fp, '#define UNIDATA_VERSION "%s"' % UNIDATA_VERSION
    print >>fp, "/* a list of unique database records */"
    print >>fp, \
          "const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {"
    for item in table:
        print >>fp, "    {%d, %d, %d, %d}," % item
    print >>fp, "};"
    print >>fp

    print >>fp, "/* Reindexing of NFC first characters. */"
    print >>fp, "#define TOTAL_FIRST",total_first
    print >>fp, "#define TOTAL_LAST",total_last
    print >>fp, "struct reindex{int start;short count,index;};"
    print >>fp, "struct reindex nfc_first[] = {"
    for start,end in comp_first_ranges:
        print >>fp,"  { %d, %d, %d}," % (start,end-start,comp_first[start])
    print >>fp,"  {0,0,0}"
    print >>fp,"};\n"
    print >>fp, "struct reindex nfc_last[] = {"
    for start,end in comp_last_ranges:
        print >>fp,"  { %d, %d, %d}," % (start,end-start,comp_last[start])
    print >>fp,"  {0,0,0}"
    print >>fp,"};\n"

    # FIXME: <fl> the following tables could be made static, and
    # the support code moved into unicodedatabase.c

    print >>fp, "/* string literals */"
    print >>fp, "const char *_PyUnicode_CategoryNames[] = {"
    for name in CATEGORY_NAMES:
        print >>fp, "    \"%s\"," % name
    print >>fp, "    NULL"
    print >>fp, "};"

    print >>fp, "const char *_PyUnicode_BidirectionalNames[] = {"
    for name in BIDIRECTIONAL_NAMES:
        print >>fp, "    \"%s\"," % name
    print >>fp, "    NULL"
    print >>fp, "};"

    print >>fp, "static const char *decomp_prefix[] = {"
    for name in decomp_prefix:
        print >>fp, "    \"%s\"," % name
    print >>fp, "    NULL"
    print >>fp, "};"

    # split record index table
    index1, index2, shift = splitbins(index, trace)

    print >>fp, "/* index tables for the database records */"
    print >>fp, "#define SHIFT", shift
    Array("index1", index1).dump(fp, trace)
    Array("index2", index2).dump(fp, trace)

    # split decomposition index table
    index1, index2, shift = splitbins(decomp_index, trace)

    print >>fp, "/* decomposition data */"
    Array("decomp_data", decomp_data).dump(fp, trace)

    print >>fp, "/* index tables for the decomposition data */"
    print >>fp, "#define DECOMP_SHIFT", shift
    Array("decomp_index1", index1).dump(fp, trace)
    Array("decomp_index2", index2).dump(fp, trace)

    # compress the NFC pair matrix the same way
    index, index2, shift = splitbins(comp_data, trace)
    print >>fp, "/* NFC pairs */"
    print >>fp, "#define COMP_SHIFT", shift
    Array("comp_index", index).dump(fp, trace)
    Array("comp_data", index2).dump(fp, trace)

    fp.close()
274
275# --------------------------------------------------------------------
276# unicode character type tables
277
278def makeunicodetype(unicode, trace):
279
280 FILE = "Objects/unicodetype_db.h"
281
282 print "--- Preparing", FILE, "..."
Fredrik Lundhe9133f72000-09-25 17:59:57 +0000283
284 # extract unicode types
Fredrik Lundh0f8fad42000-09-25 21:01:56 +0000285 dummy = (0, 0, 0, 0, 0, 0)
Fredrik Lundhe9133f72000-09-25 17:59:57 +0000286 table = [dummy]
287 cache = {0: dummy}
288 index = [0] * len(unicode.chars)
289
290 for char in unicode.chars:
291 record = unicode.table[char]
292 if record:
293 # extract database properties
294 category = record[2]
295 bidirectional = record[4]
296 flags = 0
297 if category in ["Lm", "Lt", "Lu", "Ll", "Lo"]:
298 flags |= ALPHA_MASK
299 if category == "Ll":
300 flags |= LOWER_MASK
Fredrik Lundh0f8fad42000-09-25 21:01:56 +0000301 if category == "Zl" or bidirectional == "B":
302 flags |= LINEBREAK_MASK
Fredrik Lundhe9133f72000-09-25 17:59:57 +0000303 if category == "Zs" or bidirectional in ("WS", "B", "S"):
304 flags |= SPACE_MASK
Fredrik Lundh375732c2000-09-25 23:03:34 +0000305 if category == "Lt":
Fredrik Lundhe9133f72000-09-25 17:59:57 +0000306 flags |= TITLE_MASK
307 if category == "Lu":
308 flags |= UPPER_MASK
309 # use delta predictor for upper/lower/title
310 if record[12]:
Martin v. Löwis99ac3282002-10-18 17:34:18 +0000311 upper = int(record[12], 16) - char
312 assert -32768 <= upper <= 32767
313 upper = upper & 0xffff
Fredrik Lundhe9133f72000-09-25 17:59:57 +0000314 else:
315 upper = 0
316 if record[13]:
Martin v. Löwis99ac3282002-10-18 17:34:18 +0000317 lower = int(record[13], 16) - char
318 assert -32768 <= lower <= 32767
319 lower = lower & 0xffff
Fredrik Lundhe9133f72000-09-25 17:59:57 +0000320 else:
321 lower = 0
322 if record[14]:
Martin v. Löwis99ac3282002-10-18 17:34:18 +0000323 title = int(record[14], 16) - char
324 assert -32768 <= lower <= 32767
325 title = title & 0xffff
Fredrik Lundhe9133f72000-09-25 17:59:57 +0000326 else:
327 title = 0
Fredrik Lundh0f8fad42000-09-25 21:01:56 +0000328 # decimal digit, integer digit
329 decimal = 0
330 if record[6]:
331 flags |= DECIMAL_MASK
332 decimal = int(record[6])
333 digit = 0
334 if record[7]:
335 flags |= DIGIT_MASK
336 digit = int(record[7])
Hye-Shik Chang974ed7c2004-06-02 16:49:17 +0000337 if record[15] in ('W', 'F'): # Wide or Full width
338 flags |= WIDE_MASK
Fredrik Lundhe9133f72000-09-25 17:59:57 +0000339 item = (
Hye-Shik Chang974ed7c2004-06-02 16:49:17 +0000340 upper, lower, title, decimal, digit, flags
Fredrik Lundhe9133f72000-09-25 17:59:57 +0000341 )
342 # add entry to index and item tables
343 i = cache.get(item)
344 if i is None:
345 cache[item] = i = len(table)
346 table.append(item)
347 index[char] = i
348
Fredrik Lundh9e9bcda2001-01-21 17:01:31 +0000349 print len(table), "unique character type entries"
Fredrik Lundhe9133f72000-09-25 17:59:57 +0000350
Fredrik Lundhfad27ae2000-11-03 20:24:15 +0000351 print "--- Writing", FILE, "..."
352
Fredrik Lundh9e9bcda2001-01-21 17:01:31 +0000353 fp = open(FILE, "w")
Fred Drake9c685052000-10-26 03:56:46 +0000354 print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
355 print >>fp
356 print >>fp, "/* a list of unique character type descriptors */"
357 print >>fp, "const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {"
Fredrik Lundhe9133f72000-09-25 17:59:57 +0000358 for item in table:
Fred Drake9c685052000-10-26 03:56:46 +0000359 print >>fp, " {%d, %d, %d, %d, %d, %d}," % item
360 print >>fp, "};"
361 print >>fp
Fredrik Lundhe9133f72000-09-25 17:59:57 +0000362
363 # split decomposition index table
Fredrik Lundhfad27ae2000-11-03 20:24:15 +0000364 index1, index2, shift = splitbins(index, trace)
Fredrik Lundhe9133f72000-09-25 17:59:57 +0000365
Fred Drake9c685052000-10-26 03:56:46 +0000366 print >>fp, "/* type indexes */"
367 print >>fp, "#define SHIFT", shift
Fredrik Lundh7b7dd102001-01-21 22:41:08 +0000368 Array("index1", index1).dump(fp, trace)
369 Array("index2", index2).dump(fp, trace)
Fredrik Lundhe9133f72000-09-25 17:59:57 +0000370
Fredrik Lundh9e9bcda2001-01-21 17:01:31 +0000371 fp.close()
372
373# --------------------------------------------------------------------
374# unicode name database
375
def makeunicodename(unicode, trace):
    # Generate Modules/unicodename_db.h: a word-based "phrasebook"
    # encoding of character names (code->name), plus a static hash table
    # for name->code lookup.

    FILE = "Modules/unicodename_db.h"

    print "--- Preparing", FILE, "..."

    # collect names (skip unnamed and "<...>" placeholder records);
    # a NUL byte terminates each stored name
    names = [None] * len(unicode.chars)

    for char in unicode.chars:
        record = unicode.table[char]
        if record:
            name = record[1].strip()
            if name and name[0] != "<":
                names[char] = name + chr(0)

    print len(filter(lambda n: n is not None, names)), "distinct names"

    # collect unique words from names (note that we differ between
    # words inside a sentence, and words ending a sentence.  the
    # latter includes the trailing null byte.

    words = {}
    n = b = 0
    for char in unicode.chars:
        name = names[char]
        if name:
            w = name.split()
            b = b + len(name)
            n = n + len(w)
            # each dict value is a list whose length is the word's
            # occurrence count (contents are dummies)
            for w in w:
                l = words.get(w)
                if l:
                    l.append(None)
                else:
                    words[w] = [len(words)]

    print n, "words in text;", b, "bytes"

    wordlist = words.items()

    # sort on falling frequency, then by name
    def cmpwords((aword, alist),(bword, blist)):
        r = -cmp(len(alist),len(blist))
        if r:
            return r
        return cmp(aword, bword)
    wordlist.sort(cmpwords)

    # figure out how many phrasebook escapes we need: word indexes
    # >= "short" are stored as two bytes, introduced by an escape byte
    escapes = 0
    while escapes * 256 < len(wordlist):
        escapes = escapes + 1
    print escapes, "escapes"

    short = 256 - escapes

    assert short > 0

    print short, "short indexes in lexicon"

    # statistics
    n = 0
    for i in range(short):
        n = n + len(wordlist[i][1])
    print n, "short indexes in phrasebook"

    # pick the most commonly used words, and sort the rest on falling
    # length (to maximize overlap)

    wordlist, wordtail = wordlist[:short], wordlist[short:]
    wordtail.sort(lambda a, b: len(b[0])-len(a[0]))
    wordlist.extend(wordtail)

    # generate lexicon from words

    lexicon_offset = [0]
    lexicon = ""
    words = {}

    # build a lexicon string
    offset = 0
    for w, x in wordlist:
        # encoding: bit 7 indicates last character in word (chr(128)
        # indicates the last character in an entire string)
        ww = w[:-1] + chr(ord(w[-1])+128)
        # reuse string tails, when possible
        o = lexicon.find(ww)
        if o < 0:
            o = offset
            lexicon = lexicon + ww
            offset = offset + len(w)
        words[w] = len(lexicon_offset)
        lexicon_offset.append(o)

    lexicon = map(ord, lexicon)

    # generate phrasebook from names and lexicon: each name becomes a
    # sequence of word indexes (one byte if < short, else two bytes)
    phrasebook = [0]
    phrasebook_offset = [0] * len(unicode.chars)
    for char in unicode.chars:
        name = names[char]
        if name:
            w = name.split()
            phrasebook_offset[char] = len(phrasebook)
            for w in w:
                i = words[w]
                if i < short:
                    phrasebook.append(i)
                else:
                    # store as two bytes
                    phrasebook.append((i>>8) + short)
                    phrasebook.append(i&255)

    # every phrasebook entry must fit in an unsigned char
    assert getsize(phrasebook) == 1

    #
    # unicode name hash table

    # extract names
    data = []
    for char in unicode.chars:
        record = unicode.table[char]
        if record:
            name = record[1].strip()
            if name and name[0] != "<":
                data.append((name, char))

    # the magic number 47 was chosen to minimize the number of
    # collisions on the current data set.  if you like, change it
    # and see what happens...

    codehash = Hash("code", data, 47)

    print "--- Writing", FILE, "..."

    fp = open(FILE, "w")
    print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
    print >>fp
    print >>fp, "#define NAME_MAXLEN", 256
    print >>fp
    print >>fp, "/* lexicon */"
    Array("lexicon", lexicon).dump(fp, trace)
    Array("lexicon_offset", lexicon_offset).dump(fp, trace)

    # split the phrasebook offset table into two levels
    offset1, offset2, shift = splitbins(phrasebook_offset, trace)

    print >>fp, "/* code->name phrasebook */"
    print >>fp, "#define phrasebook_shift", shift
    print >>fp, "#define phrasebook_short", short

    Array("phrasebook", phrasebook).dump(fp, trace)
    Array("phrasebook_offset1", offset1).dump(fp, trace)
    Array("phrasebook_offset2", offset2).dump(fp, trace)

    print >>fp, "/* name->code dictionary */"
    codehash.dump(fp, trace)

    fp.close()
536
Fredrik Lundhf367cac2000-09-24 23:18:31 +0000537# --------------------------------------------------------------------
538# the following support code is taken from the unidb utilities
539# Copyright (c) 1999-2000 by Secret Labs AB
540
541# load a unicode-data file from disk
542
Walter Dörwaldaaab30e2002-09-11 20:36:02 +0000543import sys
Fredrik Lundhf367cac2000-09-24 23:18:31 +0000544
class UnicodeData:
    # Loads UnicodeData.txt into self.table (one semicolon-split record
    # per code point, None for unassigned), plus the composition
    # exclusions (self.exclusions) and the East Asian width, which is
    # appended to each record as field 15.

    def __init__(self, filename, exclusions, eastasianwidth, expand=1):
        file = open(filename)
        table = [None] * 0x110000
        while 1:
            s = file.readline()
            if not s:
                break
            s = s.strip().split(";")
            char = int(s[0], 16)
            table[char] = s

        # expand first-last ranges: "<..., First>"/"<..., Last>" record
        # pairs stand for a whole range; replicate the record (with the
        # code point fixed up) for every character in between
        if expand:
            field = None
            for i in range(0, 0x110000):
                s = table[i]
                if s:
                    if s[1][-6:] == "First>":
                        s[1] = ""
                        field = s[:]
                    elif s[1][-5:] == "Last>":
                        s[1] = ""
                        field = None
                elif field:
                    field[0] = hex(i)
                    table[i] = field

        # public attributes
        self.filename = filename
        self.table = table
        self.chars = range(0x110000) # unicode 3.2

        # composition exclusions: a set (dict) of excluded code points
        file = open(exclusions)
        self.exclusions = {}
        for s in file:
            s = s.strip()
            if not s:
                continue
            if s[0] == '#':
                continue
            char = int(s.split()[0],16)
            self.exclusions[char] = 1

        # east asian width: entries are either a single code point or a
        # "first..last" range, followed by ";<width class>"
        widths = [None] * 0x110000
        for s in open(eastasianwidth):
            s = s.strip()
            if not s:
                continue
            if s[0] == '#':
                continue
            s = s.split()[0].split(';')
            if '..' in s[0]:
                first, last = [int(c, 16) for c in s[0].split('..')]
                chars = range(first, last+1)
            else:
                chars = [int(s[0], 16)]
            for char in chars:
                widths[char] = s[1]
        # append the width (or None) as field 15 of every record
        for i in range(0, 0x110000):
            if table[i] is not None:
                table[i].append(widths[i])

    def uselatin1(self):
        # restrict character range to ISO Latin 1
        self.chars = range(256)
612
Fredrik Lundh9e9bcda2001-01-21 17:01:31 +0000613# hash table tools
614
615# this is a straight-forward reimplementation of Python's built-in
616# dictionary type, using a static data structure, and a custom string
617# hash algorithm.
618
def myhash(s, magic):
    # Case-insensitive multiplicative string hash, folded to 24 bits.
    # Must stay in sync with the C implementation that probes the
    # generated table.  NOTE: relies on Python 2 long-integer semantics
    # (the 0x...L literal); do not port blindly.
    h = 0
    for c in map(ord, s.upper()):
        h = (h * magic) + c
        ix = h & 0xff000000L
        if ix:
            # fold overflowing high byte back into the low 24 bits
            h = (h ^ ((ix>>24) & 0xff)) & 0x00ffffff
    return h
627
# (table size, polynomial) pairs for the static hash table; sizes are
# powers of two, each paired with a value used to build the probe
# polynomial in Hash.__init__
SIZES = [
    (4,3), (8,3), (16,3), (32,5), (64,3), (128,3), (256,29), (512,17),
    (1024,9), (2048,5), (4096,83), (8192,27), (16384,43), (32768,3),
    (65536,45), (131072,9), (262144,39), (524288,39), (1048576,9),
    (2097152,5), (4194304,3), (8388608,33), (16777216,27)
]
634
class Hash:
    # Build a static open-addressing hash table from (key, value) pairs,
    # for emission as a C array.  Lookup code on the C side must use the
    # same myhash() and probe sequence.

    def __init__(self, name, data, magic):
        # turn a (key, value) list into a static hash table structure

        # determine table size: first power of two larger than the data
        # set, with its associated probe polynomial
        for size, poly in SIZES:
            if size > len(data):
                poly = size + poly
                break
        else:
            raise AssertionError, "ran out of polynominals"

        print size, "slots in hash table"

        table = [None] * size

        mask = size-1

        n = 0

        hash = myhash

        # initialize hash table
        for key, value in data:
            h = hash(key, magic)
            i = (~h) & mask
            v = table[i]
            if v is None:
                table[i] = value
                continue
            # slot taken: probe with a secondary increment derived from
            # the hash, stepping the increment through the polynomial
            incr = (h ^ (h >> 3)) & mask;
            if not incr:
                incr = mask
            while 1:
                n = n + 1
                i = (i + incr) & mask
                v = table[i]
                if v is None:
                    table[i] = value
                    break
                incr = incr << 1
                if incr > mask:
                    incr = incr ^ poly

        print n, "collisions"
        self.collisions = n

        # replace empty slots with 0 so the C array is fully numeric
        for i in range(len(table)):
            if table[i] is None:
                table[i] = 0

        self.data = Array(name + "_hash", table)
        self.magic = magic
        self.name = name
        self.size = size
        self.poly = poly

    def dump(self, file, trace):
        # write data to file, as a C array, plus the #defines the C
        # lookup code needs (magic, size, poly)
        self.data.dump(file, trace)
        file.write("#define %s_magic %d\n" % (self.name, self.magic))
        file.write("#define %s_size %d\n" % (self.name, self.size))
        file.write("#define %s_poly %d\n" % (self.name, self.poly))
698
Fredrik Lundhf367cac2000-09-24 23:18:31 +0000699# stuff to deal with arrays of unsigned integers
700
class Array:
    # A named array of unsigned integers, dumpable as a static C array
    # using the smallest element type that fits the data.

    def __init__(self, name, data):
        self.name = name
        self.data = data

    def dump(self, file, trace=0):
        # write data to file, as a C array
        size = getsize(self.data)
        if trace:
            print >>sys.stderr, self.name+":", size*len(self.data), "bytes"
        file.write("static ")
        if size == 1:
            file.write("unsigned char")
        elif size == 2:
            file.write("unsigned short")
        else:
            file.write("unsigned int")
        file.write(" " + self.name + "[] = {\n")
        if self.data:
            s = "    "
            for item in self.data:
                i = str(item) + ", "
                # wrap lines at 78 columns
                if len(s) + len(i) > 78:
                    file.write(s + "\n")
                    s = "    " + i
                else:
                    s = s + i
            if s.strip():
                file.write(s + "\n")
        file.write("};\n\n")
732
def getsize(data):
    """Return the smallest C integer width in bytes (1, 2, or 4) that
    can represent every value in *data*, a non-empty sequence of ints."""
    largest = max(data)
    for width, limit in ((1, 256), (2, 65536)):
        if largest < limit:
            return width
    return 4
742
def splitbins(t, trace=0):
    """t, trace=0 -> (t1, t2, shift).  Split a table to save space.

    t is a sequence of ints.  This function can be useful to save space if
    many of the ints are the same.  t1 and t2 are lists of ints, and shift
    is an int, chosen to minimize the combined size of t1 and t2 (in C
    code), and where for each i in range(len(t)),
        t[i] == t2[(t1[i >> shift] << shift) + (i & mask)]
    where mask is a bitmask isolating the last "shift" bits.

    If optional arg trace is non-zero (default zero), progress info
    is printed to sys.stderr.  The higher the value, the more info
    you'll get.
    """

    import sys
    if trace:
        def dump(t1, t2, shift, bytes):
            print >>sys.stderr, "%d+%d bins at shift %d; %d bytes" % (
                len(t1), len(t2), shift, bytes)
        print >>sys.stderr, "Size of original table:", len(t)*getsize(t), \
                            "bytes"
    n = len(t)-1    # last valid index
    maxshift = 0    # the most we can shift n and still have something left
    if n > 0:
        while n >> 1:
            n >>= 1
            maxshift += 1
    del n
    bytes = sys.maxint  # smallest total size so far
    t = tuple(t)    # so slices can be dict keys
    # try every bin size 2**shift and keep the cheapest decomposition
    for shift in range(maxshift + 1):
        t1 = []
        t2 = []
        size = 2**shift
        bincache = {}
        for i in range(0, len(t), size):
            bin = t[i:i+size]
            index = bincache.get(bin)
            if index is None:
                # first time this bin is seen: append it to t2
                index = len(t2)
                bincache[bin] = index
                t2.extend(bin)
            t1.append(index >> shift)
        # determine memory size
        b = len(t1)*getsize(t1) + len(t2)*getsize(t2)
        if trace > 1:
            dump(t1, t2, shift, b)
        if b < bytes:
            best = t1, t2, shift
            bytes = b
    t1, t2, shift = best
    if trace:
        print >>sys.stderr, "Best:",
        dump(t1, t2, shift, bytes)
    if __debug__:
        # exhaustively verify that the decomposition is correct
        mask = ~((~0) << shift) # i.e., low-bit mask of shift bits
        for i in xrange(len(t)):
            assert t[i] == t2[(t1[i >> shift] << shift) + (i & mask)]
    return best
Fredrik Lundhf367cac2000-09-24 23:18:31 +0000804
if __name__ == "__main__":
    # regenerate all tables, with progress tracing enabled
    maketables(1)