Blame - genUnicode.py - platform/external/libxml2

blob: c5668fdcf3f711c0e0cc109cb14eb83f3bb2a2c2 [file] [log] [blame]

#!/usr/bin/python -u
"""Generate xmlunicode.h and xmlunicode.c from Unicode Character Database files.

The script reads ``Blocks-4.txt`` and ``UnicodeData-3.1.0.txt`` from the
current directory and writes ``xmlunicode.h`` / ``xmlunicode.c`` there.  For
every block and every category it emits one C predicate, plus the two
name-dispatching functions ``xmlUCSIsBlock`` and ``xmlUCSIsCat``.

The code is written so that it runs unchanged on Python 2.6+ and Python 3.
"""
import sys
import string  # no longer used below; kept so the file's import block is unchanged
import time

sources = "Blocks-4.txt UnicodeData-3.1.0.txt"

HEADER_PROLOGUE = """/*
* xmlunicode.h: this header exports interfaces for the Unicode character APIs
*
* This file is automatically generated from the
* UCS description files of the Unicode Character Database
* http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
* using the genUnicode.py Python script.
*
* Generation date: %s
* Sources: %s
* Daniel Veillard <veillard@redhat.com>
*/

#ifndef __XML_UNICODE_H__
#define __XML_UNICODE_H__

#ifdef __cplusplus
extern "C" {
#endif

"""

OUTPUT_PROLOGUE = """/*
* xmlunicode.c: this module implements the Unicode character APIs
*
* This file is automatically generated from the
* UCS description files of the Unicode Character Database
* http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
* using the genUnicode.py Python script.
*
* Generation date: %s
* Sources: %s
* Daniel Veillard <veillard@redhat.com>
*/

#define IN_LIBXML
#include "libxml.h"

#ifdef LIBXML_UNICODE_ENABLED

#include <string.h>
#include <libxml/xmlversion.h>
#include <libxml/xmlunicode.h>

"""

HEADER_EPILOGUE = """
#ifdef __cplusplus
}
#endif
#endif /* __XML_UNICODE_H__ */
"""

OUTPUT_EPILOGUE = """
#endif /* LIBXML_UNICODE_ENABLED */
"""


def open_or_abort(path, mode, message):
    """Open *path* with *mode*; on failure print *message* and exit(1)."""
    try:
        return open(path, mode)
    except IOError:
        print(message)
        sys.exit(1)


def parse_blocks(lines):
    """Parse block description lines into ``{name: ("0x"+start, "0x"+end)}``.

    Each useful line has the form ``START..END; Block Name``.  Lines starting
    with ``#`` and blank lines are skipped.  Spaces are removed from the block
    name.  A line that cannot be split as expected is reported on stdout and
    skipped.
    """
    block_names = {}
    for raw in lines:
        if raw.startswith('#'):
            continue
        line = raw.strip()
        if not line:
            continue
        try:
            fields = line.split(';')
            (start, end) = fields[0].strip().split("..")
            name = fields[1].strip().replace(' ', '')
        except (IndexError, ValueError):
            print("Failed to process line: %s" % (line))
            continue
        block_names[name] = ("0x" + start, "0x" + end)
    return block_names


def parse_categories(lines):
    """Parse character data lines into ``(nbchar, categories)``.

    Field 0 of each ``;``-separated line is the hexadecimal code point and
    field 2 is the category name.  Every code point is recorded twice: under
    the full category name and under its first letter.  ``nbchar`` counts the
    lines that were recorded.

    Lines starting with ``#`` and blank lines are skipped.  A line with too
    few fields, a code point that is not valid hexadecimal, or an empty
    category name is reported on stdout and skipped.
    """
    nbchar = 0
    categories = {}
    for raw in lines:
        if raw.startswith('#'):
            continue
        line = raw.strip()
        if not line:
            continue
        try:
            fields = line.split(';')
            value = int(fields[0].strip(), 16)
            name = fields[2]
            if not name:
                # An empty name has no first letter and cannot form a C
                # identifier suffix, so treat the line as malformed.
                raise ValueError("empty category name")
        except (IndexError, ValueError):
            print("Failed to process line: %s" % (line))
            continue

        nbchar += 1
        categories.setdefault(name, []).append(value)
        categories.setdefault(name[0], []).append(value)
    return nbchar, categories


def collapse_ranges(values):
    """Reduce a list of integers into a list of inclusive ``(first, last)`` runs.

    A run is extended only while each value is exactly the previous one plus
    one, so *values* must already be in ascending order for adjacent code
    points to be merged.  An empty input yields an empty list.
    """
    ranges = []
    start = prev = None
    for val in values:
        if start is None:
            start = prev = val
        elif val == prev + 1:
            prev = val
        else:
            ranges.append((start, prev))
            start = prev = val
    if start is not None:
        ranges.append((start, prev))
    return ranges


def range_expression(ranges):
    """Return the C boolean expression testing ``code`` against *ranges*.

    Single-value ranges become ``(code == X)``; wider ones become
    ``((code >= X) && (code <= Y))``.  Terms are joined with the C ``||``
    operator, one term per output line.  With no ranges the expression is
    ``0`` so that the generated function is still valid C.
    """
    terms = []
    for (begin, end) in ranges:
        if begin == end:
            terms.append("(code == %s)" % (hex(begin)))
        else:
            terms.append("((code >= %s) && (code <= %s))" % (
                hex(begin), hex(end)))
    if not terms:
        return "0"
    # The separator used to be written as " \|\|\n ", which put literal
    # backslashes into the generated C source.
    return " ||\n ".join(terms)


def write_block_functions(header, output, block_names):
    """Emit one ``xmlUCSIs<Block>`` predicate per block plus ``xmlUCSIsBlock``.

    Prototypes go to *header*, definitions to *output*.  Blocks are emitted
    in sorted name order; ``-`` is removed from the name to build the C
    identifier.
    """
    keys = sorted(block_names)
    for block in keys:
        (start, end) = block_names[block]
        name = block.replace('-', '')
        header.write("int\txmlUCSIs%s\t(int code);\n" % name)
        output.write("/*\n xmlUCSIs%s:\n * @code: UCS code point\n" % (name))
        output.write(" \n Check whether the character is part of %s UCS Block\n" %
                     (block))
        output.write(" \n Returns 1 if true 0 otherwise\n */\n")
        output.write("int\nxmlUCSIs%s(int code) {\n" % name)
        output.write(" return((code >= %s) && (code <= %s));\n" % (start, end))
        output.write("}\n\n")

    header.write("\nint\txmlUCSIsBlock\t(int code,\n\t\t\t const char *block);\n\n")
    output.write("/*\n xmlUCSIsBlock:\n * @code: UCS code point\n")
    output.write(" * @block: UCS block name\n")
    output.write(" \n Check whether the caracter is part of the UCS Block\n")
    output.write(" \n Returns 1 if true, 0 if false and -1 on unknown block\n */\n")
    output.write("int\nxmlUCSIsBlock(int code, const char *block) {\n")
    for block in keys:
        name = block.replace('-', '')
        output.write(" if (!strcmp(block, \"%s\"))\n return(xmlUCSIs%s(code));\n" %
                     (block, name))
    output.write(" return(-1);\n}\n\n")


def write_category_functions(header, output, categories):
    """Emit one ``xmlUCSIsCat<Name>`` predicate per category plus ``xmlUCSIsCat``.

    *categories* maps a category name to its list of ``(first, last)``
    ranges.  Prototypes go to *header*, definitions to *output*, in sorted
    name order.
    """
    keys = sorted(categories)
    for name in keys:
        header.write("int\txmlUCSIsCat%s\t(int code);\n" % name)
        output.write("/*\n xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name))
        output.write(" \n Check whether the character is part of %s UCS Category\n" %
                     (name))
        output.write(" \n Returns 1 if true 0 otherwise\n */\n")
        output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)
        output.write(" return(" + range_expression(categories[name]))
        output.write(");\n}\n\n")

    header.write("\nint\txmlUCSIsCat\t(int code,\n\t\t\t const char *cat);\n")
    output.write("/*\n xmlUCSIsCat:\n * @code: UCS code point\n")
    output.write(" * @cat: UCS Category name\n")
    output.write(" \n Check whether the caracter is part of the UCS Category\n")
    output.write(" \n Returns 1 if true, 0 if false and -1 on unknown category\n */\n")
    output.write("int\nxmlUCSIsCat(int code, const char *cat) {\n")
    for name in keys:
        output.write(" if (!strcmp(cat, \"%s\"))\n return(xmlUCSIsCat%s(code));\n" %
                     (name, name))
    output.write(" return(-1);\n}\n\n")


def main():
    """Read the two source files and generate xmlunicode.h / xmlunicode.c."""
    blocks = open_or_abort("Blocks-4.txt", "r",
                           "Missing Blocks-4.txt, aborting ...")
    with blocks:
        block_names = parse_blocks(blocks)
    print("Parsed %d blocks descriptions" % (len(block_names)))

    data = open_or_abort("UnicodeData-3.1.0.txt", "r",
                         "Missing UnicodeData-3.1.0.txt, aborting ...")
    # The data file used to be left open: the old code closed `blocks` a
    # second time instead of closing `data`.
    with data:
        nbchar, categories = parse_categories(data)
    print("Parsed %d char generating %d categories" % (nbchar, len(categories)))

    # Reduce each category's list of code points into ranges.
    categories = dict((cat, collapse_ranges(values))
                      for (cat, values) in categories.items())

    #
    # Generate the resulting files
    #
    header = open_or_abort("xmlunicode.h", "w", "Failed to open xmlunicode.h")
    with header:
        output = open_or_abort("xmlunicode.c", "w",
                               "Failed to open xmlunicode.c")
        with output:
            date = time.asctime(time.localtime(time.time()))

            header.write(HEADER_PROLOGUE % (date, sources))
            output.write(OUTPUT_PROLOGUE % (date, sources))

            write_block_functions(header, output, block_names)
            write_category_functions(header, output, categories)

            header.write(HEADER_EPILOGUE)
            output.write(OUTPUT_EPILOGUE)


if __name__ == "__main__":
    main()