blob: 8f32a659655d8ba0c41bc9c8d3f366e0733d434e [file] [log] [blame]
Daniel Veillard4255d502002-04-16 15:50:10 +00001#!/usr/bin/python -u
2import sys
3import string
4import time
5
# Input files read by this generator; the string is echoed verbatim on the
# "Sources:" line of the banner written to both generated files.
sources = "Blocks-4.txt UnicodeData-3.1.0.txt"

try:
    blocks = open("Blocks-4.txt", "r")
except IOError:
    # Only I/O failures mean "missing"; anything else should surface.
    print("Missing Blocks-4.txt, aborting ...")
    sys.exit(1)

# Maps a block name (spaces removed) to its ("0x<first>", "0x<last>") bounds.
# The bounds stay as text because they are pasted into the generated C code.
BlockNames = {}
for line in blocks:
    if line[0] == '#':
        continue
    line = line.strip()
    if line == '':
        continue
    # Expected shape of a data line: "<first>..<last>; <block name>"
    try:
        fields = line.split(';')
        (start, end) = fields[0].strip().split("..")
        name = fields[1].strip().replace(' ', '')
    except (IndexError, ValueError):
        # IndexError: no ';' separated name; ValueError: range is not "a..b".
        print("Failed to process line: %s" % (line))
        continue
    BlockNames[name] = ("0x" + start, "0x" + end)
blocks.close()
print("Parsed %d blocks descriptions" % (len(BlockNames)))
33
try:
    data = open("UnicodeData-3.1.0.txt", "r")
except IOError:
    # Only I/O failures mean "missing"; anything else should surface.
    print("Missing UnicodeData-3.1.0.txt, aborting ...")
    sys.exit(1)

# Number of data lines successfully accounted for.
nbchar = 0
# Maps a category name (third ';'-separated field) and also its first
# character alone to the list of code points seen, in file order.
Categories = {}
for line in data:
    if line[0] == '#':
        continue
    line = line.strip()
    if line == '':
        continue
    try:
        fields = line.split(';')
        # First field is the code point in hexadecimal.  int() rejects
        # malformed digits instead of silently counting them as zero.
        value = int(fields[0].strip(), 16)
        name = fields[2]
    except (IndexError, ValueError):
        print("Failed to process line: %s" % (line))
        continue
    if name == '':
        # An empty category would yield a generated function literally named
        # xmlUCSIsCat, clashing with the by-name dispatcher emitted later.
        print("Failed to process line: %s" % (line))
        continue

    nbchar = nbchar + 1
    # Record the code point under the full category and under its first
    # letter, creating either list on first use.
    Categories.setdefault(name, []).append(value)
    Categories.setdefault(name[0], []).append(value)

# Close the handle opened above (the original closed `blocks` a second time
# here and never released `data`).
data.close()
print("Parsed %d char generating %d categories" % (nbchar, len(Categories)))
# Collapse each category's list of code points into inclusive (first, last)
# runs of consecutive values; an isolated value v is stored as (v, v).
# A new run is opened whenever a value is not exactly previous + 1.
for cat in Categories.keys():
    codes = Categories[cat]
    spans = []
    first = -1          # -1 marks "no run opened yet"
    last = -1
    for code in codes:
        if first == -1:
            first = code
        elif code != last + 1:
            # Gap found: emit the run collected so far and start a new one.
            spans.append((first, last))
            first = code
        last = code
    # Emit the run that was still open when the values ran out.
    spans.append((first, last))
    Categories[cat] = spans
114
#
# Generate the resulting files
#
# `header` receives the C prototypes (xmlunicode.h) and `output` the C
# implementations (xmlunicode.c).  Only I/O errors are reported as open
# failures, so Ctrl-C or a programming error is no longer disguised as one.
try:
    header = open("xmlunicode.h", "w")
except IOError:
    print("Failed to open xmlunicode.h")
    sys.exit(1)

try:
    output = open("xmlunicode.c", "w")
except IOError:
    # Release the header handle that was already opened before bailing out.
    header.close()
    print("Failed to open xmlunicode.c")
    sys.exit(1)
129
# Timestamp stamped into the banner of both generated files (local time).
date = time.asctime()

# Comment banner shared by both generated files.  Placeholders, in order:
# file-specific title line, generation date, list of source files.
banner = """/*
 * %s
 *
 * This file is automatically generated from the
 * UCS description files of the Unicode Character Database
 * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
 * using the genUnicode.py Python script.
 *
 * Generation date: %s
 * Sources: %s
 * Daniel Veillard <veillard@redhat.com>
 */
"""

# Header: banner, include guard, version include, opening of extern "C".
header.write(banner % (
    "xmlunicode.h: this header exports interfaces for the Unicode character APIs",
    date, sources))
header.write("""
#ifndef __XML_UNICODE_H__
#define __XML_UNICODE_H__

#include <libxml/xmlversion.h>

#ifdef __cplusplus
extern "C" {
#endif

""")

# C file: banner, library-internal define/includes, opening of the
# LIBXML_UNICODE_ENABLED conditional.
output.write(banner % (
    "xmlunicode.c: this module implements the Unicode character APIs",
    date, sources))
output.write("""
#define IN_LIBXML
#include "libxml.h"

#ifdef LIBXML_UNICODE_ENABLED

#include <string.h>
#include <libxml/xmlversion.h>
#include <libxml/xmlunicode.h>

""")
180
# For every block, in sorted name order, emit a prototype into the header and
# a documented range-test function into the C file.  The C identifier is the
# block name with '-' removed; the doc comment keeps the name as stored.
block_func = (
    "/**\n * xmlUCSIs%(ident)s:\n * @code: UCS code point\n"
    " *\n * Check whether the character is part of %(block)s UCS Block\n"
    " *\n * Returns 1 if true 0 otherwise\n */\n"
    "int\nxmlUCSIs%(ident)s(int code) {\n"
    " return((code >= %(first)s) && (code <= %(last)s));\n"
    "}\n\n")
keys = sorted(BlockNames.keys())
for block in keys:
    (start, end) = BlockNames[block]
    name = block.replace('-', '')
    header.write("XMLPUBFUN int XMLCALL xmlUCSIs%s\t(int code);\n" % name)
    output.write(block_func % {
        "ident": name,
        "block": block,
        "first": start,
        "last": end,
    })
194
# Emit xmlUCSIsBlock(): a by-name dispatcher made of one strcmp() test per
# block (sorted by name) that forwards to the matching xmlUCSIs<Block>()
# function and returns -1 when no name matches.
header.write("\nXMLPUBFUN int XMLCALL xmlUCSIsBlock\t(int code,\n\t\t\t const char *block);\n\n")
pieces = [
    "/**\n * xmlUCSIsBlock:\n * @code: UCS code point\n",
    " * @block: UCS block name\n",
    " *\n * Check whether the caracter is part of the UCS Block\n",
    " *\n * Returns 1 if true, 0 if false and -1 on unknown block\n */\n",
    "int\nxmlUCSIsBlock(int code, const char *block) {\n",
]
keys = sorted(BlockNames.keys())
for block in keys:
    # The string compared at run time keeps its '-', the C identifier drops it.
    name = block.replace('-', '')
    pieces.append(" if (!strcmp(block, \"%s\"))\n return(xmlUCSIs%s(code));\n" %
                  (block, name))
pieces.append(" return(-1);\n}\n\n")
output.write("".join(pieces))
208
209
# For every category key, in sorted order, emit a prototype into the header
# and a C predicate whose body is one "return(a || b || ...)" expression with
# one term per (first, last) range stored for that category.
keys = sorted(Categories.keys())
for name in keys:
    ranges = Categories[name]
    header.write("XMLPUBFUN int XMLCALL xmlUCSIsCat%s\t(int code);\n" % name)
    output.write(
        "/**\n * xmlUCSIsCat%(cat)s:\n * @code: UCS code point\n"
        " *\n * Check whether the character is part of %(cat)s UCS Category\n"
        " *\n * Returns 1 if true 0 otherwise\n */\n"
        "int\nxmlUCSIsCat%(cat)s(int code) {\n" % {"cat": name})
    terms = []
    for (begin, end) in ranges:
        if begin != end:
            terms.append("((code >= %s) && (code <= %s))" % (
                         hex(begin), hex(end)))
        else:
            # Single code point: an equality test instead of a range test.
            terms.append("(code == %s)" % (hex(begin)))
    if terms:
        # "return(" is only opened when there is at least one term.
        output.write(" return(" + " ||\n ".join(terms))
    output.write(");\n}\n\n")
234
# Emit xmlUCSIsCat(): a by-name dispatcher made of one strcmp() test per
# category key (sorted) that forwards to the matching xmlUCSIsCat<Name>()
# function and returns -1 when no name matches.
header.write("\nXMLPUBFUN int XMLCALL xmlUCSIsCat\t(int code,\n\t\t\t const char *cat);\n")
pieces = [
    "/**\n * xmlUCSIsCat:\n * @code: UCS code point\n",
    " * @cat: UCS Category name\n",
    " *\n * Check whether the caracter is part of the UCS Category\n",
    " *\n * Returns 1 if true, 0 if false and -1 on unknown category\n */\n",
    "int\nxmlUCSIsCat(int code, const char *cat) {\n",
]
keys = sorted(Categories.keys())
for name in keys:
    pieces.append(" if (!strcmp(cat, \"%s\"))\n return(xmlUCSIsCat%s(code));\n" %
                  (name, name))
pieces.append(" return(-1);\n}\n\n")
output.write("".join(pieces))
247
# Finish both generated files: the header gets the closing of its
# __cplusplus section and of its include guard, the C file the closing of its
# LIBXML_UNICODE_ENABLED conditional; each file is closed once written.
trailers = (
    (header, "\n#ifdef __cplusplus\n}\n#endif\n#endif /* __XML_UNICODE_H__ */\n"),
    (output, "\n#endif /* LIBXML_UNICODE_ENABLED */\n"),
)
for (stream, trailer) in trailers:
    stream.write(trailer)
    stream.close()