#!/usr/bin/python -u
"""Generate xmlunicode.h and xmlunicode.c from Unicode database files.

The script reads two files from the current directory:

* Blocks-4.txt          -- lines of the form "START..END; Block Name"
* UnicodeData-3.1.0.txt -- semicolon separated records whose first field
                           is a hexadecimal code point and whose third
                           field is the general category (e.g. "Lu")

It then writes xmlunicode.h and xmlunicode.c, which contain one C
predicate per block, one per category (and per category first letter),
plus the name based dispatchers xmlUCSIsBlock() and xmlUCSIsCat().
"""
import sys
import string
import time

sources = "Blocks-4.txt UnicodeData-3.1.0.txt"

# Leading part of xmlunicode.h; formatted with (generation date, sources).
HEADER_PROLOGUE = """/*
 * xmlunicode.h: this header exports interfaces for the Unicode character APIs
 *
 * This file is automatically generated from the
 * UCS description files of the Unicode Character Database
 * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
 * using the genUnicode.py Python script.
 *
 * Generation date: %s
 * Sources: %s
 * Daniel Veillard <veillard@redhat.com>
 */

#ifndef __XML_UNICODE_H__
#define __XML_UNICODE_H__

#include <libxml/xmlversion.h>

#ifdef __cplusplus
extern "C" {
#endif

"""

# Leading part of xmlunicode.c; formatted with (generation date, sources).
SOURCE_PROLOGUE = """/*
 * xmlunicode.c: this module implements the Unicode character APIs
 *
 * This file is automatically generated from the
 * UCS description files of the Unicode Character Database
 * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
 * using the genUnicode.py Python script.
 *
 * Generation date: %s
 * Sources: %s
 * Daniel Veillard <veillard@redhat.com>
 */

#define IN_LIBXML
#include "libxml.h"

#ifdef LIBXML_UNICODE_ENABLED

#include <string.h>
#include <libxml/xmlversion.h>
#include <libxml/xmlunicode.h>

"""

# Trailing part of xmlunicode.h.
HEADER_EPILOGUE = """
#ifdef __cplusplus
}
#endif
#endif /* __XML_UNICODE_H__ */
"""

# Trailing part of xmlunicode.c.
SOURCE_EPILOGUE = """
#endif /* LIBXML_UNICODE_ENABLED */
"""


def open_or_exit(path, mode, message):
    """Open path with mode; on failure print message and exit with status 1."""
    try:
        return open(path, mode)
    except IOError:
        print(message)
        sys.exit(1)


def parse_code_point(text):
    """Convert a string of hexadecimal digits into an integer.

    Raises ValueError when text is empty or contains anything other than
    hexadecimal digits, so that a malformed record is reported instead of
    silently producing a wrong code point.
    """
    # str.strip(chars) removes hex digits from both ends; whatever is left
    # over starts or ends with a non-hex character.
    if not text or text.strip(string.hexdigits):
        raise ValueError("invalid code point: %r" % (text,))
    return int(text, 16)


def parse_blocks(lines):
    """Parse the lines of Blocks-4.txt.

    Lines starting with '#' and blank lines are skipped.  Lines that cannot
    be parsed are reported on stdout and skipped.

    Returns a dict mapping the block name with spaces removed to a pair of
    strings ("0x" + start, "0x" + end).
    """
    block_names = {}
    for raw in lines:
        if raw.startswith('#'):
            continue
        line = raw.strip()
        if not line:
            continue
        try:
            fields = line.split(';')
            # Exactly one ".." separator is expected; anything else fails
            # the unpacking with ValueError.
            (first, last) = fields[0].strip().split("..")
            name = fields[1].strip().replace(' ', '')
        except (ValueError, IndexError):
            print("Failed to process line: %s" % (line))
            continue
        block_names[name] = ("0x" + first, "0x" + last)
    return block_names


def parse_unicode_data(lines):
    """Parse the lines of UnicodeData-3.1.0.txt.

    Each accepted record contributes its code point both to its full
    category (third field, e.g. "Lu") and to the category made of the first
    letter only (e.g. "L").  Lines that cannot be parsed are reported on
    stdout and skipped.

    Returns a pair (number of accepted records, dict mapping category name
    to the list of code points in file order).
    """
    categories = {}
    nbchar = 0
    for raw in lines:
        if raw.startswith('#'):
            continue
        line = raw.strip()
        if not line:
            continue
        try:
            fields = line.split(';')
            value = parse_code_point(fields[0].strip())
            name = fields[2]
        except (ValueError, IndexError):
            print("Failed to process line: %s" % (line))
            continue

        nbchar = nbchar + 1
        categories.setdefault(name, []).append(value)
        if name:
            categories.setdefault(name[0], []).append(value)
        else:
            # An empty category field has no first letter to file under.
            print("Failed to process line: %s" % (line))
    return nbchar, categories


def collapse_ranges(values):
    """Collapse a list of integers into (first, last) inclusive ranges.

    A range is extended only while each value is exactly one more than the
    previous one, so the input is expected in ascending order; it is not
    sorted or deduplicated here.  An empty input gives an empty list.
    """
    ranges = []
    first = prev = None
    for val in values:
        if first is None:
            first = prev = val
        elif val == prev + 1:
            prev = val
        else:
            ranges.append((first, prev))
            first = prev = val
    if first is not None:
        ranges.append((first, prev))
    return ranges


def write_block_functions(header, output, block_names):
    """Emit one xmlUCSIs<Block>() per block plus xmlUCSIsBlock().

    header and output are writable file objects for the .h and .c files;
    block_names is the dict returned by parse_blocks().  Blocks are emitted
    in sorted name order; '-' is dropped from the C identifier but kept in
    the name matched by xmlUCSIsBlock().
    """
    ordered = sorted(block_names)
    for block in ordered:
        (first, last) = block_names[block]
        name = block.replace('-', '')
        header.write("XMLPUBFUN int XMLCALL xmlUCSIs%s\t(int code);\n" % name)
        output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name))
        output.write(" *\n * Check whether the character is part of %s UCS Block\n" %
                     (block))
        output.write(" *\n * Returns 1 if true 0 otherwise\n */\n")
        output.write("int\nxmlUCSIs%s(int code) {\n" % name)
        output.write(" return((code >= %s) && (code <= %s));\n" % (first, last))
        output.write("}\n\n")

    header.write("\nXMLPUBFUN int XMLCALL xmlUCSIsBlock\t(int code,\n\t\t\t const char *block);\n\n")
    output.write("/**\n * xmlUCSIsBlock:\n * @code: UCS code point\n")
    output.write(" * @block: UCS block name\n")
    output.write(" *\n * Check whether the caracter is part of the UCS Block\n")
    output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown block\n */\n")
    output.write("int\nxmlUCSIsBlock(int code, const char *block) {\n")
    for block in ordered:
        name = block.replace('-', '')
        output.write(" if (!strcmp(block, \"%s\"))\n return(xmlUCSIs%s(code));\n" %
                     (block, name))
    output.write(" return(-1);\n}\n\n")


def write_category_functions(header, output, categories):
    """Emit one xmlUCSIsCat<Name>() per category plus xmlUCSIsCat().

    header and output are writable file objects for the .h and .c files;
    categories maps each category name to its list of (first, last)
    inclusive ranges.  Categories are emitted in sorted name order.
    """
    ordered = sorted(categories)
    for name in ordered:
        header.write("XMLPUBFUN int XMLCALL xmlUCSIsCat%s\t(int code);\n" % name)
        output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name))
        output.write(" *\n * Check whether the character is part of %s UCS Category\n" %
                     (name))
        output.write(" *\n * Returns 1 if true 0 otherwise\n */\n")
        output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)
        tests = []
        for (begin, end) in categories[name]:
            if begin == end:
                tests.append("(code == %s)" % (hex(begin)))
            else:
                tests.append("((code >= %s) && (code <= %s))" % (
                    hex(begin), hex(end)))
        if tests:
            # One test per line, chained with the C "||" operator.
            output.write(" return(" + " ||\n ".join(tests))
        output.write(");\n}\n\n")

    header.write("\nXMLPUBFUN int XMLCALL xmlUCSIsCat\t(int code,\n\t\t\t const char *cat);\n")
    output.write("/**\n * xmlUCSIsCat:\n * @code: UCS code point\n")
    output.write(" * @cat: UCS Category name\n")
    output.write(" *\n * Check whether the caracter is part of the UCS Category\n")
    output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown category\n */\n")
    output.write("int\nxmlUCSIsCat(int code, const char *cat) {\n")
    for name in ordered:
        output.write(" if (!strcmp(cat, \"%s\"))\n return(xmlUCSIsCat%s(code));\n" %
                     (name, name))
    output.write(" return(-1);\n}\n\n")


def main():
    """Read the Unicode description files and write xmlunicode.h / .c."""
    blocks = open_or_exit("Blocks-4.txt", "r",
                          "Missing Blocks-4.txt, aborting ...")
    with blocks:
        block_names = parse_blocks(blocks)
    print("Parsed %d blocks descriptions" % (len(block_names)))

    data = open_or_exit("UnicodeData-3.1.0.txt", "r",
                        "Missing UnicodeData-3.1.0.txt, aborting ...")
    # The with statement closes the data file itself; the previous code
    # closed the already closed blocks file here and leaked this handle.
    with data:
        (nbchar, categories) = parse_unicode_data(data)
    print("Parsed %d char generating %d categories" % (nbchar, len(categories)))

    # reduce the number list into ranges
    for cat in sorted(categories):
        categories[cat] = collapse_ranges(categories[cat])

    #
    # Generate the resulting files
    #
    header = open_or_exit("xmlunicode.h", "w", "Failed to open xmlunicode.h")
    output = open_or_exit("xmlunicode.c", "w", "Failed to open xmlunicode.c")
    with header:
        with output:
            date = time.asctime(time.localtime(time.time()))
            header.write(HEADER_PROLOGUE % (date, sources))
            output.write(SOURCE_PROLOGUE % (date, sources))
            write_block_functions(header, output, block_names)
            write_category_functions(header, output, categories)
            header.write(HEADER_EPILOGUE)
            output.write(SOURCE_EPILOGUE)


if __name__ == "__main__":
    main()