merged the current state of XML Schemas implementation, it is not configured in by default
* Makefile.am TODO_SCHEMAS configure.in genUnicode.py testAutomata.c
testRegexp.c testSchemas.c xmlregexp.c xmlschemas.c xmlschemastypes.c
xmlunicode.c include/libxml/Makefile.am
include/libxml/schemasInternals.h include/libxml/xmlautomata.h
include/libxml/xmlregexp.h include/libxml/xmlschemas.h
include/libxml/xmlschemastypes.h include/libxml/xmlunicode.h
include/libxml/xmlversion.h.in : merged the current state of the
XML Schemas implementation; it is not configured in by default,
and a specific --schemas configure option has been added.
* test/automata test/regexp test/schemas Makefile.am
result/automata result/regexp result/schemas:
merged automata/regexp/schemas regression tests
Daniel
diff --git a/genUnicode.py b/genUnicode.py
new file mode 100755
index 0000000..c5668fd
--- /dev/null
+++ b/genUnicode.py
@@ -0,0 +1,256 @@
+#!/usr/bin/python -u
+import sys
+import string
+import time
+
+sources = "Blocks-4.txt UnicodeData-3.1.0.txt"
+
+try:
+ blocks = open("Blocks-4.txt", "r")
+except:
+ print "Missing Blocks-4.txt, aborting ..."
+ sys.exit(1)
+
+BlockNames = {}
+for line in blocks.readlines():
+ if line[0] == '#':
+ continue
+ line = string.strip(line)
+ if line == '':
+ continue
+ try:
+ fields = string.split(line, ';')
+ range = string.strip(fields[0])
+ (start, end) = string.split(range, "..")
+ name = string.strip(fields[1])
+ name = string.replace(name, ' ', '')
+ except:
+ print "Failed to process line: %s" % (line)
+ continue
+ BlockNames[name] = ("0x"+start, "0x"+end)
+blocks.close()
+print "Parsed %d blocks descriptions" % (len(BlockNames.keys()))
+
+try:
+ data = open("UnicodeData-3.1.0.txt", "r")
+except:
+ print "Missing UnicodeData-3.1.0.txt, aborting ..."
+ sys.exit(1)
+
+nbchar = 0;
+Categories = {}
+for line in data.readlines():
+ if line[0] == '#':
+ continue
+ line = string.strip(line)
+ if line == '':
+ continue
+ try:
+ fields = string.split(line, ';')
+ point = string.strip(fields[0])
+ value = 0
+ while point != '':
+ value = value * 16
+ if point[0] >= '0' and point[0] <= '9':
+ value = value + ord(point[0]) - ord('0')
+ elif point[0] >= 'A' and point[0] <= 'F':
+ value = value + 10 + ord(point[0]) - ord('A')
+ elif point[0] >= 'a' and point[0] <= 'f':
+ value = value + 10 + ord(point[0]) - ord('a')
+ point = point[1:]
+ name = fields[2]
+ except:
+ print "Failed to process line: %s" % (line)
+ continue
+
+ nbchar = nbchar + 1
+ try:
+ Categories[name].append(value)
+ except:
+ try:
+ Categories[name] = [value]
+ except:
+ print "Failed to process line: %s" % (line)
+ try:
+ Categories[name[0]].append(value)
+ except:
+ try:
+ Categories[name[0]] = [value]
+ except:
+ print "Failed to process line: %s" % (line)
+
+blocks.close()
+print "Parsed %d char generating %d categories" % (nbchar, len(Categories.keys()))
+#reduce the number list into ranges
+for cat in Categories.keys():
+ list = Categories[cat]
+ start = -1
+ prev = -1
+ end = -1
+ ranges = []
+ for val in list:
+ if start == -1:
+ start = val
+ prev = val
+ continue
+ elif val == prev + 1:
+ prev = val
+ continue
+ elif prev == start:
+ ranges.append((prev, prev))
+ start = val
+ prev = val
+ continue
+ else:
+ ranges.append((start, prev))
+ start = val
+ prev = val
+ continue
+ if prev == start:
+ ranges.append((prev, prev))
+ else:
+ ranges.append((start, prev))
+ Categories[cat] = ranges
+
+#
+# Generate the resulting files
+#
+try:
+ header = open("xmlunicode.h", "w")
+except:
+ print "Failed to open xmlunicode.h"
+ sys.exit(1)
+
+try:
+ output = open("xmlunicode.c", "w")
+except:
+ print "Failed to open xmlunicode.c"
+ sys.exit(1)
+
+date = time.asctime(time.localtime(time.time()))
+
+header.write(
+"""/*
+ * xmlunicode.h: this header exports interfaces for the Unicode character APIs
+ *
+ * This file is automatically generated from the
+ * UCS description files of the Unicode Character Database
+ * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
+ * using the genUnicode.py Python script.
+ *
+ * Generation date: %s
+ * Sources: %s
+ * Daniel Veillard <veillard@redhat.com>
+ */
+
+#ifndef __XML_UNICODE_H__
+#define __XML_UNICODE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+""" % (date, sources));
+output.write(
+"""/*
+ * xmlunicode.c: this module implements the Unicode character APIs
+ *
+ * This file is automatically generated from the
+ * UCS description files of the Unicode Character Database
+ * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
+ * using the genUnicode.py Python script.
+ *
+ * Generation date: %s
+ * Sources: %s
+ * Daniel Veillard <veillard@redhat.com>
+ */
+
+#define IN_LIBXML
+#include "libxml.h"
+
+#ifdef LIBXML_UNICODE_ENABLED
+
+#include <string.h>
+#include <libxml/xmlversion.h>
+#include <libxml/xmlunicode.h>
+
+""" % (date, sources));
+
+keys = BlockNames.keys()
+keys.sort()
+for block in keys:
+ (start, end) = BlockNames[block]
+ name = string.replace(block, '-', '')
+ header.write("int\txmlUCSIs%s\t(int code);\n" % name)
+ output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name))
+ output.write(" *\n * Check whether the character is part of %s UCS Block\n"%
+ (block))
+ output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
+ output.write("int\nxmlUCSIs%s(int code) {\n" % name)
+ output.write(" return((code >= %s) && (code <= %s));\n" % (start, end))
+ output.write("}\n\n")
+
+header.write("\nint\txmlUCSIsBlock\t(int code,\n\t\t\t const char *block);\n\n")
+output.write("/**\n * xmlUCSIsBlock:\n * @code: UCS code point\n")
+output.write(" * @block: UCS block name\n")
+output.write(" *\n * Check whether the caracter is part of the UCS Block\n")
+output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown block\n */\n");
+output.write("int\nxmlUCSIsBlock(int code, const char *block) {\n")
+keys = BlockNames.keys()
+keys.sort()
+for block in keys:
+ name = string.replace(block, '-', '')
+ output.write(" if (!strcmp(block, \"%s\"))\n return(xmlUCSIs%s(code));\n" %
+ (block, name));
+output.write(" return(-1);\n}\n\n")
+
+
+keys = Categories.keys()
+keys.sort()
+for name in keys:
+ ranges = Categories[name]
+ header.write("int\txmlUCSIsCat%s\t(int code);\n" % name)
+ output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name))
+ output.write(" *\n * Check whether the character is part of %s UCS Category\n"%
+ (name))
+ output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
+ output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)
+ start = 1
+ for range in ranges:
+ (begin, end) = range;
+ if start:
+ output.write(" return(");
+ start = 0
+ else:
+ output.write(" ||\n ");
+ if (begin == end):
+ output.write("(code == %s)" % (hex(begin)))
+ else:
+ output.write("((code >= %s) && (code <= %s))" % (
+ hex(begin), hex(end)))
+ output.write(");\n}\n\n")
+
+header.write("\nint\txmlUCSIsCat\t(int code,\n\t\t\t const char *cat);\n")
+output.write("/**\n * xmlUCSIsCat:\n * @code: UCS code point\n")
+output.write(" * @cat: UCS Category name\n")
+output.write(" *\n * Check whether the caracter is part of the UCS Category\n")
+output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown category\n */\n");
+output.write("int\nxmlUCSIsCat(int code, const char *cat) {\n")
+keys = Categories.keys()
+keys.sort()
+for name in keys:
+ output.write(" if (!strcmp(cat, \"%s\"))\n return(xmlUCSIsCat%s(code));\n" %
+ (name, name));
+output.write(" return(-1);\n}\n\n")
+
+header.write("""
+#ifdef __cplusplus
+}
+#endif
+#endif /* __XML_UNICODE_H__ */
+""");
+output.write("""
+#endif /* LIBXML_UNICODE_ENABLED */
+""");
+header.close()
+output.close()