Merged the current state of the XML Schemas implementation; it is not configured in by default

* TODO_SCHEMAS testAutomata.c
 testRegexp.c testSchemas.c xmlregexp.c xmlschemas.c xmlschemastypes.c
 xmlunicode.c include/libxml/
 include/libxml/schemasInternals.h include/libxml/xmlautomata.h
 include/libxml/xmlregexp.h include/libxml/xmlschemas.h
 include/libxml/xmlschemastypes.h include/libxml/xmlunicode.h
 include/libxml/ : merged the current state of the
 XML Schemas implementation. It is not configured in by default;
 a specific --schemas configure option has been added.
* test/automata test/regexp test/schemas
  result/automata result/regexp result/schemas:
  merged automata/regexp/schemas regression tests
diff --git a/ b/
new file mode 100755
index 0000000..c5668fd
--- /dev/null
+++ b/
@@ -0,0 +1,256 @@
+#!/usr/bin/python -u
+import sys
+import string
+import time
+sources = "Blocks-4.txt UnicodeData-3.1.0.txt"
+    blocks = open("Blocks-4.txt", "r")
+    print "Missing Blocks-4.txt, aborting ..."
+    sys.exit(1)
+# Parse the block table read from "blocks": BlockNames maps each block name
+# (with spaces removed) to a ("0x<start>", "0x<end>") pair of strings.
+BlockNames = {}
+for line in blocks.readlines():
+    # Skip comment lines and lines that are empty once stripped.
+    if line[0] == '#':
+        continue
+    line = string.strip(line)
+    if line == '':
+	continue
+    try:
+	# A data line is split on ';' into "<start>..<end>" and the block name.
+	fields = string.split(line, ';')
+	# NOTE(review): "range" shadows the builtin of the same name.
+	range = string.strip(fields[0])
+	(start, end) = string.split(range, "..")
+	name = string.strip(fields[1])
+	name = string.replace(name, ' ', '')
+    except:
+        # Any failure while splitting the line is reported and the line skipped.
+        print "Failed to process line: %s" % (line)
+	continue
+    # The bounds stay as text, prefixed with "0x", rather than being converted
+    # to integers.
+    BlockNames[name] = ("0x"+start, "0x"+end)
+print "Parsed %d blocks descriptions" % (len(BlockNames.keys()))
+    data = open("UnicodeData-3.1.0.txt", "r")
+    print "Missing UnicodeData-3.1.0.txt, aborting ..."
+    sys.exit(1)
+# Parse the character table read from "data": Categories maps a category
+# name to the list of code points recorded for it, and nbchar counts the
+# lines that were parsed successfully.
+nbchar = 0;
+Categories = {}
+for line in data.readlines():
+    # Skip comment lines and lines that are empty once stripped.
+    if line[0] == '#':
+        continue
+    line = string.strip(line)
+    if line == '':
+	continue
+    try:
+	fields = string.split(line, ';')
+	# Field 0 is decoded by hand as a hexadecimal number, one digit per
+	# loop iteration.
+	point = string.strip(fields[0])
+	value = 0
+	while point != '':
+	    value = value * 16
+	    if point[0] >= '0' and point[0] <= '9':
+	        value = value + ord(point[0]) - ord('0')
+	    elif point[0] >= 'A' and point[0] <= 'F':
+	        value = value + 10 + ord(point[0]) - ord('A')
+	    elif point[0] >= 'a' and point[0] <= 'f':
+	        value = value + 10 + ord(point[0]) - ord('a')
+	    # A character outside [0-9A-Fa-f] adds nothing, but the
+	    # multiplication by 16 above has already been applied.
+	    point = point[1:]
+	# Field 2 is used as the category name.
+	name = fields[2]
+    except:
+        print "Failed to process line: %s" % (line)
+	continue
+    nbchar = nbchar + 1
+    # Record the code point under the full category name; the list is
+    # created on first use when the append fails.
+    try:
+	Categories[name].append(value)
+    except:
+        try:
+	    Categories[name] = [value]
+	except:
+	    print "Failed to process line: %s" % (line)
+    # Record it a second time under the first letter of the category name.
+    try:
+	Categories[name[0]].append(value)
+    except:
+        try:
+	    Categories[name[0]] = [value]
+	except:
+	    print "Failed to process line: %s" % (line)
+print "Parsed %d char generating %d categories" % (nbchar, len(Categories.keys()))
+#reduce the number list into ranges
+# Each category's list of code points is replaced, in place, by a list of
+# inclusive (first, last) tuples, one tuple per run of consecutive values.
+# Only a value equal to the previous one plus 1 extends a run.
+for cat in Categories.keys():
+    # NOTE(review): "list" shadows the builtin of the same name.
+    list = Categories[cat]
+    start = -1
+    prev = -1
+    # NOTE(review): "end" is assigned here but never read in this loop.
+    end = -1
+    ranges = []
+    for val in list:
+        if start == -1:
+	    # First value seen: open the first run.
+	    start = val
+	    prev = val
+	    continue
+	elif val == prev + 1:
+	    # Consecutive value: extend the current run.
+	    prev = val
+	    continue
+	elif prev == start:
+	    # Gap after a single-value run: close it and open a new run.
+	    ranges.append((prev, prev))
+	    start = val
+	    prev = val
+	    continue
+	else:
+	    # Gap after a multi-value run: close it and open a new run.
+	    ranges.append((start, prev))
+	    start = val
+	    prev = val
+	    continue
+    # Close the run that was still open when the list ended. For an empty
+    # list this would append (-1, -1).
+    if prev == start:
+        ranges.append((prev, prev))
+    else:
+        ranges.append((start, prev))
+    Categories[cat] = ranges
+# Generate the resulting files
+    header = open("xmlunicode.h", "w")
+    print "Failed to open xmlunicode.h"
+    sys.exit(1)
+    output = open("xmlunicode.c", "w")
+    print "Failed to open xmlunicode.c"
+    sys.exit(1)
+date = time.asctime(time.localtime(time.time()))
+ * xmlunicode.h: this header exports interfaces for the Unicode character APIs
+ *
+ * This file is automatically generated from the
+ * UCS description files of the Unicode Character Database
+ *
+ * using the Python script.
+ *
+ * Generation date: %s
+ * Sources: %s
+ * Daniel Veillard <>
+ */
+#ifndef __XML_UNICODE_H__
+#define __XML_UNICODE_H__
+#ifdef __cplusplus
+extern "C" {
+""" % (date, sources));
+ * xmlunicode.c: this module implements the Unicode character APIs
+ *
+ * This file is automatically generated from the
+ * UCS description files of the Unicode Character Database
+ *
+ * using the Python script.
+ *
+ * Generation date: %s
+ * Sources: %s
+ * Daniel Veillard <>
+ */
+#define IN_LIBXML
+#include "libxml.h"
+#include <string.h>
+#include <libxml/xmlversion.h>
+#include <libxml/xmlunicode.h>
+""" % (date, sources));
+# For every parsed block, write a prototype to "header" and a documented
+# C function to "output" that tests whether a code lies within the block's
+# inclusive bounds.
+keys = BlockNames.keys()
+for block in keys:
+    (start, end) = BlockNames[block]
+    # The function name suffix is the block name with '-' characters removed.
+    name = string.replace(block, '-', '')
+    header.write("int\txmlUCSIs%s\t(int code);\n" % name)
+    output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name))
+    output.write(" *\n * Check whether the character is part of %s UCS Block\n"%
+                 (block))
+    output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
+    output.write("int\nxmlUCSIs%s(int code) {\n" % name)
+    # start and end are the "0x..." strings stored in BlockNames.
+    output.write("    return((code >= %s) && (code <= %s));\n" % (start, end))
+    output.write("}\n\n")
+# Write the prototype and definition of xmlUCSIsBlock(): the generated C
+# function compares its "block" argument against every parsed block name
+# with strcmp() and calls the matching xmlUCSIs<Name>() function; it
+# returns -1 when no name matches.
+header.write("\nint\txmlUCSIsBlock\t(int code,\n\t\t\t const char *block);\n\n")
+output.write("/**\n * xmlUCSIsBlock:\n * @code: UCS code point\n")
+output.write(" * @block: UCS block name\n")
+output.write(" *\n * Check whether the character is part of the UCS Block\n")
+output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown block\n */\n");
+output.write("int\nxmlUCSIsBlock(int code, const char *block) {\n")
+keys = BlockNames.keys()
+for block in keys:
+    # The compared string keeps any '-'; the function name suffix drops it.
+    name = string.replace(block, '-', '')
+    output.write("    if (!strcmp(block, \"%s\"))\n        return(xmlUCSIs%s(code));\n" %
+                 (block, name));
+output.write("    return(-1);\n}\n\n")
+# For every category, write a prototype to "header" and a documented C
+# function to "output" whose return expression ORs together one test per
+# (begin, end) tuple stored in Categories for that category.
+keys = Categories.keys()
+for name in keys:
+    ranges = Categories[name]
+    header.write("int\txmlUCSIsCat%s\t(int code);\n" % name)
+    output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name))
+    output.write(" *\n * Check whether the character is part of %s UCS Category\n"%
+                 (name))
+    output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
+    output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)
+    # "start" is a first-iteration flag here: the first tuple opens the
+    # return expression, every later one is joined with " ||".
+    start = 1
+    # NOTE(review): "range" shadows the builtin of the same name.
+    for range in ranges:
+        (begin, end) = range;
+	if start:
+	    output.write("    return(");
+	    start = 0
+	else:
+	    output.write(" ||\n           ");
+	# A single-value tuple becomes an equality test, any other tuple an
+	# inclusive bounds test.
+	if (begin == end):
+	    output.write("(code == %s)" % (hex(begin)))
+	else:
+	    output.write("((code >= %s) && (code <= %s))" % (
+	                 hex(begin), hex(end)))
+    output.write(");\n}\n\n")
+# Write the prototype and definition of xmlUCSIsCat(): the generated C
+# function compares its "cat" argument against every category name with
+# strcmp() and calls the matching xmlUCSIsCat<Name>() function; it returns
+# -1 when no name matches.
+header.write("\nint\txmlUCSIsCat\t(int code,\n\t\t\t const char *cat);\n")
+output.write("/**\n * xmlUCSIsCat:\n * @code: UCS code point\n")
+output.write(" * @cat: UCS Category name\n")
+output.write(" *\n * Check whether the character is part of the UCS Category\n")
+output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown category\n */\n");
+output.write("int\nxmlUCSIsCat(int code, const char *cat) {\n")
+keys = Categories.keys()
+for name in keys:
+    output.write("    if (!strcmp(cat, \"%s\"))\n        return(xmlUCSIsCat%s(code));\n" %
+                 (name, name));
+output.write("    return(-1);\n}\n\n")
+#ifdef __cplusplus
+#endif /* __XML_UNICODE_H__ */