merged the current state of XML Schemas implementation, it is not
* Makefile.am TODO_SCHEMAS configure.in genUnicode.py testAutomata.c
testRegexp.c testSchemas.c xmlregexp.c xmlschemas.c xmlschemastypes.c
xmlunicode.c include/libxml/Makefile.am
include/libxml/schemasInternals.h include/libxml/xmlautomata.h
include/libxml/xmlregexp.h include/libxml/xmlschemas.h
include/libxml/xmlschemastypes.h include/libxml/xmlunicode.h
include/libxml/xmlversion.h.in : merged the current state of
XML Schemas implementation, it is not configured in by default,
a specific --with-schemas configure option has been added.
* test/automata test/regexp test/schemas Makefile.am
result/automata result/regexp result/schemas:
merged automata/regexp/schemas regression tests
Daniel
diff --git a/ChangeLog b/ChangeLog
index 3df5ee4..92bf6f5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+Tue Apr 16 17:46:43 CEST 2002 Daniel Veillard <daniel@veillard.com>
+
+ * Makefile.am TODO_SCHEMAS configure.in genUnicode.py testAutomata.c
+ testRegexp.c testSchemas.c xmlregexp.c xmlschemas.c xmlschemastypes.c
+ xmlunicode.c include/libxml/Makefile.am
+ include/libxml/schemasInternals.h include/libxml/xmlautomata.h
+ include/libxml/xmlregexp.h include/libxml/xmlschemas.h
+ include/libxml/xmlschemastypes.h include/libxml/xmlunicode.h
+ include/libxml/xmlversion.h.in : merged the current state of
+ XML Schemas implementation, it is not configured in by default,
+ a specific --with-schemas configure option has been added.
+ * test/automata test/regexp test/schemas Makefile.am
+ result/automata result/regexp result/schemas:
+ merged automata/regexp/schemas regression tests
+
Tue Apr 16 09:48:44 CEST 2002 Daniel Veillard <daniel@veillard.com>
* xpath.c: Gary found a compile time problem, fixes #78823
diff --git a/Makefile.am b/Makefile.am
index 0755576..3d90f4d 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,12 +1,12 @@
## Process this file with automake to produce Makefile.in
-#AUTOMAKE_ OPTIONS=no-dependencies
+# AUTOMAKE_ OPTIONS=no-dependencies
SUBDIRS = include . doc example python
INCLUDES = -I@srcdir@/include -I$(top_builddir)/include @THREAD_CFLAGS@ @Z_CFLAGS@
noinst_PROGRAMS=testSAX testHTML testXPath testURI testDocbook testThreads \
- testC14N
+ testC14N testAutomata testSchemas testRegexp
bin_PROGRAMS = xmllint xmlcatalog
@@ -22,15 +22,16 @@
parser.c tree.c hash.c list.c xmlIO.c xmlmemory.c uri.c \
valid.c xlink.c HTMLparser.c HTMLtree.c debugXML.c xpath.c \
xpointer.c xinclude.c nanohttp.c nanoftp.c DOCBparser.c \
- catalog.c globals.c threads.c c14n.c triostr.c trio.c
-
+ catalog.c globals.c threads.c c14n.c \
+ xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c \
+ triostr.c trio.c
else
libxml2_la_SOURCES = SAX.c entities.c encoding.c error.c parserInternals.c \
parser.c tree.c hash.c list.c xmlIO.c xmlmemory.c uri.c \
valid.c xlink.c HTMLparser.c HTMLtree.c debugXML.c xpath.c \
xpointer.c xinclude.c nanohttp.c nanoftp.c DOCBparser.c \
- catalog.c globals.c threads.c c14n.c
-
+ catalog.c globals.c threads.c c14n.c \
+ xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c
endif
DEPS = $(top_builddir)/libxml2.la
@@ -86,11 +87,26 @@
testURI_DEPENDENCIES = $(DEPS)
testURI_LDADD= $(LDADDS)
+testRegexp_SOURCES=testRegexp.c
+testRegexp_LDFLAGS =
+testRegexp_DEPENDENCIES = $(DEPS)
+testRegexp_LDADD= $(LDADDS)
+
+testAutomata_SOURCES=testAutomata.c
+testAutomata_LDFLAGS =
+testAutomata_DEPENDENCIES = $(DEPS)
+testAutomata_LDADD= $(LDADDS)
+
+testSchemas_SOURCES=testSchemas.c
+testSchemas_LDFLAGS =
+testSchemas_DEPENDENCIES = $(DEPS)
+testSchemas_LDADD= $(LDADDS)
+
check-local: tests
testall : tests SVGtests SAXtests
-tests: XMLtests XMLenttests HTMLtests Validtests URItests XPathtests XPtrtests XIncludetests Scripttests Catatests @TEST_THREADS@
+tests: XMLtests XMLenttests HTMLtests Validtests URItests XPathtests XPtrtests XIncludetests Scripttests Catatests @TEST_SCHEMAS@ @TEST_THREADS@
@(cd python ; $(MAKE) tests)
valgrind:
@@ -557,6 +573,67 @@
rm result.$$name error.$$name ; \
fi ; fi ; done)
+Regexptests: testRegexp$(EXEEXT)
+ @(echo > .memdump)
+ @echo "##"
+ @echo "## Regexp regression tests"
+ @echo "##"
+ -@(for i in $(srcdir)/test/regexp/* ; do \
+ name=`basename $$i`; \
+ if [ ! -d $$i ] ; then \
+ if [ ! -f $(srcdir)/result/regexp/$$name ] ; then \
+ echo New test file $$name ; \
+ $(CHECKER) $(top_builddir)/testRegexp -i $$i > $(srcdir)/result/regexp/$$name; \
+ else \
+ echo Testing $$name ; \
+ $(CHECKER) $(top_builddir)/testRegexp -i $$i > result.$$name ; \
+ grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\
+ diff $(srcdir)/result/regexp/$$name result.$$name ; \
+ rm result.$$name ; \
+ fi ; fi ; done)
+
+Automatatests: testAutomata$(EXEEXT)
+ @(echo > .memdump)
+ @echo "##"
+ @echo "## Automata regression tests"
+ @echo "##"
+ -@(for i in $(srcdir)/test/automata/* ; do \
+ name=`basename $$i`; \
+ if [ ! -d $$i ] ; then \
+ if [ ! -f $(srcdir)/result/automata/$$name ] ; then \
+ echo New test file $$name ; \
+ $(CHECKER) $(top_builddir)/testAutomata $$i > $(srcdir)/result/automata/$$name; \
+ else \
+ echo Testing $$name ; \
+ $(CHECKER) $(top_builddir)/testAutomata $$i > result.$$name ; \
+ grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\
+ diff $(srcdir)/result/automata/$$name result.$$name ; \
+ rm result.$$name ; \
+ fi ; fi ; done)
+
+
+Schemastests: testSchemas$(EXEEXT)
+ @(echo > .memdump)
+ @echo "##"
+ @echo "## Schemas regression tests"
+ @echo "##"
+ -@(for i in $(srcdir)/test/schemas/*.xml ; do \
+ name=`basename $$i .xml`; \
+ if [ ! -f $(srcdir)/test/schemas/$$name.xsd ] ; then continue ; fi ; \
+ schemas="$(srcdir)/test/schemas/$$name.xsd" ; \
+ if [ ! -d $$i ] ; then \
+ if [ ! -f $(srcdir)/result/schemas/$$name ] ; then \
+ echo New test file $$name ; \
+ $(CHECKER) $(top_builddir)/testSchemas $$schemas $$i > $(srcdir)/result/schemas/$$name; \
+ else \
+ echo Testing $$name ; \
+ $(CHECKER) $(top_builddir)/testSchemas $$schemas $$i > result.$$name ; \
+ grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\
+ diff $(srcdir)/result/schemas/$$name result.$$name ; \
+ rm result.$$name ; \
+ fi ; fi ; done)
+
+
dist-hook: libxml.spec
-cp libxml.spec $(distdir)
(cd $(srcdir) ; tar -cf - --exclude CVS win32 macos vms test result SAXresult ) | (cd $(distdir); tar xf -)
@@ -582,7 +659,7 @@
$(man_MANS) libxml-2.0.pc.in \
trionan.c trionan.h triostr.c triostr.h trio.c trio.h \
triop.h triodef.h libxml.h \
- testThreadsWin32.c
+ testThreadsWin32.c genUnicode.py
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = libxml-2.0.pc
diff --git a/TODO_SCHEMAS b/TODO_SCHEMAS
new file mode 100644
index 0000000..145a4ed
--- /dev/null
+++ b/TODO_SCHEMAS
@@ -0,0 +1,31 @@
+- implement counted transitions at the automata level
+
+- Unicode:
+ + upgrade to 3.2
+ + improve the python script to generate better test
+ expressions to check the list of ranges.
+
+- Implement the interface at the SAX level
+
+- Implement the missing parts in the Structure part
+ + all content model
+ + enumerations
+ + countless others c.f. the TODO scattered in the code
+
+- Complete the Built-In datatype collections and Facets implementations
+
+- Regression tests based on
+ + the primer:
+ http://www.w3.org/TR/xmlschema-0/
+ + the Schemas Test Collection:
+ http://www.w3.org/2001/05/xmlschema-test-collection/
+ + archives of the schemas-dev list
+
+- Integrity constraints:
+ + what's that? Need to read about it
+
+- "formal" checking, i.e. go through the full Structure spec and
+ bind code and associated parts of the Schemas spec
+
+- go through the errata
+ http://www.w3.org/2001/05/xmlschema-errata
diff --git a/configure.in b/configure.in
index e673893..b478f5b 100644
--- a/configure.in
+++ b/configure.in
@@ -274,6 +274,9 @@
if test "${with_xptr}" = "" ; then
with_xptr="yes"
fi
+ if test "${with_schemas}" = "" ; then
+ with_schemas="yes"
+ fi
CFLAGS="-g -O -pedantic -W -Wunused -Wimplicit -Wreturn-type -Wswitch -Wcomment -Wtrigraphs -Wformat -Wchar-subscripts -Wuninitialized -Wparentheses -Wshadow -Wpointer-arith -Wcast-align -Wwrite-strings -Waggregate-return -Wstrict-prototypes -Wmissing-prototypes -Wnested-externs -Winline -Wredundant-decls "
dnl -Wcast-qual -ansi
fi
@@ -513,6 +516,18 @@
XML_LIBS="-lxml2 $Z_LIBS $THREAD_LIBS $ICONV_LIBS $M_LIBS $LIBS"
AC_SUBST(WITH_ICONV)
+AC_ARG_WITH(schemas, [ --with-schemas Add experimental Schemas support (off)])
+if test "$with_schemas" = "yes" ; then
+ echo Enabling Schemas support
+ WITH_SCHEMAS=1
+ TEST_SCHEMAS="Regexptests Automatatests Schemastests"
+else
+ WITH_SCHEMAS=0
+ TEST_SCHEMAS=
+fi
+AC_SUBST(WITH_SCHEMAS)
+AC_SUBST(TEST_SCHEMAS)
+
AC_ARG_WITH(debug, [ --with-debug Add the debugging module (on)])
if test "$with_debug" = "no" ; then
echo Disabling DEBUG support
diff --git a/genUnicode.py b/genUnicode.py
new file mode 100755
index 0000000..c5668fd
--- /dev/null
+++ b/genUnicode.py
@@ -0,0 +1,256 @@
+#!/usr/bin/python -u
+import sys
+import string
+import time
+
+sources = "Blocks-4.txt UnicodeData-3.1.0.txt"
+
+try:
+ blocks = open("Blocks-4.txt", "r")
+except:
+ print "Missing Blocks-4.txt, aborting ..."
+ sys.exit(1)
+
+BlockNames = {}
+for line in blocks.readlines():
+ if line[0] == '#':
+ continue
+ line = string.strip(line)
+ if line == '':
+ continue
+ try:
+ fields = string.split(line, ';')
+ range = string.strip(fields[0])
+ (start, end) = string.split(range, "..")
+ name = string.strip(fields[1])
+ name = string.replace(name, ' ', '')
+ except:
+ print "Failed to process line: %s" % (line)
+ continue
+ BlockNames[name] = ("0x"+start, "0x"+end)
+blocks.close()
+print "Parsed %d blocks descriptions" % (len(BlockNames.keys()))
+
+try:
+ data = open("UnicodeData-3.1.0.txt", "r")
+except:
+ print "Missing UnicodeData-3.1.0.txt, aborting ..."
+ sys.exit(1)
+
+nbchar = 0;
+Categories = {}
+for line in data.readlines():
+ if line[0] == '#':
+ continue
+ line = string.strip(line)
+ if line == '':
+ continue
+ try:
+ fields = string.split(line, ';')
+ point = string.strip(fields[0])
+ value = 0
+ while point != '':
+ value = value * 16
+ if point[0] >= '0' and point[0] <= '9':
+ value = value + ord(point[0]) - ord('0')
+ elif point[0] >= 'A' and point[0] <= 'F':
+ value = value + 10 + ord(point[0]) - ord('A')
+ elif point[0] >= 'a' and point[0] <= 'f':
+ value = value + 10 + ord(point[0]) - ord('a')
+ point = point[1:]
+ name = fields[2]
+ except:
+ print "Failed to process line: %s" % (line)
+ continue
+
+ nbchar = nbchar + 1
+ try:
+ Categories[name].append(value)
+ except:
+ try:
+ Categories[name] = [value]
+ except:
+ print "Failed to process line: %s" % (line)
+ try:
+ Categories[name[0]].append(value)
+ except:
+ try:
+ Categories[name[0]] = [value]
+ except:
+ print "Failed to process line: %s" % (line)
+
+data.close()  # release the UnicodeData handle; blocks was already closed after its own loop
+print "Parsed %d char generating %d categories" % (nbchar, len(Categories.keys()))
+#reduce the number list into ranges
+for cat in Categories.keys():
+ list = Categories[cat]
+ start = -1
+ prev = -1
+ end = -1
+ ranges = []
+ for val in list:
+ if start == -1:
+ start = val
+ prev = val
+ continue
+ elif val == prev + 1:
+ prev = val
+ continue
+ elif prev == start:
+ ranges.append((prev, prev))
+ start = val
+ prev = val
+ continue
+ else:
+ ranges.append((start, prev))
+ start = val
+ prev = val
+ continue
+ if prev == start:
+ ranges.append((prev, prev))
+ else:
+ ranges.append((start, prev))
+ Categories[cat] = ranges
+
+#
+# Generate the resulting files
+#
+try:
+ header = open("xmlunicode.h", "w")
+except:
+ print "Failed to open xmlunicode.h"
+ sys.exit(1)
+
+try:
+ output = open("xmlunicode.c", "w")
+except:
+ print "Failed to open xmlunicode.c"
+ sys.exit(1)
+
+date = time.asctime(time.localtime(time.time()))
+
+header.write(
+"""/*
+ * xmlunicode.h: this header exports interfaces for the Unicode character APIs
+ *
+ * This file is automatically generated from the
+ * UCS description files of the Unicode Character Database
+ * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
+ * using the genUnicode.py Python script.
+ *
+ * Generation date: %s
+ * Sources: %s
+ * Daniel Veillard <veillard@redhat.com>
+ */
+
+#ifndef __XML_UNICODE_H__
+#define __XML_UNICODE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+""" % (date, sources));
+output.write(
+"""/*
+ * xmlunicode.c: this module implements the Unicode character APIs
+ *
+ * This file is automatically generated from the
+ * UCS description files of the Unicode Character Database
+ * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
+ * using the genUnicode.py Python script.
+ *
+ * Generation date: %s
+ * Sources: %s
+ * Daniel Veillard <veillard@redhat.com>
+ */
+
+#define IN_LIBXML
+#include "libxml.h"
+
+#ifdef LIBXML_UNICODE_ENABLED
+
+#include <string.h>
+#include <libxml/xmlversion.h>
+#include <libxml/xmlunicode.h>
+
+""" % (date, sources));
+
+keys = BlockNames.keys()
+keys.sort()
+for block in keys:
+ (start, end) = BlockNames[block]
+ name = string.replace(block, '-', '')
+ header.write("int\txmlUCSIs%s\t(int code);\n" % name)
+ output.write("/**\n * xmlUCSIs%s:\n * @code: UCS code point\n" % (name))
+ output.write(" *\n * Check whether the character is part of %s UCS Block\n"%
+ (block))
+ output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
+ output.write("int\nxmlUCSIs%s(int code) {\n" % name)
+ output.write(" return((code >= %s) && (code <= %s));\n" % (start, end))
+ output.write("}\n\n")
+
+header.write("\nint\txmlUCSIsBlock\t(int code,\n\t\t\t const char *block);\n\n")
+output.write("/**\n * xmlUCSIsBlock:\n * @code: UCS code point\n")
+output.write(" * @block: UCS block name\n")
+output.write(" *\n * Check whether the caracter is part of the UCS Block\n")
+output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown block\n */\n");
+output.write("int\nxmlUCSIsBlock(int code, const char *block) {\n")
+keys = BlockNames.keys()
+keys.sort()
+for block in keys:
+ name = string.replace(block, '-', '')
+ output.write(" if (!strcmp(block, \"%s\"))\n return(xmlUCSIs%s(code));\n" %
+ (block, name));
+output.write(" return(-1);\n}\n\n")
+
+
+keys = Categories.keys()
+keys.sort()
+for name in keys:
+ ranges = Categories[name]
+ header.write("int\txmlUCSIsCat%s\t(int code);\n" % name)
+ output.write("/**\n * xmlUCSIsCat%s:\n * @code: UCS code point\n" % (name))
+ output.write(" *\n * Check whether the character is part of %s UCS Category\n"%
+ (name))
+ output.write(" *\n * Returns 1 if true 0 otherwise\n */\n");
+ output.write("int\nxmlUCSIsCat%s(int code) {\n" % name)
+ start = 1
+ for range in ranges:
+ (begin, end) = range;
+ if start:
+ output.write(" return(");
+ start = 0
+ else:
+ output.write(" ||\n ");
+ if (begin == end):
+ output.write("(code == %s)" % (hex(begin)))
+ else:
+ output.write("((code >= %s) && (code <= %s))" % (
+ hex(begin), hex(end)))
+ output.write(");\n}\n\n")
+
+header.write("\nint\txmlUCSIsCat\t(int code,\n\t\t\t const char *cat);\n")
+output.write("/**\n * xmlUCSIsCat:\n * @code: UCS code point\n")
+output.write(" * @cat: UCS Category name\n")
+output.write(" *\n * Check whether the caracter is part of the UCS Category\n")
+output.write(" *\n * Returns 1 if true, 0 if false and -1 on unknown category\n */\n");
+output.write("int\nxmlUCSIsCat(int code, const char *cat) {\n")
+keys = Categories.keys()
+keys.sort()
+for name in keys:
+ output.write(" if (!strcmp(cat, \"%s\"))\n return(xmlUCSIsCat%s(code));\n" %
+ (name, name));
+output.write(" return(-1);\n}\n\n")
+
+header.write("""
+#ifdef __cplusplus
+}
+#endif
+#endif /* __XML_UNICODE_H__ */
+""");
+output.write("""
+#endif /* LIBXML_UNICODE_ENABLED */
+""");
+header.close()
+output.close()
diff --git a/include/libxml/Makefile.am b/include/libxml/Makefile.am
index e460c88..8d8a3d7 100644
--- a/include/libxml/Makefile.am
+++ b/include/libxml/Makefile.am
@@ -32,7 +32,13 @@
catalog.h \
threads.h \
globals.h \
- c14n.h
+ c14n.h \
+ xmlautomata.h \
+ xmlregexp.h \
+ xmlschemas.h \
+ schemasInternals.h \
+ xmlschemastypes.h \
+ xmlunicode.h
install-exec-hook:
$(mkinstalldirs) $(DESTDIR)$(xmlincdir)
diff --git a/include/libxml/schemasInternals.h b/include/libxml/schemasInternals.h
new file mode 100644
index 0000000..1322c4c
--- /dev/null
+++ b/include/libxml/schemasInternals.h
@@ -0,0 +1,275 @@
+/*
+ * schemasInternals.h : internal interfaces for the XML Schemas handling
+ * and schema validity checking
+ *
+ * See Copyright for the status of this software.
+ *
+ * Daniel.Veillard@w3.org
+ */
+
+
+#ifndef __XML_SCHEMA_INTERNALS_H__
+#define __XML_SCHEMA_INTERNALS_H__
+
+#if defined(WIN32) && defined(_MSC_VER)
+#include <libxml/xmlwin32version.h>
+#else
+#include <libxml/xmlversion.h>
+#endif
+#ifdef LIBXML_SCHEMAS_ENABLED
+
+#include <libxml/xmlregexp.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+ * XML Schemas defines multiple kinds of types.
+ */
+typedef enum {
+ XML_SCHEMA_TYPE_BASIC = 1,
+ XML_SCHEMA_TYPE_ANY,
+ XML_SCHEMA_TYPE_FACET,
+ XML_SCHEMA_TYPE_SIMPLE,
+ XML_SCHEMA_TYPE_COMPLEX,
+ XML_SCHEMA_TYPE_SEQUENCE,
+ XML_SCHEMA_TYPE_CHOICE,
+ XML_SCHEMA_TYPE_ALL,
+ XML_SCHEMA_TYPE_SIMPLE_CONTENT,
+ XML_SCHEMA_TYPE_COMPLEX_CONTENT,
+ XML_SCHEMA_TYPE_UR,
+ XML_SCHEMA_TYPE_RESTRICTION,
+ XML_SCHEMA_TYPE_EXTENSION,
+ XML_SCHEMA_TYPE_ELEMENT,
+ XML_SCHEMA_TYPE_ATTRIBUTE,
+ XML_SCHEMA_TYPE_GROUP,
+ XML_SCHEMA_TYPE_NOTATION,
+ XML_SCHEMA_TYPE_LIST,
+ XML_SCHEMA_TYPE_UNION,
+ XML_SCHEMA_FACET_MININCLUSIVE = 1000,
+ XML_SCHEMA_FACET_MINEXCLUSIVE,
+ XML_SCHEMA_FACET_MAXINCLUSIVE,
+ XML_SCHEMA_FACET_MAXEXCLUSIVE,
+ XML_SCHEMA_FACET_TOTALDIGITS,
+ XML_SCHEMA_FACET_FRACTIONDIGITS,
+ XML_SCHEMA_FACET_PATTERN,
+ XML_SCHEMA_FACET_ENUMERATION,
+ XML_SCHEMA_FACET_WHITESPACE,
+ XML_SCHEMA_FACET_LENGTH,
+ XML_SCHEMA_FACET_MAXLENGTH,
+ XML_SCHEMA_FACET_MINLENGTH
+} xmlSchemaTypeType;
+
+typedef enum {
+ XML_SCHEMA_CONTENT_UNKNOWN = 0,
+ XML_SCHEMA_CONTENT_EMPTY = 1,
+ XML_SCHEMA_CONTENT_ELEMENTS,
+ XML_SCHEMA_CONTENT_MIXED,
+ XML_SCHEMA_CONTENT_SIMPLE,
+ XML_SCHEMA_CONTENT_MIXED_OR_ELEMENTS,
+ XML_SCHEMA_CONTENT_BASIC
+} xmlSchemaContentType;
+
+typedef struct _xmlSchemaVal xmlSchemaVal;
+typedef xmlSchemaVal *xmlSchemaValPtr;
+
+typedef struct _xmlSchemaType xmlSchemaType;
+typedef xmlSchemaType *xmlSchemaTypePtr;
+
+typedef struct _xmlSchemaFacet xmlSchemaFacet;
+typedef xmlSchemaFacet *xmlSchemaFacetPtr;
+
+/**
+ * Annotation
+ */
+typedef struct _xmlSchemaAnnot xmlSchemaAnnot;
+typedef xmlSchemaAnnot *xmlSchemaAnnotPtr;
+struct _xmlSchemaAnnot {
+ struct _xmlSchemaAnnot *next;
+ xmlNodePtr content; /* the annotation */
+};
+
+/**
+ * An attribute definition.
+ */
+
+#define XML_SCHEMAS_ANYATTR_SKIP 1
+#define XML_SCHEMAS_ANYATTR_LAX 2
+#define XML_SCHEMAS_ANYATTR_STRICT 3
+
+typedef struct _xmlSchemaAttribute xmlSchemaAttribute;
+typedef xmlSchemaAttribute *xmlSchemaAttributePtr;
+struct _xmlSchemaAttribute {
+ xmlSchemaTypeType type; /* The kind of type */
+ struct _xmlSchemaAttribute *next;/* the next attribute if in a group ... */
+ xmlChar *name;
+ xmlChar *id;
+ xmlChar *ref;
+ xmlChar *refNs;
+ xmlChar *typeName;
+ xmlChar *typeNs;
+ xmlSchemaAnnotPtr annot;
+
+ xmlSchemaTypePtr base;
+ int occurs;
+ xmlChar *defValue;
+ xmlSchemaTypePtr subtypes;
+};
+
+/**
+ * An attribute group definition.
+ *
+ * xmlSchemaAttribute and xmlSchemaAttributeGroup start of structures
+ * must be kept similar
+ */
+typedef struct _xmlSchemaAttributeGroup xmlSchemaAttributeGroup;
+typedef xmlSchemaAttributeGroup *xmlSchemaAttributeGroupPtr;
+struct _xmlSchemaAttributeGroup {
+ xmlSchemaTypeType type; /* The kind of type */
+ struct _xmlSchemaAttribute *next;/* the next attribute if in a group ... */
+ xmlChar *name;
+ xmlChar *id;
+ xmlChar *ref;
+ xmlChar *refNs;
+ xmlSchemaAnnotPtr annot;
+
+ xmlSchemaAttributePtr attributes;
+};
+
+
+/**
+ * Schemas type definition.
+ */
+#define XML_SCHEMAS_TYPE_MIXED (1 << 0)
+
+struct _xmlSchemaType {
+ xmlSchemaTypeType type; /* The kind of type */
+ struct _xmlSchemaType *next;/* the next type if in a sequence ... */
+ xmlChar *name;
+ xmlChar *id;
+ xmlChar *ref;
+ xmlChar *refNs;
+ xmlSchemaAnnotPtr annot;
+ xmlSchemaTypePtr subtypes;
+ xmlSchemaAttributePtr attributes;
+ xmlNodePtr node;
+ int minOccurs;
+ int maxOccurs;
+
+ int flags;
+ xmlSchemaContentType contentType;
+ xmlChar *base;
+ xmlChar *baseNs;
+ xmlSchemaTypePtr baseType;
+ xmlSchemaFacetPtr facets;
+};
+
+/**
+ * An element definition.
+ *
+ * xmlSchemaType, xmlSchemaFacet and xmlSchemaElement start of
+ * structures must be kept similar
+ */
+#define XML_SCHEMAS_ELEM_NILLABLE (1 << 0)
+#define XML_SCHEMAS_ELEM_GLOBAL (1 << 1)
+#define XML_SCHEMAS_ELEM_DEFAULT (1 << 2)
+#define XML_SCHEMAS_ELEM_FIXED (1 << 3)
+#define XML_SCHEMAS_ELEM_ABSTRACT (1 << 4)
+#define XML_SCHEMAS_ELEM_TOPLEVEL (1 << 5)
+#define XML_SCHEMAS_ELEM_REF (1 << 6)
+
+typedef struct _xmlSchemaElement xmlSchemaElement;
+typedef xmlSchemaElement *xmlSchemaElementPtr;
+struct _xmlSchemaElement {
+ xmlSchemaTypeType type; /* The kind of type */
+ struct _xmlSchemaType *next;/* the next type if in a sequence ... */
+ xmlChar *name;
+ xmlChar *id;
+ xmlChar *ref;
+ xmlChar *refNs;
+ xmlSchemaAnnotPtr annot;
+ xmlSchemaTypePtr subtypes;
+ xmlSchemaAttributePtr attributes;
+ xmlNodePtr node;
+ int minOccurs;
+ int maxOccurs;
+
+ int flags;
+ xmlChar *targetNamespace;
+ xmlChar *namedType;
+ xmlChar *namedTypeNs;
+ xmlChar *substGroup;
+ xmlChar *substGroupNs;
+ xmlChar *scope;
+ xmlChar *value;
+ struct _xmlSchemaElement *refDecl;
+ xmlRegexpPtr contModel;
+};
+
+/**
+ * A facet definition.
+ *
+ */
+#define XML_SCHEMAS_FACET_UNKNOWN 0
+#define XML_SCHEMAS_FACET_PRESERVE 1
+#define XML_SCHEMAS_FACET_REPLACE 2
+#define XML_SCHEMAS_FACET_COLLAPSE 3
+
+struct _xmlSchemaFacet {
+ xmlSchemaTypeType type; /* The kind of type */
+ struct _xmlSchemaFacet *next;/* the next type if in a sequence ... */
+ xmlChar *value;
+ xmlChar *id;
+ xmlSchemaAnnotPtr annot;
+ xmlNodePtr node;
+ int fixed;
+ int whitespace;
+ xmlSchemaValPtr val;
+ xmlRegexpPtr regexp;
+};
+
+/**
+ * A notation definition.
+ */
+typedef struct _xmlSchemaNotation xmlSchemaNotation;
+typedef xmlSchemaNotation *xmlSchemaNotationPtr;
+struct _xmlSchemaNotation {
+ xmlSchemaTypeType type; /* The kind of type */
+ xmlChar *name;
+ xmlSchemaAnnotPtr annot;
+ xmlChar *identifier;
+};
+
+/**
+ * A Schemas definition
+ */
+#define XML_SCHEMAS_QUALIF_ELEM (1 << 0)
+#define XML_SCHEMAS_QUALIF_ATTR (1 << 1)
+struct _xmlSchema {
+ xmlChar *name; /* schema name */
+ xmlChar *targetNamespace; /* the target namespace */
+ xmlChar *version;
+ xmlChar *id;
+ xmlDocPtr doc;
+ xmlSchemaAnnotPtr annot;
+ int flags;
+
+ xmlHashTablePtr typeDecl;
+ xmlHashTablePtr attrDecl;
+ xmlHashTablePtr attrgrpDecl;
+ xmlHashTablePtr elemDecl;
+ xmlHashTablePtr notaDecl;
+};
+
+void xmlSchemaFreeType (xmlSchemaTypePtr type);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBXML_SCHEMAS_ENABLED */
+#endif /* __XML_SCHEMA_INTERNALS_H__ */
+
+
diff --git a/include/libxml/xmlautomata.h b/include/libxml/xmlautomata.h
new file mode 100644
index 0000000..ad3acf7
--- /dev/null
+++ b/include/libxml/xmlautomata.h
@@ -0,0 +1,75 @@
+/*
+ * xmlautomata.h : description of the API to build regexp automata
+ *
+ * See Copyright for the status of this software.
+ *
+ * Daniel Veillard <veillard@redhat.com>
+ */
+
+#ifndef __XML_AUTOMATA_H__
+#define __XML_AUTOMATA_H__
+
+#if defined(WIN32) && defined(_MSC_VER)
+#include <libxml/xmlwin32version.h>
+#else
+#include <libxml/xmlversion.h>
+#endif
+#ifdef LIBXML_AUTOMATA_ENABLED
+
+#include <libxml/xmlregexp.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * xmlAutomataPtr:
+ *
+ * A libxml automata description; it can be compiled into a regexp.
+ */
+typedef struct _xmlAutomata xmlAutomata;
+typedef xmlAutomata *xmlAutomataPtr;
+
+/**
+ * xmlAutomataStatePtr:
+ *
+ * A state in the automata description.
+ */
+typedef struct _xmlAutomataState xmlAutomataState;
+typedef xmlAutomataState *xmlAutomataStatePtr;
+
+/*
+ * Building API
+ */
+xmlAutomataPtr xmlNewAutomata (void);
+void xmlFreeAutomata (xmlAutomataPtr am);
+
+xmlAutomataStatePtr xmlAutomataGetInitState (xmlAutomataPtr am);
+int xmlAutomataSetFinalState(xmlAutomataPtr am,
+ xmlAutomataStatePtr state);
+xmlAutomataStatePtr xmlAutomataNewState (xmlAutomataPtr am);
+xmlAutomataStatePtr xmlAutomataNewTransition(xmlAutomataPtr am,
+ xmlAutomataStatePtr from,
+ xmlAutomataStatePtr to,
+ const xmlChar *token,
+ void *data);
+xmlAutomataStatePtr xmlAutomataNewCountTrans(xmlAutomataPtr am,
+ xmlAutomataStatePtr from,
+ xmlAutomataStatePtr to,
+ const xmlChar *token,
+ int min,
+ int max,
+ void *data);
+xmlAutomataStatePtr xmlAutomataNewEpsilon (xmlAutomataPtr am,
+ xmlAutomataStatePtr from,
+ xmlAutomataStatePtr to);
+int xmlAutomataNewCounter (xmlAutomataPtr am);
+
+xmlRegexpPtr xmlAutomataCompile (xmlAutomataPtr am);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBXML_AUTOMATA_ENABLED */
+#endif /* __XML_AUTOMATA_H__ */
diff --git a/include/libxml/xmlregexp.h b/include/libxml/xmlregexp.h
new file mode 100644
index 0000000..e4b9afe
--- /dev/null
+++ b/include/libxml/xmlregexp.h
@@ -0,0 +1,77 @@
+/*
+ * xmlregexp.h : describes the basic API for libxml regular expressions handling
+ *
+ * See Copyright for the status of this software.
+ *
+ * Daniel Veillard <veillard@redhat.com>
+ */
+
+#ifndef __XML_REGEXP_H__
+#define __XML_REGEXP_H__
+
+#if defined(WIN32) && defined(_MSC_VER)
+#include <libxml/xmlwin32version.h>
+#else
+#include <libxml/xmlversion.h>
+#endif
+#ifdef LIBXML_REGEXP_ENABLED
+
+#include <libxml/tree.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * xmlRegexpPtr:
+ *
+ * A libxml regular expression; they can actually be far more complex
+ * than the POSIX regex expressions.
+ */
+typedef struct _xmlRegexp xmlRegexp;
+typedef xmlRegexp *xmlRegexpPtr;
+
+/**
+ * xmlRegExecCtxtPtr:
+ *
+ * A libxml progressive regular expression evaluation context
+ */
+typedef struct _xmlRegExecCtxt xmlRegExecCtxt;
+typedef xmlRegExecCtxt *xmlRegExecCtxtPtr;
+
+/*
+ * The POSIX like API
+ */
+xmlRegexpPtr xmlRegexpCompile(const xmlChar *regexp);
+void xmlRegFreeRegexp(xmlRegexpPtr regexp);
+int xmlRegexpExec (xmlRegexpPtr comp,
+ const xmlChar *value);
+void xmlRegexpPrint (FILE *output,
+ xmlRegexpPtr regexp);
+
+/*
+ * Callback function when doing a transition in the automata
+ */
+typedef void (*xmlRegExecCallbacks) (xmlRegExecCtxtPtr exec,
+ const xmlChar *token,
+ void *transdata,
+ void *inputdata);
+
+/*
+ * The progressive API
+ */
+xmlRegExecCtxtPtr xmlRegNewExecCtxt (xmlRegexpPtr comp,
+ xmlRegExecCallbacks callback,
+ void *data);
+void xmlRegFreeExecCtxt (xmlRegExecCtxtPtr exec);
+int xmlRegExecPushString (xmlRegExecCtxtPtr exec,
+ const xmlChar *value,
+ void *data);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBXML_REGEXP_ENABLED */
+
+#endif /*__XML_REGEXP_H__ */
diff --git a/include/libxml/xmlschemas.h b/include/libxml/xmlschemas.h
new file mode 100644
index 0000000..58bf645
--- /dev/null
+++ b/include/libxml/xmlschemas.h
@@ -0,0 +1,105 @@
+/*
+ * xmlschemas.h : interface to the XML Schemas handling and schema validity
+ * checking
+ *
+ * See Copyright for the status of this software.
+ *
+ * Daniel.Veillard@w3.org
+ */
+
+
+#ifndef __XML_SCHEMA_H__
+#define __XML_SCHEMA_H__
+
+#if defined(WIN32) && defined(_MSC_VER)
+#include <libxml/xmlwin32version.h>
+#else
+#include <libxml/xmlversion.h>
+#endif
+#ifdef LIBXML_SCHEMAS_ENABLED
+
+#include <libxml/tree.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+ XML_SCHEMAS_ERR_OK = 0,
+ XML_SCHEMAS_ERR_NOROOT = 1,
+ XML_SCHEMAS_ERR_UNDECLAREDELEM,
+ XML_SCHEMAS_ERR_NOTTOPLEVEL,
+ XML_SCHEMAS_ERR_MISSING,
+ XML_SCHEMAS_ERR_WRONGELEM,
+ XML_SCHEMAS_ERR_NOTYPE,
+ XML_SCHEMAS_ERR_NOROLLBACK,
+ XML_SCHEMAS_ERR_ISABSTRACT,
+ XML_SCHEMAS_ERR_NOTEMPTY,
+ XML_SCHEMAS_ERR_HAVEDEFAULT,
+ XML_SCHEMAS_ERR_NOTNILLABLE,
+ XML_SCHEMAS_ERR_EXTRACONTENT,
+ XML_SCHEMAS_ERR_INVALIDATTR,
+ XML_SCHEMAS_ERR_INVALIDELEM,
+ XML_SCHEMAS_ERR_CONSTRUCT,
+ XML_SCHEMAS_ERR_INTERNAL,
+ XML_SCHEMAS_ERR_NOTSIMPLE,
+ XML_SCHEMAS_ERR_ATTRUNKNOWN,
+ XML_SCHEMAS_ERR_ATTRINVALID,
+ XML_SCHEMAS_ERR_,
+ XML_SCHEMAS_ERR_XXX
+} xmlSchemaValidError;
+
+
+/**
+ * The schemas related types are kept internal
+ */
+typedef struct _xmlSchema xmlSchema;
+typedef xmlSchema *xmlSchemaPtr;
+
+/**
+ * A schemas validation context
+ */
+typedef void (*xmlSchemaValidityErrorFunc) (void *ctx, const char *msg, ...);
+typedef void (*xmlSchemaValidityWarningFunc) (void *ctx, const char *msg, ...);
+
+typedef struct _xmlSchemaParserCtxt xmlSchemaParserCtxt;
+typedef xmlSchemaParserCtxt *xmlSchemaParserCtxtPtr;
+
+typedef struct _xmlSchemaValidCtxt xmlSchemaValidCtxt;
+typedef xmlSchemaValidCtxt *xmlSchemaValidCtxtPtr;
+
+/*
+ * Interfaces for parsing.
+ */
+xmlSchemaParserCtxtPtr xmlSchemaNewParserCtxt(const char *URL);
+void xmlSchemaFreeParserCtxt (xmlSchemaParserCtxtPtr ctxt);
+void xmlSchemaSetParserErrors(xmlSchemaParserCtxtPtr ctxt,
+ xmlSchemaValidityErrorFunc err,
+ xmlSchemaValidityWarningFunc warn,
+ void *ctx);
+xmlSchemaPtr xmlSchemaParse (xmlSchemaParserCtxtPtr ctxt);
+void xmlSchemaFree (xmlSchemaPtr schema);
+void xmlSchemaDump (FILE *output,
+ xmlSchemaPtr schema);
+/*
+ * Interfaces for validating
+ */
+void xmlSchemaSetValidErrors (xmlSchemaValidCtxtPtr ctxt,
+ xmlSchemaValidityErrorFunc err,
+ xmlSchemaValidityWarningFunc warn,
+ void *ctx);
+xmlSchemaValidCtxtPtr xmlSchemaNewValidCtxt (xmlSchemaPtr schema);
+void xmlSchemaFreeValidCtxt (xmlSchemaValidCtxtPtr ctxt);
+int xmlSchemaValidateDoc (xmlSchemaValidCtxtPtr ctxt,
+ xmlDocPtr instance);
+int xmlSchemaValidateStream (xmlSchemaValidCtxtPtr ctxt,
+ xmlParserInputBufferPtr input,
+ xmlCharEncoding enc,
+ xmlSAXHandlerPtr sax,
+ void *user_data);
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBXML_SCHEMAS_ENABLED */
+#endif /* __XML_SCHEMA_H__ */
diff --git a/include/libxml/xmlschemastypes.h b/include/libxml/xmlschemastypes.h
new file mode 100644
index 0000000..b1788da
--- /dev/null
+++ b/include/libxml/xmlschemastypes.h
@@ -0,0 +1,45 @@
+/*
+ * xmlschemastypes.h : interface of the XML Schema Datatypes
+ * definition and validity checking
+ *
+ * See Copyright for the status of this software.
+ *
+ * Daniel Veillard <veillard@redhat.com>
+ */
+
+
+#ifndef __XML_SCHEMA_TYPES_H__
+#define __XML_SCHEMA_TYPES_H__
+
+#if defined(WIN32) && defined(_MSC_VER)
+#include <libxml/xmlwin32version.h>
+#else
+#include <libxml/xmlversion.h>
+#endif
+#ifdef LIBXML_SCHEMAS_ENABLED
+
+#include <libxml/schemasInternals.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void xmlSchemaInitTypes (void);
+void xmlSchemaCleanupTypes (void);
+xmlSchemaTypePtr xmlSchemaGetPredefinedType (const xmlChar *name,
+ const xmlChar *ns);
+int xmlSchemaValidatePredefinedType (xmlSchemaTypePtr type,
+ const xmlChar *value,
+ xmlSchemaValPtr *val);
+int xmlSchemaValidateFacet (xmlSchemaTypePtr base,
+ xmlSchemaFacetPtr facet,
+ const xmlChar *value,
+ xmlSchemaValPtr val);
+void xmlSchemaFreeValue (xmlSchemaValPtr val);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* LIBXML_SCHEMAS_ENABLED */
+#endif /* __XML_SCHEMA_TYPES_H__ */
diff --git a/include/libxml/xmlunicode.h b/include/libxml/xmlunicode.h
new file mode 100644
index 0000000..f0f1fe9
--- /dev/null
+++ b/include/libxml/xmlunicode.h
@@ -0,0 +1,164 @@
+/*
+ * xmlunicode.h: this header exports interfaces for the Unicode character APIs
+ *
+ * This file is automatically generated from the
+ * UCS description files of the Unicode Character Database
+ * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
+ * using the genUnicode.py Python script.
+ *
+ * Generation date: Tue Apr 16 17:28:05 2002
+ * Sources: Blocks-4.txt UnicodeData-3.1.0.txt
+ * Daniel Veillard <veillard@redhat.com>
+ */
+
+#ifndef __XML_UNICODE_H__
+#define __XML_UNICODE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int xmlUCSIsAlphabeticPresentationForms (int code);
+int xmlUCSIsArabic (int code);
+int xmlUCSIsArabicPresentationFormsA (int code);
+int xmlUCSIsArabicPresentationFormsB (int code);
+int xmlUCSIsArmenian (int code);
+int xmlUCSIsArrows (int code);
+int xmlUCSIsBasicLatin (int code);
+int xmlUCSIsBengali (int code);
+int xmlUCSIsBlockElements (int code);
+int xmlUCSIsBopomofo (int code);
+int xmlUCSIsBopomofoExtended (int code);
+int xmlUCSIsBoxDrawing (int code);
+int xmlUCSIsBraillePatterns (int code);
+int xmlUCSIsByzantineMusicalSymbols (int code);
+int xmlUCSIsCJKCompatibility (int code);
+int xmlUCSIsCJKCompatibilityForms (int code);
+int xmlUCSIsCJKCompatibilityIdeographs (int code);
+int xmlUCSIsCJKCompatibilityIdeographsSupplement (int code);
+int xmlUCSIsCJKRadicalsSupplement (int code);
+int xmlUCSIsCJKSymbolsandPunctuation (int code);
+int xmlUCSIsCJKUnifiedIdeographs (int code);
+int xmlUCSIsCJKUnifiedIdeographsExtensionA (int code);
+int xmlUCSIsCJKUnifiedIdeographsExtensionB (int code);
+int xmlUCSIsCherokee (int code);
+int xmlUCSIsCombiningDiacriticalMarks (int code);
+int xmlUCSIsCombiningHalfMarks (int code);
+int xmlUCSIsCombiningMarksforSymbols (int code);
+int xmlUCSIsControlPictures (int code);
+int xmlUCSIsCurrencySymbols (int code);
+int xmlUCSIsCyrillic (int code);
+int xmlUCSIsDeseret (int code);
+int xmlUCSIsDevanagari (int code);
+int xmlUCSIsDingbats (int code);
+int xmlUCSIsEnclosedAlphanumerics (int code);
+int xmlUCSIsEnclosedCJKLettersandMonths (int code);
+int xmlUCSIsEthiopic (int code);
+int xmlUCSIsGeneralPunctuation (int code);
+int xmlUCSIsGeometricShapes (int code);
+int xmlUCSIsGeorgian (int code);
+int xmlUCSIsGothic (int code);
+int xmlUCSIsGreek (int code);
+int xmlUCSIsGreekExtended (int code);
+int xmlUCSIsGujarati (int code);
+int xmlUCSIsGurmukhi (int code);
+int xmlUCSIsHalfwidthandFullwidthForms (int code);
+int xmlUCSIsHangulCompatibilityJamo (int code);
+int xmlUCSIsHangulJamo (int code);
+int xmlUCSIsHangulSyllables (int code);
+int xmlUCSIsHebrew (int code);
+int xmlUCSIsHighPrivateUseSurrogates (int code);
+int xmlUCSIsHighSurrogates (int code);
+int xmlUCSIsHiragana (int code);
+int xmlUCSIsIPAExtensions (int code);
+int xmlUCSIsIdeographicDescriptionCharacters (int code);
+int xmlUCSIsKanbun (int code);
+int xmlUCSIsKangxiRadicals (int code);
+int xmlUCSIsKannada (int code);
+int xmlUCSIsKatakana (int code);
+int xmlUCSIsKhmer (int code);
+int xmlUCSIsLao (int code);
+int xmlUCSIsLatin1Supplement (int code);
+int xmlUCSIsLatinExtendedA (int code);
+int xmlUCSIsLatinExtendedB (int code);
+int xmlUCSIsLatinExtendedAdditional (int code);
+int xmlUCSIsLetterlikeSymbols (int code);
+int xmlUCSIsLowSurrogates (int code);
+int xmlUCSIsMalayalam (int code);
+int xmlUCSIsMathematicalAlphanumericSymbols (int code);
+int xmlUCSIsMathematicalOperators (int code);
+int xmlUCSIsMiscellaneousSymbols (int code);
+int xmlUCSIsMiscellaneousTechnical (int code);
+int xmlUCSIsMongolian (int code);
+int xmlUCSIsMusicalSymbols (int code);
+int xmlUCSIsMyanmar (int code);
+int xmlUCSIsNumberForms (int code);
+int xmlUCSIsOgham (int code);
+int xmlUCSIsOldItalic (int code);
+int xmlUCSIsOpticalCharacterRecognition (int code);
+int xmlUCSIsOriya (int code);
+int xmlUCSIsPrivateUse (int code);
+int xmlUCSIsRunic (int code);
+int xmlUCSIsSinhala (int code);
+int xmlUCSIsSmallFormVariants (int code);
+int xmlUCSIsSpacingModifierLetters (int code);
+int xmlUCSIsSpecials (int code);
+int xmlUCSIsSuperscriptsandSubscripts (int code);
+int xmlUCSIsSyriac (int code);
+int xmlUCSIsTags (int code);
+int xmlUCSIsTamil (int code);
+int xmlUCSIsTelugu (int code);
+int xmlUCSIsThaana (int code);
+int xmlUCSIsThai (int code);
+int xmlUCSIsTibetan (int code);
+int xmlUCSIsUnifiedCanadianAboriginalSyllabics (int code);
+int xmlUCSIsYiRadicals (int code);
+int xmlUCSIsYiSyllables (int code);
+
+int xmlUCSIsBlock (int code,
+ const char *block);
+
+int xmlUCSIsCatC (int code);
+int xmlUCSIsCatCc (int code);
+int xmlUCSIsCatCf (int code);
+int xmlUCSIsCatCo (int code);
+int xmlUCSIsCatCs (int code);
+int xmlUCSIsCatL (int code);
+int xmlUCSIsCatLl (int code);
+int xmlUCSIsCatLm (int code);
+int xmlUCSIsCatLo (int code);
+int xmlUCSIsCatLt (int code);
+int xmlUCSIsCatLu (int code);
+int xmlUCSIsCatM (int code);
+int xmlUCSIsCatMc (int code);
+int xmlUCSIsCatMe (int code);
+int xmlUCSIsCatMn (int code);
+int xmlUCSIsCatN (int code);
+int xmlUCSIsCatNd (int code);
+int xmlUCSIsCatNl (int code);
+int xmlUCSIsCatNo (int code);
+int xmlUCSIsCatP (int code);
+int xmlUCSIsCatPc (int code);
+int xmlUCSIsCatPd (int code);
+int xmlUCSIsCatPe (int code);
+int xmlUCSIsCatPf (int code);
+int xmlUCSIsCatPi (int code);
+int xmlUCSIsCatPo (int code);
+int xmlUCSIsCatPs (int code);
+int xmlUCSIsCatS (int code);
+int xmlUCSIsCatSc (int code);
+int xmlUCSIsCatSk (int code);
+int xmlUCSIsCatSm (int code);
+int xmlUCSIsCatSo (int code);
+int xmlUCSIsCatZ (int code);
+int xmlUCSIsCatZl (int code);
+int xmlUCSIsCatZp (int code);
+int xmlUCSIsCatZs (int code);
+
+int xmlUCSIsCat (int code,
+ const char *cat);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* __XML_UNICODE_H__ */
diff --git a/include/libxml/xmlversion.h.in b/include/libxml/xmlversion.h.in
index d99c58a..777e9f7 100644
--- a/include/libxml/xmlversion.h.in
+++ b/include/libxml/xmlversion.h.in
@@ -190,6 +190,42 @@
#endif
/**
+ * LIBXML_UNICODE_ENABLED
+ *
+ * Whether the Unicode related interfaces are compiled in
+ */
+#if @WITH_SCHEMAS@
+#define LIBXML_UNICODE_ENABLED
+#endif
+
+/**
+ * LIBXML_REGEXP_ENABLED
+ *
+ * Whether the regular expressions interfaces are compiled in
+ */
+#if @WITH_SCHEMAS@
+#define LIBXML_REGEXP_ENABLED
+#endif
+
+/**
+ * LIBXML_AUTOMATA_ENABLED
+ *
+ * Whether the automata interfaces are compiled in
+ */
+#if @WITH_SCHEMAS@
+#define LIBXML_AUTOMATA_ENABLED
+#endif
+
+/**
+ * LIBXML_SCHEMAS_ENABLED
+ *
+ * Whether the Schemas validation interfaces are compiled in
+ */
+#if @WITH_SCHEMAS@
+#define LIBXML_SCHEMAS_ENABLED
+#endif
+
+/**
* LIBXML_DLL_IMPORT:
*
* Used on Windows (MS C compiler only) to declare a variable as
diff --git a/result/automata/a b/result/automata/a
new file mode 100644
index 0000000..4ece411
--- /dev/null
+++ b/result/automata/a
@@ -0,0 +1,4 @@
+=> Passed
+=> Failed
+=> Failed
+=> Failed
diff --git a/result/automata/aba b/result/automata/aba
new file mode 100644
index 0000000..051b9bd
--- /dev/null
+++ b/result/automata/aba
@@ -0,0 +1,6 @@
+=> Passed
+=> Passed
+=> Passed
+=> Failed
+=> Failed
+=> Failed
diff --git a/result/automata/abaa b/result/automata/abaa
new file mode 100644
index 0000000..c74769e
--- /dev/null
+++ b/result/automata/abaa
@@ -0,0 +1,5 @@
+=> Passed
+=> Passed
+=> Passed
+=> Failed
+=> Failed
diff --git a/result/automata/abba b/result/automata/abba
new file mode 100644
index 0000000..1a60848
--- /dev/null
+++ b/result/automata/abba
@@ -0,0 +1,4 @@
+=> Passed
+=> Passed
+=> Failed
+=> Failed
diff --git a/result/automata/po b/result/automata/po
new file mode 100644
index 0000000..fafcae5
--- /dev/null
+++ b/result/automata/po
@@ -0,0 +1,2 @@
+=> Passed
+=> Passed
diff --git a/result/regexp/content b/result/regexp/content
new file mode 100644
index 0000000..220fd47
--- /dev/null
+++ b/result/regexp/content
@@ -0,0 +1,12 @@
+Regexp: ((a|b|c)def)
+adef: Ok
+bdef: Ok
+adefg: Fail
+aaef: Fail
+Regexp: ((a|b|c|d|e|f)?(g|h|i)+(k|l)*)
+g: Ok
+gi: Ok
+fil: Ok
+gikl: Ok
+cghhhiill: Ok
+ak: Fail
diff --git a/result/regexp/hard b/result/regexp/hard
new file mode 100644
index 0000000..f348c08
--- /dev/null
+++ b/result/regexp/hard
@@ -0,0 +1,7 @@
+Regexp: ((a|b|\p{Nd}){1,2}|aaa|bbbb){1,2}
+bab: Ok
+aaca: Fail
+aaabbbb: Ok
+a0b: Ok
+aa0aaa: Fail
+b0aaa: Ok
diff --git a/result/regexp/ncname b/result/regexp/ncname
new file mode 100644
index 0000000..3f16d95
--- /dev/null
+++ b/result/regexp/ncname
@@ -0,0 +1,6 @@
+Regexp: [\i-[:]][\c-[:]]*
+a: Ok
+abc: Ok
+abc1d: Ok
+1ac: Fail
+a1b:c: Fail
diff --git a/result/regexp/ranges b/result/regexp/ranges
new file mode 100644
index 0000000..4cbf298
--- /dev/null
+++ b/result/regexp/ranges
@@ -0,0 +1,15 @@
+Regexp: a{2,3}
+a: Fail
+aa: Ok
+aaa: Ok
+aaaa: Fail
+Regexp: ba{2,3}c
+bac: Fail
+baac: Ok
+baaac: Ok
+baaaac: Fail
+Regexp: a(b|c){2,3}d
+abcd: Ok
+acccd: Ok
+abd: Fail
+accccd: Fail
diff --git a/result/regexp/xpath b/result/regexp/xpath
new file mode 100644
index 0000000..4f6b13c
--- /dev/null
+++ b/result/regexp/xpath
@@ -0,0 +1,32 @@
+Regexp: (\.//)?(((child::)?((\i\c*:)?(\i\c*|\*)))|\.)(/(((child::)?((\i\c*:)?(\i\c*|\*)))|\.))*(\|(\.//)?(((child::)?((\i\c*:)?(\i\c*|\*)))|\.)(/(((child::)?((\i\c*:)?(\i\c*|\*)))|\.))*)*
+a: Ok
+a12/b312/b312/b312/b312/b312/b312/b312/b312/b312/b312/b312/b3: Ok
+*: Ok
+a|b: Ok
+.//a:b: Ok
+a/b/c: Ok
+a/*/b: Ok
+a:*/b:*/c:*: Ok
+child::a/child::b:*: Ok
+child::a/child::b:*|a/*/b|.//a:b: Ok
+1: Fail
+1ab: Fail
+a:1: Ok
+@a: Fail
+ancestor::a: Ok
+Regexp: (\.//)?(((child::)?(([\i-[:]][\c-[:]]*:)?([\i-[:]][\c-[:]]*|\*)))|\.)(/(((child::)?(([\i-[:]][\c-[:]]*:)?([\i-[:]][\c-[:]]*|\*)))|\.))*(\|(\.//)?(((child::)?(([\i-[:]][\c-[:]]*:)?([\i-[:]][\c-[:]]*|\*)))|\.)(/(((child::)?(([\i-[:]][\c-[:]]*:)?([\i-[:]][\c-[:]]*|\*)))|\.))*)*
+a: Ok
+a12/b312/b312/b312/b312/b312/b312/b312/b312/b312/b312/b312/b3: Ok
+*: Ok
+a|b: Ok
+.//a:b: Ok
+a/b/c: Ok
+a/*/b: Ok
+a:*/b:*/c:*: Ok
+child::a/child::b:*: Ok
+child::a/child::b:*|a/*/b|.//a:b: Ok
+1: Fail
+1ab: Fail
+a:1: Fail
+@a: Fail
+ancestor::a: Fail
diff --git a/result/schemas/po b/result/schemas/po
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/result/schemas/po
diff --git a/test/automata/a b/test/automata/a
new file mode 100644
index 0000000..e1f3e91
--- /dev/null
+++ b/test/automata/a
@@ -0,0 +1,14 @@
+#
+# tests just "a"
+#
+t 0 1 a
+f 1
+-------
+a
+=>
+a
+a
+=>
+=>
+b
+=>
diff --git a/test/automata/aba b/test/automata/aba
new file mode 100644
index 0000000..ee9a873
--- /dev/null
+++ b/test/automata/aba
@@ -0,0 +1,26 @@
+#
+# Tests a[ab]*
+#
+t 0 1 a
+t 1 1 a
+t 1 1 b
+f 1
+-------
+a
+=>
+a
+a
+=>
+a
+b
+a
+b
+a
+b
+=>
+b
+=>
+a
+c
+=>
+=>
diff --git a/test/automata/abaa b/test/automata/abaa
new file mode 100644
index 0000000..7862ba2
--- /dev/null
+++ b/test/automata/abaa
@@ -0,0 +1,43 @@
+#
+# Tests: a[ab]*a{2,3}
+#
+t 0 1 a
+t 1 1 a
+t 1 1 b
+c 1 2 2 3 a
+f 2
+-------
+a
+a
+a
+=>
+# Pass
+a
+b
+a
+a
+=>
+# Pass
+a
+a
+a
+a
+a
+a
+a
+a
+a
+=>
+# Pass
+a
+b
+a
+=>
+# Fail
+a
+b
+a
+a
+b
+=>
+# Fail
diff --git a/test/automata/abba b/test/automata/abba
new file mode 100644
index 0000000..86c08f1
--- /dev/null
+++ b/test/automata/abba
@@ -0,0 +1,30 @@
+#
+# Tests ab*a with an eliminated epsilon transition
+#
+t 0 1 a
+t 1 2 b
+e 1 2
+t 2 2 b
+t 2 3 a
+f 3
+-------
+a
+a
+=>
+# Pass
+a
+b
+b
+a
+=>
+# Pass
+a
+b
+=>
+# Fail
+a
+b
+a
+b
+=>
+# Fail
diff --git a/test/automata/po b/test/automata/po
new file mode 100644
index 0000000..592b8c9
--- /dev/null
+++ b/test/automata/po
@@ -0,0 +1,19 @@
+#
+# purchaseOrder
+#
+t 0 1 shipTo
+t 1 2 billTo
+t 2 3 comment
+t 3 4 items
+e 2 3
+f 4
+-------
+shipTo
+billTo
+comment
+items
+=>
+shipTo
+billTo
+items
+=>
diff --git a/test/regexp/content b/test/regexp/content
new file mode 100644
index 0000000..9d01c8b
--- /dev/null
+++ b/test/regexp/content
@@ -0,0 +1,12 @@
+=>((a|b|c)def)
+adef
+bdef
+adefg
+aaef
+=>((a|b|c|d|e|f)?(g|h|i)+(k|l)*)
+g
+gi
+fil
+gikl
+cghhhiill
+ak
diff --git a/test/regexp/hard b/test/regexp/hard
new file mode 100644
index 0000000..40c6d2d
--- /dev/null
+++ b/test/regexp/hard
@@ -0,0 +1,7 @@
+=>((a|b|\p{Nd}){1,2}|aaa|bbbb){1,2}
+bab
+aaca
+aaabbbb
+a0b
+aa0aaa
+b0aaa
diff --git a/test/regexp/ncname b/test/regexp/ncname
new file mode 100644
index 0000000..1e452a8
--- /dev/null
+++ b/test/regexp/ncname
@@ -0,0 +1,6 @@
+=>[\i-[:]][\c-[:]]*
+a
+abc
+abc1d
+1ac
+a1b:c
diff --git a/test/regexp/ranges b/test/regexp/ranges
new file mode 100644
index 0000000..cb7c22f
--- /dev/null
+++ b/test/regexp/ranges
@@ -0,0 +1,15 @@
+=>a{2,3}
+a
+aa
+aaa
+aaaa
+=>ba{2,3}c
+bac
+baac
+baaac
+baaaac
+=>a(b|c){2,3}d
+abcd
+acccd
+abd
+accccd
diff --git a/test/regexp/xpath b/test/regexp/xpath
new file mode 100644
index 0000000..62c18c4
--- /dev/null
+++ b/test/regexp/xpath
@@ -0,0 +1,37 @@
+=>(\.//)?(((child::)?((\i\c*:)?(\i\c*|\*)))|\.)(/(((child::)?((\i\c*:)?(\i\c*|\*)))|\.))*(\|(\.//)?(((child::)?((\i\c*:)?(\i\c*|\*)))|\.)(/(((child::)?((\i\c*:)?(\i\c*|\*)))|\.))*)*
+a
+a12/b312/b312/b312/b312/b312/b312/b312/b312/b312/b312/b312/b3
+*
+a|b
+.//a:b
+a/b/c
+a/*/b
+a:*/b:*/c:*
+child::a/child::b:*
+child::a/child::b:*|a/*/b|.//a:b
+1
+1ab
+a:1
+@a
+ancestor::a
+#
+# the previous regexp from the Schemas for Schemas was broken
+# here is the fixed one:
+# http://lists.w3.org/Archives/Public/www-xml-schema-comments/2002AprJun/0005.html
+#
+=>(\.//)?(((child::)?(([\i-[:]][\c-[:]]*:)?([\i-[:]][\c-[:]]*|\*)))|\.)(/(((child::)?(([\i-[:]][\c-[:]]*:)?([\i-[:]][\c-[:]]*|\*)))|\.))*(\|(\.//)?(((child::)?(([\i-[:]][\c-[:]]*:)?([\i-[:]][\c-[:]]*|\*)))|\.)(/(((child::)?(([\i-[:]][\c-[:]]*:)?([\i-[:]][\c-[:]]*|\*)))|\.))*)*
+a
+a12/b312/b312/b312/b312/b312/b312/b312/b312/b312/b312/b312/b3
+*
+a|b
+.//a:b
+a/b/c
+a/*/b
+a:*/b:*/c:*
+child::a/child::b:*
+child::a/child::b:*|a/*/b|.//a:b
+1
+1ab
+a:1
+@a
+ancestor::a
diff --git a/test/schemas/po.xml b/test/schemas/po.xml
new file mode 100644
index 0000000..387232d
--- /dev/null
+++ b/test/schemas/po.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0"?>
+<purchaseOrder orderDate="1999-10-20">
+ <shipTo country="US">
+ <name>Alice Smith</name>
+ <street>123 Maple Street</street>
+ <city>Mill Valley</city>
+ <state>CA</state>
+ <zip>90952</zip>
+ </shipTo>
+ <billTo country="US">
+ <name>Robert Smith</name>
+ <street>8 Oak Avenue</street>
+ <city>Old Town</city>
+ <state>PA</state>
+ <zip>95819</zip>
+ </billTo>
+ <comment>Hurry, my lawn is going wild!</comment>
+ <items>
+ <item partNum="872-AA">
+ <productName>Lawnmower</productName>
+ <quantity>1</quantity>
+ <USPrice>148.95</USPrice>
+ <comment>Confirm this is electric</comment>
+ </item>
+ <item partNum="926-AA">
+ <productName>Baby Monitor</productName>
+ <quantity>1</quantity>
+ <USPrice>39.98</USPrice>
+ <shipDate>1999-05-21</shipDate>
+ </item>
+ </items>
+</purchaseOrder>
diff --git a/test/schemas/po.xsd b/test/schemas/po.xsd
new file mode 100644
index 0000000..5a1e660
--- /dev/null
+++ b/test/schemas/po.xsd
@@ -0,0 +1,59 @@
+<?xml version="1.0"?>
+<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
+ <xsd:annotation>
+ <xsd:documentation xml:lang="en">
+Purchase order schema for Example.com.
+ Copyright 2000 Example.com. All rights reserved.
+ </xsd:documentation>
+ </xsd:annotation>
+ <xsd:element name="purchaseOrder" type="PurchaseOrderType"/>
+ <xsd:element name="comment" type="xsd:string"/>
+ <xsd:complexType name="PurchaseOrderType">
+ <xsd:sequence>
+ <xsd:element name="shipTo" type="USAddress"/>
+ <xsd:element name="billTo" type="USAddress"/>
+ <xsd:element ref="comment" minOccurs="0"/>
+ <xsd:element name="items" type="Items"/>
+ </xsd:sequence>
+ <xsd:attribute name="orderDate" type="xsd:date"/>
+ </xsd:complexType>
+ <xsd:complexType name="USAddress">
+ <xsd:sequence>
+ <xsd:element name="name" type="xsd:string"/>
+ <xsd:element name="street" type="xsd:string"/>
+ <xsd:element name="city" type="xsd:string"/>
+ <xsd:element name="state" type="xsd:string"/>
+ <xsd:element name="zip" type="xsd:decimal"/>
+ </xsd:sequence>
+ <xsd:attribute name="country" type="xsd:NMTOKEN" fixed="US"/>
+ </xsd:complexType>
+ <xsd:complexType name="Items">
+ <xsd:sequence>
+ <xsd:element name="item" minOccurs="0" maxOccurs="unbounded">
+ <xsd:complexType>
+ <xsd:sequence>
+ <xsd:element name="productName" type="xsd:string"/>
+ <xsd:element name="quantity">
+ <xsd:simpleType>
+ <xsd:restriction base="xsd:positiveInteger">
+ <xsd:maxExclusive value="100"/>
+ </xsd:restriction>
+ </xsd:simpleType>
+ </xsd:element>
+ <xsd:element name="USPrice" type="xsd:decimal"/>
+ <xsd:element ref="comment" minOccurs="0"/>
+ <xsd:element name="shipDate" type="xsd:date" minOccurs="0"/>
+ </xsd:sequence>
+ <xsd:attribute name="partNum" type="SKU" use="required"/>
+ </xsd:complexType>
+ </xsd:element>
+ </xsd:sequence>
+ </xsd:complexType>
+<!-- Stock Keeping Unit, a code for identifying products -->
+ <xsd:simpleType name="SKU">
+ <xsd:restriction base="xsd:string">
+ <xsd:pattern value="\d{3}-[A-Z]{2}"/>
+ </xsd:restriction>
+ </xsd:simpleType>
+</xsd:schema>
+
diff --git a/testAutomata.c b/testAutomata.c
new file mode 100644
index 0000000..8e65786
--- /dev/null
+++ b/testAutomata.c
@@ -0,0 +1,306 @@
+/*
+ * testAutomata.c: simple module for testing the automata interfaces
+ *
+ * See Copyright for the status of this software.
+ *
+ * Daniel Veillard <veillard@redhat.com>
+ */
+
+#include <string.h>
+#include "libxml.h"
+#ifdef LIBXML_AUTOMATA_ENABLED
+
+#include <libxml/xmlautomata.h>
+
+static int scanNumber(char **ptr) {
+    char *p = *ptr;
+    int value = 0;
+
+    /* accumulate consecutive ASCII digits, stopping at the first non-digit */
+    for (; (*p >= '0') && (*p <= '9'); p++)
+        value = value * 10 + (*p - '0');
+
+    /* hand the position of the first unconsumed character back */
+    *ptr = p;
+    return(value);
+}
+
+/*
+ * testRegexpFile: build an automata from the "t from to token", "e from to",
+ * "c from to min max token" and "f state" lines of @filename, compile it at
+ * the "--" line, then push each following line as a token and print the
+ * verdict whenever a "=>" line is met.  Lines starting with '#' are skipped.
+ */
+static void
+testRegexpFile(const char *filename) {
+    FILE *input;
+    char exp[5000];
+    int len, ret, i;
+    xmlAutomataPtr am;
+    xmlAutomataStatePtr states[1000];
+    const int nbStates = sizeof(states) / sizeof(states[0]);
+    xmlRegexpPtr regexp = NULL;
+    xmlRegExecCtxtPtr exec = NULL; /* compared to NULL before being created */
+
+    for (i = 0;i < nbStates;i++)
+        states[i] = NULL;
+
+    input = fopen(filename, "r");
+    if (input == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+                "Cannot open %s for reading\n", filename);
+        return;
+    }
+
+    am = xmlNewAutomata();
+    if (am == NULL) {
+        xmlGenericError(xmlGenericErrorContext, "Cannot create automata\n");
+        fclose(input);
+        return; /* nothing to build on, and input is closed */
+    }
+    states[0] = xmlAutomataGetInitState(am);
+    if (states[0] == NULL) {
+        xmlGenericError(xmlGenericErrorContext, "Cannot get start state\n");
+        xmlFreeAutomata(am);
+        fclose(input);
+        return; /* am is freed and input is closed, do not go on */
+    }
+    ret = 0;
+
+    while (fgets(exp, 4500, input) != NULL) {
+        if (exp[0] == '#')
+            continue;
+        len = (int) strlen(exp) - 1;
+        while ((len >= 0) &&
+               ((exp[len] == '\n') || (exp[len] == '\t') ||
+                (exp[len] == '\r') || (exp[len] == ' '))) len--;
+        exp[len + 1] = 0;
+        if (len >= 0) {
+            if ((am != NULL) && (exp[0] == 't') && (exp[1] == ' ')) {
+                char *ptr = &exp[2];
+                int from, to;
+
+                from = scanNumber(&ptr);
+                if ((*ptr != ' ') || (from < 0) || (from >= nbStates)) {
+                    xmlGenericError(xmlGenericErrorContext, "Bad line %s\n", exp);
+                    break;
+                }
+                if (states[from] == NULL)
+                    states[from] = xmlAutomataNewState(am);
+                ptr++;
+                to = scanNumber(&ptr);
+                if ((*ptr != ' ') || (to < 0) || (to >= nbStates)) {
+                    xmlGenericError(xmlGenericErrorContext, "Bad line %s\n", exp);
+                    break;
+                }
+                if (states[to] == NULL)
+                    states[to] = xmlAutomataNewState(am);
+                ptr++;
+                xmlAutomataNewTransition(am, states[from], states[to],
+                                         BAD_CAST ptr, NULL);
+            } else if ((am != NULL) && (exp[0] == 'e') && (exp[1] == ' ')) {
+                char *ptr = &exp[2];
+                int from, to;
+
+                from = scanNumber(&ptr);
+                if ((*ptr != ' ') || (from < 0) || (from >= nbStates)) {
+                    xmlGenericError(xmlGenericErrorContext, "Bad line %s\n", exp);
+                    break;
+                }
+                if (states[from] == NULL)
+                    states[from] = xmlAutomataNewState(am);
+                ptr++;
+                to = scanNumber(&ptr);
+                if ((to < 0) || (to >= nbStates)) { /* keep states[to] in bounds */
+                    xmlGenericError(xmlGenericErrorContext, "Bad line %s\n", exp);
+                    break;
+                }
+                if (states[to] == NULL)
+                    states[to] = xmlAutomataNewState(am);
+                xmlAutomataNewEpsilon(am, states[from], states[to]);
+            } else if ((am != NULL) && (exp[0] == 'f') && (exp[1] == ' ')) {
+                char *ptr = &exp[2];
+                int state;
+
+                state = scanNumber(&ptr);
+                if ((state < 0) || (state >= nbStates) ||
+                    (states[state] == NULL)) {
+                    xmlGenericError(xmlGenericErrorContext,
+                            "Bad state %d : %s\n", state, exp);
+                    break;
+                }
+                xmlAutomataSetFinalState(am, states[state]);
+            } else if ((am != NULL) && (exp[0] == 'c') && (exp[1] == ' ')) {
+                char *ptr = &exp[2];
+                int from, to;
+                int min, max;
+
+                from = scanNumber(&ptr);
+                if ((*ptr != ' ') || (from < 0) || (from >= nbStates)) {
+                    xmlGenericError(xmlGenericErrorContext, "Bad line %s\n", exp);
+                    break;
+                }
+                if (states[from] == NULL)
+                    states[from] = xmlAutomataNewState(am);
+                ptr++;
+                to = scanNumber(&ptr);
+                if ((*ptr != ' ') || (to < 0) || (to >= nbStates)) {
+                    xmlGenericError(xmlGenericErrorContext, "Bad line %s\n", exp);
+                    break;
+                }
+                if (states[to] == NULL)
+                    states[to] = xmlAutomataNewState(am);
+                ptr++;
+                min = scanNumber(&ptr);
+                if (*ptr != ' ') {
+                    xmlGenericError(xmlGenericErrorContext, "Bad line %s\n", exp);
+                    break;
+                }
+                ptr++;
+                max = scanNumber(&ptr);
+                if (*ptr != ' ') {
+                    xmlGenericError(xmlGenericErrorContext, "Bad line %s\n", exp);
+                    break;
+                }
+                ptr++;
+                xmlAutomataNewCountTrans(am, states[from], states[to],
+                                         BAD_CAST ptr, min, max, NULL);
+            } else if ((am != NULL) && (exp[0] == '-') && (exp[1] == '-')) {
+                /* end of the automata */
+                regexp = xmlAutomataCompile(am);
+                xmlFreeAutomata(am);
+                am = NULL;
+                if (regexp == NULL) {
+                    xmlGenericError(xmlGenericErrorContext,
+                            "Failed to compile the automata");
+                    break;
+                }
+            } else if ((exp[0] == '=') && (exp[1] == '>')) {
+                if (regexp == NULL) {
+                    printf("=> failed not compiled\n");
+                } else {
+                    if (exec == NULL)
+                        exec = xmlRegNewExecCtxt(regexp, NULL, NULL);
+                    if (ret == 0) /* not decided yet: signal the end of input */
+                        ret = xmlRegExecPushString(exec, NULL, NULL);
+                    if (ret == 1)
+                        printf("=> Passed\n");
+                    else if ((ret == 0) || (ret == -1))
+                        printf("=> Failed\n");
+                    else if (ret < 0)
+                        printf("=> Error\n");
+                    xmlRegFreeExecCtxt(exec);
+                    exec = NULL;
+                }
+                ret = 0;
+            } else if (regexp != NULL) {
+                if (exec == NULL)
+                    exec = xmlRegNewExecCtxt(regexp, NULL, NULL);
+                ret = xmlRegExecPushString(exec, BAD_CAST exp, NULL);
+            } else {
+                xmlGenericError(xmlGenericErrorContext, "Unexpected line %s\n", exp);
+            }
+        }
+    }
+    fclose(input);
+    if (regexp != NULL)
+        xmlRegFreeRegexp(regexp);
+    if (exec != NULL)
+        xmlRegFreeExecCtxt(exec);
+    if (am != NULL)
+        xmlFreeAutomata(am);
+}
+
+int main(int argc, char **argv) {
+
+ xmlInitMemory();
+
+ if (argc == 1) {
+ int ret;
+ xmlAutomataPtr am;
+ xmlAutomataStatePtr start, cur;
+ xmlRegexpPtr regexp;
+ xmlRegExecCtxtPtr exec;
+
+ am = xmlNewAutomata();
+ start = xmlAutomataGetInitState(am);
+
+ /* generate a[ba]*a */
+ cur = xmlAutomataNewTransition(am, start, NULL, BAD_CAST"a", NULL);
+ xmlAutomataNewTransition(am, cur, cur, BAD_CAST"b", NULL);
+ xmlAutomataNewTransition(am, cur, cur, BAD_CAST"a", NULL);
+ cur = xmlAutomataNewCountTrans(am, cur, NULL, BAD_CAST"a", 2, 3, NULL);
+ xmlAutomataSetFinalState(am, cur);
+
+ /* compile it in a regexp and free the automata */
+ regexp = xmlAutomataCompile(am);
+ xmlFreeAutomata(am);
+
+ /* test the regexp */
+ xmlRegexpPrint(stdout, regexp);
+ exec = xmlRegNewExecCtxt(regexp, NULL, NULL);
+ ret = xmlRegExecPushString(exec, BAD_CAST"a", NULL);
+ if (ret == 1)
+ printf("final\n");
+ else if (ret < 0)
+ printf("error\n");
+ ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL);
+ if (ret == 1)
+ printf("final\n");
+ else if (ret < 0)
+ printf("error\n");
+ ret =xmlRegExecPushString(exec, BAD_CAST"b", NULL);
+ if (ret == 1)
+ printf("final\n");
+ else if (ret < 0)
+ printf("error\n");
+ ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL);
+ if (ret == 1)
+ printf("final\n");
+ else if (ret < 0)
+ printf("error\n");
+ ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL);
+ if (ret == 1)
+ printf("final\n");
+ else if (ret < 0)
+ printf("error\n");
+ ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL);
+ if (ret == 1)
+ printf("final\n");
+ else if (ret < 0)
+ printf("error\n");
+ ret =xmlRegExecPushString(exec, BAD_CAST"a", NULL);
+ if (ret == 1)
+ printf("final\n");
+ else if (ret < 0)
+ printf("error\n");
+ if (ret == 0) {
+ ret = xmlRegExecPushString(exec, NULL, NULL);
+ if (ret == 1)
+ printf("final\n");
+ else if (ret < 0)
+ printf("error\n");
+ }
+ xmlRegFreeExecCtxt(exec);
+
+ /* free the regexp */
+ xmlRegFreeRegexp(regexp);
+ } else {
+ int i;
+
+ for (i = 1;i < argc;i++)
+ testRegexpFile(argv[i]);
+ }
+
+ xmlCleanupParser();
+ xmlMemoryDump();
+ return(0);
+}
+
+#else
+#include <stdio.h>
+int main(int argc, char **argv) {
+ printf("%s : Automata support not compiled in\n", argv[0]);
+ return(0);
+}
+#endif /* LIBXML_AUTOMATA_ENABLED */
diff --git a/testRegexp.c b/testRegexp.c
new file mode 100644
index 0000000..a1d0d27
--- /dev/null
+++ b/testRegexp.c
@@ -0,0 +1,157 @@
+/*
+ * testRegexp.c: simple module for testing regular expressions
+ *
+ * See Copyright for the status of this software.
+ *
+ * Daniel Veillard <veillard@redhat.com>
+ */
+
+#include <string.h>
+#include "libxml.h"
+#ifdef LIBXML_REGEXP_ENABLED
+#include <libxml/tree.h>
+#include <libxml/xmlregexp.h>
+
+int repeat = 0;
+int debug = 0;
+
+static void testRegexp(xmlRegexpPtr comp, const char *value) {
+ int ret;
+
+ ret = xmlRegexpExec(comp, (const xmlChar *) value);
+ if (ret == 1)
+ printf("%s: Ok\n", value);
+ else if (ret == 0)
+ printf("%s: Fail\n", value);
+ else
+ printf("%s: Error: %d\n", value, ret);
+ if (repeat) {
+ int j;
+ for (j = 0;j < 999999;j++)
+ xmlRegexpExec(comp, (const xmlChar *) value);
+ }
+}
+
+static void
+testRegexpFile(const char *filename) {
+ xmlRegexpPtr comp = NULL;
+ FILE *input;
+ char expression[5000];
+ int len;
+
+ input = fopen(filename, "r");
+ if (input == NULL) {
+ xmlGenericError(xmlGenericErrorContext,
+ "Cannot open %s for reading\n", filename);
+ return;
+ }
+ while (fgets(expression, 4500, input) != NULL) {
+ len = strlen(expression);
+ len--;
+ while ((len >= 0) &&
+ ((expression[len] == '\n') || (expression[len] == '\t') ||
+ (expression[len] == '\r') || (expression[len] == ' '))) len--;
+ expression[len + 1] = 0;
+ if (len >= 0) {
+ if (expression[0] == '#')
+ continue;
+ if ((expression[0] == '=') && (expression[1] == '>')) {
+ char *pattern = &expression[2];
+
+ if (comp != NULL) {
+ xmlRegFreeRegexp(comp);
+ comp = NULL;
+ }
+ printf("Regexp: %s\n", pattern) ;
+ comp = xmlRegexpCompile((const xmlChar *) pattern);
+ if (comp == NULL) {
+ printf(" failed to compile\n");
+ break;
+ }
+ } else if (comp == NULL) {
+ printf("Regexp: %s\n", expression) ;
+ comp = xmlRegexpCompile((const xmlChar *) expression);
+ if (comp == NULL) {
+ printf(" failed to compile\n");
+ break;
+ }
+ } else if (comp != NULL) {
+ testRegexp(comp, expression);
+ }
+ }
+ }
+ fclose(input);
+ if (comp != NULL)
+ xmlRegFreeRegexp(comp);
+}
+
+
+static void usage(const char *name) {
+ fprintf(stderr, "Usage: %s\n", name);
+}
+
+int main(int argc, char **argv) {
+ xmlRegexpPtr comp = NULL;
+ const char *pattern = NULL;
+ char *filename = NULL;
+ int i;
+
+ xmlInitMemory();
+
+ if (argc <= 1) {
+ usage(argv[0]);
+ return(1);
+ }
+ for (i = 1; i < argc ; i++) {
+ if (!strcmp(argv[i], "-"))
+ break;
+
+ if (argv[i][0] != '-')
+ continue;
+ if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug"))) {
+ debug++;
+ } else if ((!strcmp(argv[i], "-repeat")) ||
+ (!strcmp(argv[i], "--repeat"))) {
+ repeat++;
+ } else if ((!strcmp(argv[i], "-i")) || (!strcmp(argv[i], "--input")))
+ filename = argv[++i];
+ else {
+ fprintf(stderr, "Unknown option %s\n", argv[i]);
+ usage(argv[0]);
+ }
+ }
+ if (filename != NULL) {
+ testRegexpFile(filename);
+ } else {
+ for (i = 1; i < argc ; i++) {
+ if ((argv[i][0] != '-') || (strcmp(argv[i], "-") == 0)) {
+ if (pattern == NULL) {
+ pattern = argv[i];
+ printf("Testing %s:\n", pattern);
+ comp = xmlRegexpCompile((const xmlChar *) pattern);
+ if (comp == NULL) {
+ printf(" failed to compile\n");
+ break;
+ }
+ if (debug)
+ xmlRegexpPrint(stdout, comp);
+ } else {
+ testRegexp(comp, argv[i]);
+ }
+ }
+ }
+ if (comp != NULL)
+ xmlRegFreeRegexp(comp);
+ }
+ xmlCleanupParser();
+ xmlMemoryDump();
+ return(0);
+}
+
+#else
+#include <stdio.h>
+int main(int argc, char **argv) {
+ printf("%s : Regexp support not compiled in\n", argv[0]);
+ return(0);
+}
+#endif /* LIBXML_REGEXP_ENABLED */
diff --git a/testSchemas.c b/testSchemas.c
new file mode 100644
index 0000000..3168167
--- /dev/null
+++ b/testSchemas.c
@@ -0,0 +1,120 @@
+/*
+ * testSchemas.c : a small tester program for Schema validation
+ *
+ * See Copyright for the status of this software.
+ *
+ * Daniel.Veillard@w3.org
+ */
+
+#include "libxml.h"
+#ifdef LIBXML_SCHEMAS_ENABLED
+
+#include <libxml/xmlversion.h>
+#include <libxml/parser.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+
+
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+#include <libxml/xmlmemory.h>
+#include <libxml/debugXML.h>
+#include <libxml/xmlschemas.h>
+
+/* Defined unconditionally: main() tests and increments this flag whether */
+/* or not LIBXML_DEBUG_ENABLED is set, so guarding it breaks the build. */
+static int debug = 0;
+static int noout = 0;
+
+
+int main(int argc, char **argv) {
+ int i;
+ int files = 0;
+ xmlSchemaPtr schema = NULL;
+
+ for (i = 1; i < argc ; i++) {
+ if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
+ debug++;
+ else
+ if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout"))) {
+ noout++;
+ }
+ }
+ xmlLineNumbersDefault(1);
+ for (i = 1; i < argc ; i++) {
+ if (argv[i][0] != '-') {
+ if (schema == NULL) {
+ xmlSchemaParserCtxtPtr ctxt;
+
+ ctxt = xmlSchemaNewParserCtxt(argv[i]);
+ xmlSchemaSetParserErrors(ctxt,
+ (xmlSchemaValidityErrorFunc) fprintf,
+ (xmlSchemaValidityWarningFunc) fprintf,
+ stderr);
+ schema = xmlSchemaParse(ctxt);
+ xmlSchemaFreeParserCtxt(ctxt);
+ if (debug)
+ xmlSchemaDump(stdout, schema);
+ } else {
+ xmlDocPtr doc;
+
+ doc = xmlParseFile(argv[i]);
+
+ if (doc == NULL) {
+ fprintf(stderr, "Could not parse %s\n", argv[i]);
+ } else {
+ xmlSchemaValidCtxtPtr ctxt;
+ int ret;
+
+ ctxt = xmlSchemaNewValidCtxt(schema);
+ xmlSchemaSetValidErrors(ctxt,
+ (xmlSchemaValidityErrorFunc) fprintf,
+ (xmlSchemaValidityWarningFunc) fprintf,
+ stderr);
+ ret = xmlSchemaValidateDoc(ctxt, doc);
+ xmlSchemaFreeValidCtxt(ctxt);
+ xmlFreeDoc(doc);
+ }
+ }
+ files ++;
+ }
+ }
+ if (schema != NULL)
+ xmlSchemaFree(schema);
+ if (files == 0) {
+ printf("Usage : %s [--debug] [--noout] schemas XMLfiles ...\n",
+ argv[0]);
+ printf("\tParse the HTML files and output the result of the parsing\n");
+ printf("\t--debug : dump a debug tree of the in-memory document\n");
+ printf("\t--noout : do not print the result\n");
+ }
+ xmlSchemaCleanupTypes();
+ xmlCleanupParser();
+ xmlMemoryDump();
+
+ return(0);
+}
+
+#else
+#include <stdio.h>
+int main(int argc, char **argv) {
+ printf("%s : Schemas support not compiled in\n", argv[0]);
+ return(0);
+}
+#endif /* LIBXML_SCHEMAS_ENABLED */
diff --git a/xmlregexp.c b/xmlregexp.c
new file mode 100644
index 0000000..1139e15
--- /dev/null
+++ b/xmlregexp.c
@@ -0,0 +1,3470 @@
+/*
+ * xmlregexp.c: generic and extensible Regular Expression engine
+ *
+ * Basically designed with the purpose of compiling regexps for
+ * the variety of validation/schemas mechanisms now available in
+ * XML related specifications, this includes:
+ * - XML-1.0 DTD validation
+ * - XML Schemas structure part 1
+ * - XML Schemas Datatypes part 2 especially Appendix F
+ * - RELAX-NG/TREX i.e. the counter proposal
+ *
+ * See Copyright for the status of this software.
+ *
+ * Daniel Veillard <veillard@redhat.com>
+ */
+
+#define IN_LIBXML
+#include "libxml.h"
+
+#ifdef LIBXML_REGEXP_ENABLED
+
+#include <stdio.h>
+#include <string.h>
+#include <libxml/tree.h>
+#include <libxml/parserInternals.h>
+#include <libxml/xmlregexp.h>
+#include <libxml/xmlautomata.h>
+#include <libxml/xmlunicode.h>
+
+/* #define DEBUG_REGEXP_GRAPH */
+/* #define DEBUG_REGEXP_EXEC */
+/* #define DEBUG_PUSH */
+
+#define ERROR(str) ctxt->error = 1; \
+ xmlGenericError(xmlGenericErrorContext, "Regexp: %s: %s\n", str, ctxt->cur)
+#define NEXT ctxt->cur++
+#define CUR (*(ctxt->cur))
+#define NXT(index) (ctxt->cur[index])
+
+#define CUR_SCHAR(s, l) xmlStringCurrentChar(NULL, s, &l)
+#define NEXTL(l) ctxt->cur += l;
+
+
+/************************************************************************
+ * *
+ * Datatypes and structures *
+ * *
+ ************************************************************************/
+
+typedef enum {
+ XML_REGEXP_EPSILON = 1,
+ XML_REGEXP_CHARVAL,
+ XML_REGEXP_RANGES,
+ XML_REGEXP_SUBREG,
+ XML_REGEXP_STRING,
+ XML_REGEXP_ANYCHAR, /* . */
+ XML_REGEXP_ANYSPACE, /* \s */
+ XML_REGEXP_NOTSPACE, /* \S */
+ XML_REGEXP_INITNAME, /* \i */
+ XML_REGEXP_NOTINITNAME, /* \I */
+ XML_REGEXP_NAMECHAR, /* \c */
+ XML_REGEXP_NOTNAMECHAR, /* \C */
+ XML_REGEXP_DECIMAL, /* \d */
+ XML_REGEXP_NOTDECIMAL, /* \D */
+ XML_REGEXP_REALCHAR, /* \w */
+ XML_REGEXP_NOTREALCHAR, /* \W */
+ XML_REGEXP_LETTER,
+ XML_REGEXP_LETTER_UPPERCASE,
+ XML_REGEXP_LETTER_LOWERCASE,
+ XML_REGEXP_LETTER_TITLECASE,
+ XML_REGEXP_LETTER_MODIFIER,
+ XML_REGEXP_LETTER_OTHERS,
+ XML_REGEXP_MARK,
+ XML_REGEXP_MARK_NONSPACING,
+ XML_REGEXP_MARK_SPACECOMBINING,
+ XML_REGEXP_MARK_ENCLOSING,
+ XML_REGEXP_NUMBER,
+ XML_REGEXP_NUMBER_DECIMAL,
+ XML_REGEXP_NUMBER_LETTER,
+ XML_REGEXP_NUMBER_OTHERS,
+ XML_REGEXP_PUNCT,
+ XML_REGEXP_PUNCT_CONNECTOR,
+ XML_REGEXP_PUNCT_DASH,
+ XML_REGEXP_PUNCT_OPEN,
+ XML_REGEXP_PUNCT_CLOSE,
+ XML_REGEXP_PUNCT_INITQUOTE,
+ XML_REGEXP_PUNCT_FINQUOTE,
+ XML_REGEXP_PUNCT_OTHERS,
+ XML_REGEXP_SEPAR,
+ XML_REGEXP_SEPAR_SPACE,
+ XML_REGEXP_SEPAR_LINE,
+ XML_REGEXP_SEPAR_PARA,
+ XML_REGEXP_SYMBOL,
+ XML_REGEXP_SYMBOL_MATH,
+ XML_REGEXP_SYMBOL_CURRENCY,
+ XML_REGEXP_SYMBOL_MODIFIER,
+ XML_REGEXP_SYMBOL_OTHERS,
+ XML_REGEXP_OTHER,
+ XML_REGEXP_OTHER_CONTROL,
+ XML_REGEXP_OTHER_FORMAT,
+ XML_REGEXP_OTHER_PRIVATE,
+ XML_REGEXP_OTHER_NA,
+ XML_REGEXP_BLOCK_NAME
+} xmlRegAtomType;
+
+typedef enum {
+ XML_REGEXP_QUANT_EPSILON = 1,
+ XML_REGEXP_QUANT_ONCE,
+ XML_REGEXP_QUANT_OPT,
+ XML_REGEXP_QUANT_MULT,
+ XML_REGEXP_QUANT_PLUS,
+ XML_REGEXP_QUANT_RANGE
+} xmlRegQuantType;
+
+typedef enum {
+ XML_REGEXP_START_STATE = 1,
+ XML_REGEXP_FINAL_STATE,
+ XML_REGEXP_TRANS_STATE
+} xmlRegStateType;
+
+typedef enum {
+ XML_REGEXP_MARK_NORMAL = 0,
+ XML_REGEXP_MARK_START,
+ XML_REGEXP_MARK_VISITED
+} xmlRegMarkedType;
+
+typedef struct _xmlRegRange xmlRegRange;
+typedef xmlRegRange *xmlRegRangePtr;
+
+struct _xmlRegRange {
+ int neg;
+ xmlRegAtomType type;
+ int start;
+ int end;
+ xmlChar *blockName;
+};
+
+typedef struct _xmlRegAtom xmlRegAtom;
+typedef xmlRegAtom *xmlRegAtomPtr;
+
+typedef struct _xmlAutomataState xmlRegState;
+typedef xmlRegState *xmlRegStatePtr;
+
+struct _xmlRegAtom {
+ int no;
+ xmlRegAtomType type;
+ xmlRegQuantType quant;
+ int min;
+ int max;
+
+ void *valuep;
+ int neg;
+ int codepoint;
+ xmlRegStatePtr start;
+ xmlRegStatePtr stop;
+ int maxRanges;
+ int nbRanges;
+ xmlRegRangePtr *ranges;
+ void *data;
+};
+
+typedef struct _xmlRegCounter xmlRegCounter;
+typedef xmlRegCounter *xmlRegCounterPtr;
+
+struct _xmlRegCounter {
+ int min;
+ int max;
+};
+
+typedef struct _xmlRegTrans xmlRegTrans;
+typedef xmlRegTrans *xmlRegTransPtr;
+
+struct _xmlRegTrans {
+ xmlRegAtomPtr atom;
+ int to;
+ int counter;
+ int count;
+};
+
+struct _xmlAutomataState {
+ xmlRegStateType type;
+ xmlRegMarkedType mark;
+ int no;
+
+ int maxTrans;
+ int nbTrans;
+ xmlRegTrans *trans;
+};
+
+typedef struct _xmlAutomata xmlRegParserCtxt;
+typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
+
+struct _xmlAutomata {
+ xmlChar *string;
+ xmlChar *cur;
+
+ int error;
+ int neg;
+
+ xmlRegStatePtr start;
+ xmlRegStatePtr end;
+ xmlRegStatePtr state;
+
+ xmlRegAtomPtr atom;
+
+ int maxAtoms;
+ int nbAtoms;
+ xmlRegAtomPtr *atoms;
+
+ int maxStates;
+ int nbStates;
+ xmlRegStatePtr *states;
+
+ int maxCounters;
+ int nbCounters;
+ xmlRegCounter *counters;
+};
+
+struct _xmlRegexp {
+ xmlChar *string;
+ int nbStates;
+ xmlRegStatePtr *states;
+ int nbAtoms;
+ xmlRegAtomPtr *atoms;
+ int nbCounters;
+ xmlRegCounter *counters;
+};
+
+typedef struct _xmlRegExecRollback xmlRegExecRollback;
+typedef xmlRegExecRollback *xmlRegExecRollbackPtr;
+
+struct _xmlRegExecRollback {
+ xmlRegStatePtr state;/* the current state */
+ int index; /* the index in the input stack */
+ int nextbranch; /* the next transition to explore in that state */
+ int *counts; /* save the automate state if it has some */
+};
+
+typedef struct _xmlRegInputToken xmlRegInputToken;
+typedef xmlRegInputToken *xmlRegInputTokenPtr;
+
+struct _xmlRegInputToken {
+ xmlChar *value;
+ void *data;
+};
+
+struct _xmlRegExecCtxt {
+ int status; /* execution status, != 0 indicates an error */
+ int determinist; /* did we find a non-deterministic behaviour */
+ xmlRegexpPtr comp; /* the compiled regexp */
+ xmlRegExecCallbacks callback;
+ void *data;
+
+ xmlRegStatePtr state;/* the current state */
+ int transno; /* the current transition on that state */
+ int transcount; /* the number of char in char counted transitions */
+
+ /*
+ * A stack of rollback states
+ */
+ int maxRollbacks;
+ int nbRollbacks;
+ xmlRegExecRollback *rollbacks;
+
+ /*
+ * The state of the automata if any
+ */
+ int *counts;
+
+ /*
+ * The input stack
+ */
+ int inputStackMax;
+ int inputStackNr;
+ int index;
+ int *charStack;
+ const xmlChar *inputString; /* when operating on characters */
+ xmlRegInputTokenPtr inputStack;/* when operating on strings */
+
+};
+
+static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
+
+/************************************************************************
+ * *
+ * Allocation/Deallocation *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlRegEpxFromParse:
+ * @ctxt: the parser context used to build it
+ *
+ * Allocate a new regexp and fill it with the result from the parser
+ *
+ * Returns the new regexp or NULL in case of error
+ */
+static xmlRegexpPtr
+xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
+ xmlRegexpPtr ret;
+
+ ret = (xmlRegexpPtr) xmlMalloc(sizeof(xmlRegexp));
+ if (ret == NULL)
+ return(NULL);
+ memset(ret, 0, sizeof(xmlRegexp));
+ ret->string = ctxt->string;
+ ctxt->string = NULL;
+ ret->nbStates = ctxt->nbStates;
+ ctxt->nbStates = 0;
+ ret->states = ctxt->states;
+ ctxt->states = NULL;
+ ret->nbAtoms = ctxt->nbAtoms;
+ ctxt->nbAtoms = 0;
+ ret->atoms = ctxt->atoms;
+ ctxt->atoms = NULL;
+ ret->nbCounters = ctxt->nbCounters;
+ ctxt->nbCounters = 0;
+ ret->counters = ctxt->counters;
+ ctxt->counters = NULL;
+ return(ret);
+}
+
+/**
+ * xmlRegNewParserCtxt:
+ * @string: the string to parse
+ *
+ * Allocate a new regexp parser context
+ *
+ * Returns the new context or NULL in case of error
+ */
+static xmlRegParserCtxtPtr
+xmlRegNewParserCtxt(const xmlChar *string) {
+ xmlRegParserCtxtPtr ret;
+
+ ret = (xmlRegParserCtxtPtr) xmlMalloc(sizeof(xmlRegParserCtxt));
+ if (ret == NULL)
+ return(NULL);
+ memset(ret, 0, sizeof(xmlRegParserCtxt));
+ if (string != NULL)
+ ret->string = xmlStrdup(string);
+ ret->cur = ret->string;
+ ret->neg = 0;
+ ret->error = 0;
+ return(ret);
+}
+
+/**
+ * xmlRegNewRange:
+ * @ctxt: the regexp parser context
+ * @neg: is that negative
+ * @type: the type of range
+ * @start: the start codepoint
+ * @end: the end codepoint
+ *
+ * Allocate a new regexp range
+ *
+ * Returns the new range or NULL in case of error
+ */
+static xmlRegRangePtr
+xmlRegNewRange(xmlRegParserCtxtPtr ctxt,
+               int neg, xmlRegAtomType type, int start, int end) {
+    xmlRegRangePtr ret = (xmlRegRangePtr) xmlMalloc(sizeof(xmlRegRange));
+
+    if (ret == NULL) {
+        ERROR("failed to allocate regexp range");
+        return(NULL);
+    }
+    ret->neg = neg;
+    ret->type = type;
+    ret->start = start;
+    ret->end = end;
+    ret->blockName = NULL; /* xmlRegFreeRange() inspects this field */
+    return(ret);
+}
+
+/**
+ * xmlRegFreeRange:
+ * @range: the regexp range
+ *
+ * Free a regexp range
+ */
+static void
+xmlRegFreeRange(xmlRegRangePtr range) {
+    if (range != NULL) {
+        /* the block name is optional, release it only when present */
+        if (range->blockName != NULL)
+            xmlFree(range->blockName);
+        xmlFree(range);
+    }
+}
+
+/**
+ * xmlRegNewAtom:
+ * @ctxt: the regexp parser context
+ * @type: the type of atom
+ *
+ * Allocate a new regexp atom
+ *
+ * Returns the new atom or NULL in case of error
+ */
+static xmlRegAtomPtr
+xmlRegNewAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomType type) {
+ xmlRegAtomPtr ret;
+
+ ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
+ if (ret == NULL) {
+ ERROR("failed to allocate regexp atom");
+ return(NULL);
+ }
+ memset(ret, 0, sizeof(xmlRegAtom));
+ ret->type = type;
+ ret->quant = XML_REGEXP_QUANT_ONCE;
+ ret->min = 0;
+ ret->max = 0;
+ return(ret);
+}
+
+/**
+ * xmlRegFreeAtom:
+ * @atom: the regexp atom
+ *
+ * Free a regexp atom
+ */
+static void
+xmlRegFreeAtom(xmlRegAtomPtr atom) {
+    int i;
+
+    if (atom == NULL)
+        return;
+    /* free each range, their array, then the string value if one was set */
+    for (i = 0;i < atom->nbRanges;i++)
+        xmlRegFreeRange(atom->ranges[i]);
+    if (atom->ranges != NULL)
+        xmlFree(atom->ranges);
+    if ((atom->type == XML_REGEXP_STRING) && (atom->valuep != NULL))
+        xmlFree(atom->valuep);
+    xmlFree(atom);
+}
+
+static xmlRegStatePtr
+xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
+ xmlRegStatePtr ret;
+
+ ret = (xmlRegStatePtr) xmlMalloc(sizeof(xmlRegState));
+ if (ret == NULL) {
+ ERROR("failed to allocate regexp state");
+ return(NULL);
+ }
+ memset(ret, 0, sizeof(xmlRegState));
+ ret->type = XML_REGEXP_TRANS_STATE;
+ ret->mark = XML_REGEXP_MARK_NORMAL;
+ return(ret);
+}
+
+/**
+ * xmlRegFreeState:
+ * @state: the regexp state
+ *
+ * Free a regexp state
+ */
+static void
+xmlRegFreeState(xmlRegStatePtr state) {
+    if (state != NULL) {
+        /* a state without transitions has no array to release */
+        if (state->trans != NULL)
+            xmlFree(state->trans);
+        xmlFree(state);
+    }
+}
+
+/**
+ * xmlRegFreeParserCtxt:
+ * @ctxt: the regexp parser context
+ *
+ * Free a regexp parser context
+ */
+static void
+xmlRegFreeParserCtxt(xmlRegParserCtxtPtr ctxt) {
+ int i;
+ if (ctxt == NULL)
+ return;
+
+ if (ctxt->string != NULL)
+ xmlFree(ctxt->string);
+ if (ctxt->states != NULL) {
+ for (i = 0;i < ctxt->nbStates;i++)
+ xmlRegFreeState(ctxt->states[i]);
+ xmlFree(ctxt->states);
+ }
+ if (ctxt->atoms != NULL) {
+ for (i = 0;i < ctxt->nbAtoms;i++)
+ xmlRegFreeAtom(ctxt->atoms[i]);
+ xmlFree(ctxt->atoms);
+ }
+ if (ctxt->counters != NULL)
+ xmlFree(ctxt->counters);
+ xmlFree(ctxt);
+}
+
+/************************************************************************
+ * *
+ * Display of Data structures *
+ * *
+ ************************************************************************/
+
+static void
+xmlRegPrintAtomType(FILE *output, xmlRegAtomType type) {
+ switch (type) {
+ case XML_REGEXP_EPSILON:
+ fprintf(output, "epsilon "); break;
+ case XML_REGEXP_CHARVAL:
+ fprintf(output, "charval "); break;
+ case XML_REGEXP_RANGES:
+ fprintf(output, "ranges "); break;
+ case XML_REGEXP_SUBREG:
+ fprintf(output, "subexpr "); break;
+ case XML_REGEXP_STRING:
+ fprintf(output, "string "); break;
+ case XML_REGEXP_ANYCHAR:
+ fprintf(output, "anychar "); break;
+ case XML_REGEXP_ANYSPACE:
+ fprintf(output, "anyspace "); break;
+ case XML_REGEXP_NOTSPACE:
+ fprintf(output, "notspace "); break;
+ case XML_REGEXP_INITNAME:
+ fprintf(output, "initname "); break;
+ case XML_REGEXP_NOTINITNAME:
+ fprintf(output, "notinitname "); break;
+ case XML_REGEXP_NAMECHAR:
+ fprintf(output, "namechar "); break;
+ case XML_REGEXP_NOTNAMECHAR:
+ fprintf(output, "notnamechar "); break;
+ case XML_REGEXP_DECIMAL:
+ fprintf(output, "decimal "); break;
+ case XML_REGEXP_NOTDECIMAL:
+ fprintf(output, "notdecimal "); break;
+ case XML_REGEXP_REALCHAR:
+ fprintf(output, "realchar "); break;
+ case XML_REGEXP_NOTREALCHAR:
+ fprintf(output, "notrealchar "); break;
+ case XML_REGEXP_LETTER:
+ fprintf(output, "LETTER "); break;
+ case XML_REGEXP_LETTER_UPPERCASE:
+ fprintf(output, "LETTER_UPPERCASE "); break;
+ case XML_REGEXP_LETTER_LOWERCASE:
+ fprintf(output, "LETTER_LOWERCASE "); break;
+ case XML_REGEXP_LETTER_TITLECASE:
+ fprintf(output, "LETTER_TITLECASE "); break;
+ case XML_REGEXP_LETTER_MODIFIER:
+ fprintf(output, "LETTER_MODIFIER "); break;
+ case XML_REGEXP_LETTER_OTHERS:
+ fprintf(output, "LETTER_OTHERS "); break;
+ case XML_REGEXP_MARK:
+ fprintf(output, "MARK "); break;
+ case XML_REGEXP_MARK_NONSPACING:
+ fprintf(output, "MARK_NONSPACING "); break;
+ case XML_REGEXP_MARK_SPACECOMBINING:
+ fprintf(output, "MARK_SPACECOMBINING "); break;
+ case XML_REGEXP_MARK_ENCLOSING:
+ fprintf(output, "MARK_ENCLOSING "); break;
+ case XML_REGEXP_NUMBER:
+ fprintf(output, "NUMBER "); break;
+ case XML_REGEXP_NUMBER_DECIMAL:
+ fprintf(output, "NUMBER_DECIMAL "); break;
+ case XML_REGEXP_NUMBER_LETTER:
+ fprintf(output, "NUMBER_LETTER "); break;
+ case XML_REGEXP_NUMBER_OTHERS:
+ fprintf(output, "NUMBER_OTHERS "); break;
+ case XML_REGEXP_PUNCT:
+ fprintf(output, "PUNCT "); break;
+ case XML_REGEXP_PUNCT_CONNECTOR:
+ fprintf(output, "PUNCT_CONNECTOR "); break;
+ case XML_REGEXP_PUNCT_DASH:
+ fprintf(output, "PUNCT_DASH "); break;
+ case XML_REGEXP_PUNCT_OPEN:
+ fprintf(output, "PUNCT_OPEN "); break;
+ case XML_REGEXP_PUNCT_CLOSE:
+ fprintf(output, "PUNCT_CLOSE "); break;
+ case XML_REGEXP_PUNCT_INITQUOTE:
+ fprintf(output, "PUNCT_INITQUOTE "); break;
+ case XML_REGEXP_PUNCT_FINQUOTE:
+ fprintf(output, "PUNCT_FINQUOTE "); break;
+ case XML_REGEXP_PUNCT_OTHERS:
+ fprintf(output, "PUNCT_OTHERS "); break;
+ case XML_REGEXP_SEPAR:
+ fprintf(output, "SEPAR "); break;
+ case XML_REGEXP_SEPAR_SPACE:
+ fprintf(output, "SEPAR_SPACE "); break;
+ case XML_REGEXP_SEPAR_LINE:
+ fprintf(output, "SEPAR_LINE "); break;
+ case XML_REGEXP_SEPAR_PARA:
+ fprintf(output, "SEPAR_PARA "); break;
+ case XML_REGEXP_SYMBOL:
+ fprintf(output, "SYMBOL "); break;
+ case XML_REGEXP_SYMBOL_MATH:
+ fprintf(output, "SYMBOL_MATH "); break;
+ case XML_REGEXP_SYMBOL_CURRENCY:
+ fprintf(output, "SYMBOL_CURRENCY "); break;
+ case XML_REGEXP_SYMBOL_MODIFIER:
+ fprintf(output, "SYMBOL_MODIFIER "); break;
+ case XML_REGEXP_SYMBOL_OTHERS:
+ fprintf(output, "SYMBOL_OTHERS "); break;
+ case XML_REGEXP_OTHER:
+ fprintf(output, "OTHER "); break;
+ case XML_REGEXP_OTHER_CONTROL:
+ fprintf(output, "OTHER_CONTROL "); break;
+ case XML_REGEXP_OTHER_FORMAT:
+ fprintf(output, "OTHER_FORMAT "); break;
+ case XML_REGEXP_OTHER_PRIVATE:
+ fprintf(output, "OTHER_PRIVATE "); break;
+ case XML_REGEXP_OTHER_NA:
+ fprintf(output, "OTHER_NA "); break;
+ case XML_REGEXP_BLOCK_NAME:
+ fprintf(output, "BLOCK "); break;
+ }
+}
+
+static void
+xmlRegPrintQuantType(FILE *output, xmlRegQuantType type) {
+    const char *label = NULL;
+
+    /* pick the label first; a value outside the enum prints nothing */
+    switch (type) {
+        case XML_REGEXP_QUANT_EPSILON: label = "epsilon "; break;
+        case XML_REGEXP_QUANT_ONCE:    label = "once ";    break;
+        case XML_REGEXP_QUANT_OPT:     label = "? ";       break;
+        case XML_REGEXP_QUANT_MULT:    label = "* ";       break;
+        case XML_REGEXP_QUANT_PLUS:    label = "+ ";       break;
+        case XML_REGEXP_QUANT_RANGE:   label = "range ";   break;
+    }
+
+    if (label != NULL)
+        fputs(label, output);
+}
+static void
+xmlRegPrintRange(FILE *output, xmlRegRangePtr range) {
+ fprintf(output, " range: ");
+ if (range->neg)
+ fprintf(output, "negative ");
+ xmlRegPrintAtomType(output, range->type);
+ fprintf(output, "%c - %c\n", range->start, range->end);
+}
+
+static void
+xmlRegPrintAtom(FILE *output, xmlRegAtomPtr atom) {
+ fprintf(output, " atom: ");
+ if (atom == NULL) {
+ fprintf(output, "NULL\n");
+ return;
+ }
+ xmlRegPrintAtomType(output, atom->type);
+ xmlRegPrintQuantType(output, atom->quant);
+ if (atom->quant == XML_REGEXP_QUANT_RANGE)
+ fprintf(output, "%d-%d ", atom->min, atom->max);
+ if (atom->type == XML_REGEXP_STRING)
+ fprintf(output, "'%s' ", (char *) atom->valuep);
+ if (atom->type == XML_REGEXP_CHARVAL)
+ fprintf(output, "char %c\n", atom->codepoint);
+ else if (atom->type == XML_REGEXP_RANGES) {
+ int i;
+ fprintf(output, "%d entries\n", atom->nbRanges);
+ for (i = 0; i < atom->nbRanges;i++)
+ xmlRegPrintRange(output, atom->ranges[i]);
+ } else if (atom->type == XML_REGEXP_SUBREG) {
+ fprintf(output, "start %d end %d\n", atom->start->no, atom->stop->no);
+ } else {
+ fprintf(output, "\n");
+ }
+}
+
+static void
+xmlRegPrintTrans(FILE *output, xmlRegTransPtr trans) {
+ fprintf(output, " trans: ");
+ if (trans == NULL) {
+ fprintf(output, "NULL\n");
+ return;
+ }
+ if (trans->to < 0) {
+ fprintf(output, "removed\n");
+ return;
+ }
+ if (trans->counter >= 0) {
+ fprintf(output, "counted %d, ", trans->counter);
+ }
+ if (trans->count >= 0) {
+ fprintf(output, "count based %d, ", trans->count);
+ }
+ if (trans->atom == NULL) {
+ fprintf(output, "epsilon to %d\n", trans->to);
+ return;
+ }
+ if (trans->atom->type == XML_REGEXP_CHARVAL)
+ fprintf(output, "char %c ", trans->atom->codepoint);
+ fprintf(output, "atom %d, to %d\n", trans->atom->no, trans->to);
+}
+
+static void
+xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
+ int i;
+
+ fprintf(output, " state: ");
+ if (state == NULL) {
+ fprintf(output, "NULL\n");
+ return;
+ }
+ if (state->type == XML_REGEXP_START_STATE)
+ fprintf(output, "START ");
+ if (state->type == XML_REGEXP_FINAL_STATE)
+ fprintf(output, "FINAL ");
+
+ fprintf(output, "%d, %d transitions:\n", state->no, state->nbTrans);
+ for (i = 0;i < state->nbTrans; i++) {
+ xmlRegPrintTrans(output, &(state->trans[i]));
+ }
+}
+
+#if 0
+static void
+xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
+ int i;
+
+ fprintf(output, " ctxt: ");
+ if (ctxt == NULL) {
+ fprintf(output, "NULL\n");
+ return;
+ }
+ fprintf(output, "'%s' ", ctxt->string);
+ if (ctxt->error)
+ fprintf(output, "error ");
+ if (ctxt->neg)
+ fprintf(output, "neg ");
+ fprintf(output, "\n");
+ fprintf(output, "%d atoms:\n", ctxt->nbAtoms);
+ for (i = 0;i < ctxt->nbAtoms; i++) {
+ fprintf(output, " %02d ", i);
+ xmlRegPrintAtom(output, ctxt->atoms[i]);
+ }
+ if (ctxt->atom != NULL) {
+ fprintf(output, "current atom:\n");
+ xmlRegPrintAtom(output, ctxt->atom);
+ }
+ fprintf(output, "%d states:", ctxt->nbStates);
+ if (ctxt->start != NULL)
+ fprintf(output, " start: %d", ctxt->start->no);
+ if (ctxt->end != NULL)
+ fprintf(output, " end: %d", ctxt->end->no);
+ fprintf(output, "\n");
+ for (i = 0;i < ctxt->nbStates; i++) {
+ xmlRegPrintState(output, ctxt->states[i]);
+ }
+ fprintf(output, "%d counters:\n", ctxt->nbCounters);
+ for (i = 0;i < ctxt->nbCounters; i++) {
+ fprintf(output, " %d: min %d max %d\n", i, ctxt->counters[i].min,
+ ctxt->counters[i].max);
+ }
+}
+#endif
+
+/************************************************************************
+ * *
+ * Finite Automata structures manipulations *
+ * *
+ ************************************************************************/
+
+static void
+xmlRegAtomAddRange(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom,
+ int neg, xmlRegAtomType type, int start, int end,
+ xmlChar *blockName) {
+ xmlRegRangePtr range;
+
+ if (atom == NULL) {
+ ERROR("add range: atom is NULL");
+ return;
+ }
+ if (atom->type != XML_REGEXP_RANGES) {
+ ERROR("add range: atom is not ranges");
+ return;
+ }
+ if (atom->maxRanges == 0) {
+ atom->maxRanges = 4;
+ atom->ranges = (xmlRegRangePtr *) xmlMalloc(atom->maxRanges *
+ sizeof(xmlRegRangePtr));
+ if (atom->ranges == NULL) {
+ ERROR("add range: allocation failed");
+ atom->maxRanges = 0;
+ return;
+ }
+ } else if (atom->nbRanges >= atom->maxRanges) {
+ xmlRegRangePtr *tmp;
+ atom->maxRanges *= 2;
+ tmp = (xmlRegRangePtr *) xmlRealloc(atom->ranges, atom->maxRanges *
+ sizeof(xmlRegRangePtr));
+ if (tmp == NULL) {
+ ERROR("add range: allocation failed");
+ atom->maxRanges /= 2;
+ return;
+ }
+ atom->ranges = tmp;
+ }
+ range = xmlRegNewRange(ctxt, neg, type, start, end);
+ if (range == NULL)
+ return;
+ range->blockName = blockName;
+ atom->ranges[atom->nbRanges++] = range;
+
+}
+
+static int
+xmlRegGetCounter(xmlRegParserCtxtPtr ctxt) {
+ if (ctxt->maxCounters == 0) {
+ ctxt->maxCounters = 4;
+ ctxt->counters = (xmlRegCounter *) xmlMalloc(ctxt->maxCounters *
+ sizeof(xmlRegCounter));
+ if (ctxt->counters == NULL) {
+ ERROR("reg counter: allocation failed");
+ ctxt->maxCounters = 0;
+ return(-1);
+ }
+ } else if (ctxt->nbCounters >= ctxt->maxCounters) {
+ xmlRegCounter *tmp;
+ ctxt->maxCounters *= 2;
+ tmp = (xmlRegCounter *) xmlRealloc(ctxt->counters, ctxt->maxCounters *
+ sizeof(xmlRegCounter));
+ if (tmp == NULL) {
+ ERROR("reg counter: allocation failed");
+ ctxt->maxCounters /= 2;
+ return(-1);
+ }
+ ctxt->counters = tmp;
+ }
+ ctxt->counters[ctxt->nbCounters].min = -1;
+ ctxt->counters[ctxt->nbCounters].max = -1;
+ return(ctxt->nbCounters++);
+}
+
+static void
+xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
+ if (atom == NULL) {
+ ERROR("atom push: atom is NULL");
+ return;
+ }
+ if (ctxt->maxAtoms == 0) {
+ ctxt->maxAtoms = 4;
+ ctxt->atoms = (xmlRegAtomPtr *) xmlMalloc(ctxt->maxAtoms *
+ sizeof(xmlRegAtomPtr));
+ if (ctxt->atoms == NULL) {
+ ERROR("atom push: allocation failed");
+ ctxt->maxAtoms = 0;
+ return;
+ }
+ } else if (ctxt->nbAtoms >= ctxt->maxAtoms) {
+ xmlRegAtomPtr *tmp;
+ ctxt->maxAtoms *= 2;
+ tmp = (xmlRegAtomPtr *) xmlRealloc(ctxt->atoms, ctxt->maxAtoms *
+ sizeof(xmlRegAtomPtr));
+ if (tmp == NULL) {
+ ERROR("atom push: allocation failed");
+ ctxt->maxAtoms /= 2;
+ return;
+ }
+ ctxt->atoms = tmp;
+ }
+ atom->no = ctxt->nbAtoms;
+ ctxt->atoms[ctxt->nbAtoms++] = atom;
+}
+
+static void
+xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
+                    xmlRegAtomPtr atom, xmlRegStatePtr target,
+                    int counter, int count) {
+    if (state == NULL) {
+        ERROR("add state: state is NULL");
+        return;
+    }
+    if (target == NULL) {
+        ERROR("add state: target is NULL");
+        return;
+    }
+    if (state->maxTrans == 0) {
+        state->maxTrans = 4;
+        state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
+                                                 sizeof(xmlRegTrans));
+        if (state->trans == NULL) {
+            ERROR("add state: allocation failed");
+            state->maxTrans = 0;
+            return;
+        }
+    } else if (state->nbTrans >= state->maxTrans) {
+        xmlRegTrans *tmp;
+        state->maxTrans *= 2;
+        tmp = (xmlRegTrans *) xmlRealloc(state->trans, state->maxTrans *
+                                         sizeof(xmlRegTrans));
+        if (tmp == NULL) {
+            ERROR("add state: allocation failed");
+            state->maxTrans /= 2;
+            return;
+        }
+        state->trans = tmp;
+    }
+#ifdef DEBUG_REGEXP_GRAPH
+    printf("Add trans from %d to %d ", state->no, target->no);
+    if (count >= 0)
+        printf("count based %d", count);
+    else if (counter >= 0)
+        printf("counted %d", counter);
+    else if (atom == NULL)
+        printf("epsilon transition");
+    printf("\n");
+#endif
+    /* the edge is recorded by target state number, not by pointer */
+    state->trans[state->nbTrans].atom = atom;
+    state->trans[state->nbTrans].to = target->no;
+    state->trans[state->nbTrans].counter = counter;
+    state->trans[state->nbTrans].count = count;
+    state->nbTrans++;
+}
+
+static void
+xmlRegStatePush(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
+    if (state == NULL) /* xmlRegNewState() failed upstream */
+        return;
+    if (ctxt->maxStates == 0) {
+        ctxt->maxStates = 4;
+        ctxt->states = (xmlRegStatePtr *) xmlMalloc(ctxt->maxStates *
+                                                    sizeof(xmlRegStatePtr));
+        if (ctxt->states == NULL) {
+            ERROR("state push: allocation failed");
+            ctxt->maxStates = 0;
+            return;
+        }
+    } else if (ctxt->nbStates >= ctxt->maxStates) {
+        xmlRegStatePtr *tmp = (xmlRegStatePtr *) xmlRealloc(ctxt->states,
+                              2 * ctxt->maxStates * sizeof(xmlRegStatePtr));
+        if (tmp == NULL) {
+            ERROR("state push: allocation failed");
+            return;
+        }
+        ctxt->states = tmp;
+        ctxt->maxStates *= 2;
+    }
+    state->no = ctxt->nbStates;
+    ctxt->states[ctxt->nbStates++] = state;
+}
+
+/**
+ * xmlFAGenerateEpsilonTransition:
+ * ctxt: a regexp parser context
+ * from: the from state
+ * to: the target state or NULL for building a new one
+ *
+ */
+static void
+xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
+ xmlRegStatePtr from, xmlRegStatePtr to) {
+ if (to == NULL) {
+ to = xmlRegNewState(ctxt);
+ xmlRegStatePush(ctxt, to);
+ ctxt->state = to;
+ }
+ xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1);
+}
+
+/**
+ * xmlFAGenerateCountedEpsilonTransition:
+ * ctxt: a regexp parser context
+ * from: the from state
+ * to: the target state or NULL for building a new one
+ * counter: the counter for that transition
+ *
+ */
+static void
+xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
+ xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
+ if (to == NULL) {
+ to = xmlRegNewState(ctxt);
+ xmlRegStatePush(ctxt, to);
+ ctxt->state = to;
+ }
+ xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1);
+}
+
+/**
+ * xmlFAGenerateCountedTransition:
+ * ctxt: a regexp parser context
+ * from: the from state
+ * to: the target state or NULL for building a new one
+ * counter: the counter for that transition
+ *
+ */
+static void
+xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
+ xmlRegStatePtr from, xmlRegStatePtr to, int counter) {
+ if (to == NULL) {
+ to = xmlRegNewState(ctxt);
+ xmlRegStatePush(ctxt, to);
+ ctxt->state = to;
+ }
+ xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter);
+}
+
+/**
+ * xmlFAGenerateTransitions:
+ * @ctxt: a regexp parser context
+ * @from: the from state
+ * @to: the target state or NULL for building a new one
+ * @atom: the atom generating the transition
+ * Wires @atom into the automata and expands its quantifier into transitions.
+ */
+static void
+xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
+                         xmlRegStatePtr to, xmlRegAtomPtr atom) {
+    if (atom == NULL) {
+        ERROR("genrate transition: atom == NULL");
+        return;
+    }
+    if (atom->type == XML_REGEXP_SUBREG) {
+        /*
+         * this is a subexpression handling one should not need to
+         * create a new node except for XML_REGEXP_QUANT_RANGE.
+         */
+        xmlRegAtomPush(ctxt, atom);
+        if ((to != NULL) && (atom->stop != to) &&
+            (atom->quant != XML_REGEXP_QUANT_RANGE)) {
+            /*
+             * Generate an epsilon transition to link to the target
+             */
+            xmlFAGenerateEpsilonTransition(ctxt, atom->stop, to);
+        }
+        switch (atom->quant) {
+            case XML_REGEXP_QUANT_OPT:
+                atom->quant = XML_REGEXP_QUANT_ONCE;
+                xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
+                break;
+            case XML_REGEXP_QUANT_MULT:
+                atom->quant = XML_REGEXP_QUANT_ONCE;
+                xmlFAGenerateEpsilonTransition(ctxt, atom->start, atom->stop);
+                xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
+                break;
+            case XML_REGEXP_QUANT_PLUS:
+                atom->quant = XML_REGEXP_QUANT_ONCE;
+                xmlFAGenerateEpsilonTransition(ctxt, atom->stop, atom->start);
+                break;
+            case XML_REGEXP_QUANT_RANGE: {
+                int counter = xmlRegGetCounter(ctxt);
+                xmlRegStatePtr newstate = to;
+
+                /*
+                 * This one is nasty:
+                 * 1/ register a new counter (give up if that failed)
+                 * 2/ register an epsilon transition associated to
+                 *    this counter going from atom->stop to atom->start
+                 * 3/ create a new state
+                 * 4/ generate a counted transition from atom->stop to
+                 *    that state
+                 */
+                if (counter < 0) /* -1 must never index ctxt->counters */
+                    break;
+                ctxt->counters[counter].min = atom->min - 1;
+                ctxt->counters[counter].max = atom->max - 1;
+                atom->min = 0;
+                atom->max = 0;
+                atom->quant = XML_REGEXP_QUANT_ONCE;
+                xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
+                                                      atom->start, counter);
+                if (newstate == NULL) {
+                    newstate = xmlRegNewState(ctxt);
+                    xmlRegStatePush(ctxt, newstate);
+                    ctxt->state = newstate;
+                }
+                xmlFAGenerateCountedTransition(ctxt, atom->stop,
+                                               newstate, counter);
+                break;
+            }
+            default:
+                break;
+        }
+        return;
+    } else {
+        if (to == NULL) {
+            to = xmlRegNewState(ctxt);
+            xmlRegStatePush(ctxt, to);
+        }
+        xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
+        xmlRegAtomPush(ctxt, atom);
+        ctxt->state = to;
+    }
+    switch (atom->quant) {
+        case XML_REGEXP_QUANT_OPT:
+            atom->quant = XML_REGEXP_QUANT_ONCE;
+            xmlFAGenerateEpsilonTransition(ctxt, from, to);
+            break;
+        case XML_REGEXP_QUANT_MULT:
+            atom->quant = XML_REGEXP_QUANT_ONCE;
+            xmlFAGenerateEpsilonTransition(ctxt, from, to);
+            xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
+            break;
+        case XML_REGEXP_QUANT_PLUS:
+            atom->quant = XML_REGEXP_QUANT_ONCE;
+            xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1);
+            break;
+        default:
+            break;
+    }
+}
+
+/**
+ * xmlFAReduceEpsilonTransitions:
+ * @ctxt: a regexp parser context
+ * @fromnr: the from state
+ * @tonr: the to state
+ * @counter: should that transition be associated to a counter
+ *
+ */
+static void
+xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
+ int tonr, int counter) {
+ int transnr;
+ xmlRegStatePtr from;
+ xmlRegStatePtr to;
+
+#ifdef DEBUG_REGEXP_GRAPH
+ printf("xmlFAReduceEpsilonTransitions(%d, %d)\n", fromnr, tonr);
+#endif
+ from = ctxt->states[fromnr];
+ if (from == NULL)
+ return;
+ to = ctxt->states[tonr];
+ if (to == NULL)
+ return;
+ if ((to->mark == XML_REGEXP_MARK_START) ||
+ (to->mark == XML_REGEXP_MARK_VISITED))
+ return;
+
+ to->mark = XML_REGEXP_MARK_VISITED;
+ if (to->type == XML_REGEXP_FINAL_STATE) {
+#ifdef DEBUG_REGEXP_GRAPH
+ printf("State %d is final, so %d becomes final\n", tonr, fromnr);
+#endif
+ from->type = XML_REGEXP_FINAL_STATE;
+ }
+ for (transnr = 0;transnr < to->nbTrans;transnr++) {
+ if (to->trans[transnr].atom == NULL) {
+ /*
+ * Don't remove counted transitions
+ * Don't loop either
+ */
+ if ((to->trans[transnr].count < 0) &&
+ (to->trans[transnr].to != fromnr)) {
+#ifdef DEBUG_REGEXP_GRAPH
+ printf("Found epsilon trans %d from %d to %d\n",
+ transnr, tonr, to->trans[transnr].to);
+#endif
+ xmlFAReduceEpsilonTransitions(ctxt, fromnr,
+ to->trans[transnr].to, counter);
+ }
+ } else {
+ int newto = to->trans[transnr].to;
+
+ xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
+ ctxt->states[newto], counter, -1);
+ }
+ }
+ to->mark = XML_REGEXP_MARK_NORMAL;
+}
+
+/**
+ * xmlFAEliminateEpsilonTransitions:
+ * ctxt: a regexp parser context
+ *
+ */
+static void
+xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
+ int statenr, transnr;
+ xmlRegStatePtr state;
+
+ /*
+ * build the completed transitions bypassing the epsilons
+ * Use a marking algorithm to avoid loops
+ */
+ for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
+ state = ctxt->states[statenr];
+ if (state == NULL)
+ continue;
+ for (transnr = 0;transnr < state->nbTrans;transnr++) {
+ if ((state->trans[transnr].atom == NULL) &&
+ (state->trans[transnr].to >= 0)) {
+ if (state->trans[transnr].to == statenr) {
+ state->trans[transnr].to = -1;
+#ifdef DEBUG_REGEXP_GRAPH
+ printf("Removed loopback epsilon trans %d on %d\n",
+ transnr, statenr);
+#endif
+ } else if (state->trans[transnr].count < 0) {
+ int newto = state->trans[transnr].to;
+
+#ifdef DEBUG_REGEXP_GRAPH
+ printf("Found epsilon trans %d from %d to %d\n",
+ transnr, statenr, newto);
+#endif
+ state->mark = XML_REGEXP_MARK_START;
+ xmlFAReduceEpsilonTransitions(ctxt, statenr,
+ newto, state->trans[transnr].counter);
+ state->mark = XML_REGEXP_MARK_NORMAL;
+#ifdef DEBUG_REGEXP_GRAPH
+ } else {
+ printf("Found counted transition %d on %d\n",
+ transnr, statenr);
+#endif
+ }
+ }
+ }
+ }
+ /*
+ * Eliminate the epsilon transitions
+ */
+ for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
+ state = ctxt->states[statenr];
+ if (state == NULL)
+ continue;
+ for (transnr = 0;transnr < state->nbTrans;transnr++) {
+ if ((state->trans[transnr].atom == NULL) &&
+ (state->trans[transnr].count < 0) &&
+ (state->trans[transnr].to >= 0)) {
+ state->trans[transnr].to = -1;
+ }
+ }
+ }
+}
+
+/************************************************************************
+ * *
+ * Routines to check input against transition atoms *
+ * *
+ ************************************************************************/
+
+static int /* test @codepoint against one character class: 1, 0, or -1 */
+xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint, int neg,
+ int start, int end, const xmlChar *blockName) {
+ int ret = 0; /* class membership, before @neg is applied */
+
+ switch (type) {
+ case XML_REGEXP_STRING:
+ case XML_REGEXP_SUBREG:
+ case XML_REGEXP_RANGES:
+ case XML_REGEXP_EPSILON:
+ return(-1); /* these types cannot be tested against a single char */
+ case XML_REGEXP_ANYCHAR:
+ ret = ((codepoint != '\n') && (codepoint != '\r'));
+ break;
+ case XML_REGEXP_CHARVAL:
+ ret = ((codepoint >= start) && (codepoint <= end)); /* inclusive range */
+ break;
+ case XML_REGEXP_NOTSPACE:
+ neg = !neg; /* fall through: complement of ANYSPACE */
+ case XML_REGEXP_ANYSPACE:
+ ret = ((codepoint == '\n') || (codepoint == '\r') ||
+ (codepoint == '\t') || (codepoint == ' '));
+ break;
+ case XML_REGEXP_NOTINITNAME:
+ neg = !neg; /* fall through: complement of INITNAME */
+ case XML_REGEXP_INITNAME:
+ ret = (xmlIsLetter(codepoint) ||
+ (codepoint == '_') || (codepoint == ':'));
+ break;
+ case XML_REGEXP_NOTNAMECHAR:
+ neg = !neg; /* fall through: complement of NAMECHAR */
+ case XML_REGEXP_NAMECHAR:
+ ret = (xmlIsLetter(codepoint) || xmlIsDigit(codepoint) ||
+ (codepoint == '.') || (codepoint == '-') ||
+ (codepoint == '_') || (codepoint == ':') ||
+ xmlIsCombining(codepoint) || xmlIsExtender(codepoint));
+ break;
+ case XML_REGEXP_NOTDECIMAL:
+ neg = !neg; /* fall through: complement of DECIMAL */
+ case XML_REGEXP_DECIMAL:
+ ret = xmlUCSIsCatNd(codepoint);
+ break;
+ case XML_REGEXP_REALCHAR:
+ neg = !neg; /* fall through: REALCHAR is the complement of the test below */
+ case XML_REGEXP_NOTREALCHAR:
+ ret = xmlUCSIsCatP(codepoint); /* in category P, Z or C */
+ if (ret == 0)
+ ret = xmlUCSIsCatZ(codepoint);
+ if (ret == 0)
+ ret = xmlUCSIsCatC(codepoint);
+ break;
+ case XML_REGEXP_LETTER:
+ ret = xmlUCSIsCatL(codepoint);
+ break;
+ case XML_REGEXP_LETTER_UPPERCASE:
+ ret = xmlUCSIsCatLu(codepoint);
+ break;
+ case XML_REGEXP_LETTER_LOWERCASE:
+ ret = xmlUCSIsCatLl(codepoint);
+ break;
+ case XML_REGEXP_LETTER_TITLECASE:
+ ret = xmlUCSIsCatLt(codepoint);
+ break;
+ case XML_REGEXP_LETTER_MODIFIER:
+ ret = xmlUCSIsCatLm(codepoint);
+ break;
+ case XML_REGEXP_LETTER_OTHERS:
+ ret = xmlUCSIsCatLo(codepoint);
+ break;
+ case XML_REGEXP_MARK:
+ ret = xmlUCSIsCatM(codepoint);
+ break;
+ case XML_REGEXP_MARK_NONSPACING:
+ ret = xmlUCSIsCatMn(codepoint);
+ break;
+ case XML_REGEXP_MARK_SPACECOMBINING:
+ ret = xmlUCSIsCatMc(codepoint);
+ break;
+ case XML_REGEXP_MARK_ENCLOSING:
+ ret = xmlUCSIsCatMe(codepoint);
+ break;
+ case XML_REGEXP_NUMBER:
+ ret = xmlUCSIsCatN(codepoint);
+ break;
+ case XML_REGEXP_NUMBER_DECIMAL:
+ ret = xmlUCSIsCatNd(codepoint); /* same test as XML_REGEXP_DECIMAL */
+ break;
+ case XML_REGEXP_NUMBER_LETTER:
+ ret = xmlUCSIsCatNl(codepoint);
+ break;
+ case XML_REGEXP_NUMBER_OTHERS:
+ ret = xmlUCSIsCatNo(codepoint);
+ break;
+ case XML_REGEXP_PUNCT:
+ ret = xmlUCSIsCatP(codepoint);
+ break;
+ case XML_REGEXP_PUNCT_CONNECTOR:
+ ret = xmlUCSIsCatPc(codepoint);
+ break;
+ case XML_REGEXP_PUNCT_DASH:
+ ret = xmlUCSIsCatPd(codepoint);
+ break;
+ case XML_REGEXP_PUNCT_OPEN:
+ ret = xmlUCSIsCatPs(codepoint);
+ break;
+ case XML_REGEXP_PUNCT_CLOSE:
+ ret = xmlUCSIsCatPe(codepoint);
+ break;
+ case XML_REGEXP_PUNCT_INITQUOTE:
+ ret = xmlUCSIsCatPi(codepoint);
+ break;
+ case XML_REGEXP_PUNCT_FINQUOTE:
+ ret = xmlUCSIsCatPf(codepoint);
+ break;
+ case XML_REGEXP_PUNCT_OTHERS:
+ ret = xmlUCSIsCatPo(codepoint);
+ break;
+ case XML_REGEXP_SEPAR:
+ ret = xmlUCSIsCatZ(codepoint);
+ break;
+ case XML_REGEXP_SEPAR_SPACE:
+ ret = xmlUCSIsCatZs(codepoint);
+ break;
+ case XML_REGEXP_SEPAR_LINE:
+ ret = xmlUCSIsCatZl(codepoint);
+ break;
+ case XML_REGEXP_SEPAR_PARA:
+ ret = xmlUCSIsCatZp(codepoint);
+ break;
+ case XML_REGEXP_SYMBOL:
+ ret = xmlUCSIsCatS(codepoint);
+ break;
+ case XML_REGEXP_SYMBOL_MATH:
+ ret = xmlUCSIsCatSm(codepoint);
+ break;
+ case XML_REGEXP_SYMBOL_CURRENCY:
+ ret = xmlUCSIsCatSc(codepoint);
+ break;
+ case XML_REGEXP_SYMBOL_MODIFIER:
+ ret = xmlUCSIsCatSk(codepoint);
+ break;
+ case XML_REGEXP_SYMBOL_OTHERS:
+ ret = xmlUCSIsCatSo(codepoint);
+ break;
+ case XML_REGEXP_OTHER:
+ ret = xmlUCSIsCatC(codepoint);
+ break;
+ case XML_REGEXP_OTHER_CONTROL:
+ ret = xmlUCSIsCatCc(codepoint);
+ break;
+ case XML_REGEXP_OTHER_FORMAT:
+ ret = xmlUCSIsCatCf(codepoint);
+ break;
+ case XML_REGEXP_OTHER_PRIVATE:
+ ret = xmlUCSIsCatCo(codepoint);
+ break;
+ case XML_REGEXP_OTHER_NA:
+ /* ret = xmlUCSIsCatCn(codepoint); */
+ /* Seems it doesn't exist anymore in recent Unicode releases */
+ ret = 0; /* so this class never matches (unless @neg is set) */
+ break;
+ case XML_REGEXP_BLOCK_NAME:
+ ret = xmlUCSIsBlock(codepoint, (const char *) blockName);
+ break;
+ }
+ if (neg) /* @neg, possibly flipped by a NOT* case above, inverts the result */
+ return(!ret);
+ return(ret);
+}
+
+static int /* 1 if @codepoint matches @atom, 0 if not, -1 if it cannot be tested */
+xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint) {
+ int i, ret = 0;
+ xmlRegRangePtr range;
+
+ if ((atom == NULL) || (!xmlIsChar(codepoint)))
+ return(-1);
+
+ switch (atom->type) {
+ case XML_REGEXP_SUBREG:
+ case XML_REGEXP_EPSILON:
+ return(-1); /* not character level atoms */
+ case XML_REGEXP_CHARVAL:
+ return(codepoint == atom->codepoint); /* atom->neg is not consulted here */
+ case XML_REGEXP_RANGES: {
+ int accept = 0; /* set once a positive range matched */
+ for (i = 0;i < atom->nbRanges;i++) {
+ range = atom->ranges[i];
+ if (range->neg) { /* same test in both branches, the outcome differs */
+ ret = xmlRegCheckCharacterRange(range->type, codepoint,
+ 0, range->start, range->end,
+ range->blockName);
+ if (ret != 0)
+ return(0); /* excluded char */
+ } else {
+ ret = xmlRegCheckCharacterRange(range->type, codepoint,
+ 0, range->start, range->end,
+ range->blockName);
+ if (ret != 0)
+ accept = 1; /* might still be excluded */
+ }
+ }
+ return(accept);
+ }
+ case XML_REGEXP_STRING:
+ printf("TODO: XML_REGEXP_STRING\n"); /* string atoms are not handled here */
+ return(-1);
+ case XML_REGEXP_ANYCHAR:
+ case XML_REGEXP_ANYSPACE:
+ case XML_REGEXP_NOTSPACE:
+ case XML_REGEXP_INITNAME:
+ case XML_REGEXP_NOTINITNAME:
+ case XML_REGEXP_NAMECHAR:
+ case XML_REGEXP_NOTNAMECHAR:
+ case XML_REGEXP_DECIMAL:
+ case XML_REGEXP_NOTDECIMAL:
+ case XML_REGEXP_REALCHAR:
+ case XML_REGEXP_NOTREALCHAR:
+ case XML_REGEXP_LETTER:
+ case XML_REGEXP_LETTER_UPPERCASE:
+ case XML_REGEXP_LETTER_LOWERCASE:
+ case XML_REGEXP_LETTER_TITLECASE:
+ case XML_REGEXP_LETTER_MODIFIER:
+ case XML_REGEXP_LETTER_OTHERS:
+ case XML_REGEXP_MARK:
+ case XML_REGEXP_MARK_NONSPACING:
+ case XML_REGEXP_MARK_SPACECOMBINING:
+ case XML_REGEXP_MARK_ENCLOSING:
+ case XML_REGEXP_NUMBER:
+ case XML_REGEXP_NUMBER_DECIMAL:
+ case XML_REGEXP_NUMBER_LETTER:
+ case XML_REGEXP_NUMBER_OTHERS:
+ case XML_REGEXP_PUNCT:
+ case XML_REGEXP_PUNCT_CONNECTOR:
+ case XML_REGEXP_PUNCT_DASH:
+ case XML_REGEXP_PUNCT_OPEN:
+ case XML_REGEXP_PUNCT_CLOSE:
+ case XML_REGEXP_PUNCT_INITQUOTE:
+ case XML_REGEXP_PUNCT_FINQUOTE:
+ case XML_REGEXP_PUNCT_OTHERS:
+ case XML_REGEXP_SEPAR:
+ case XML_REGEXP_SEPAR_SPACE:
+ case XML_REGEXP_SEPAR_LINE:
+ case XML_REGEXP_SEPAR_PARA:
+ case XML_REGEXP_SYMBOL:
+ case XML_REGEXP_SYMBOL_MATH:
+ case XML_REGEXP_SYMBOL_CURRENCY:
+ case XML_REGEXP_SYMBOL_MODIFIER:
+ case XML_REGEXP_SYMBOL_OTHERS:
+ case XML_REGEXP_OTHER:
+ case XML_REGEXP_OTHER_CONTROL:
+ case XML_REGEXP_OTHER_FORMAT:
+ case XML_REGEXP_OTHER_PRIVATE:
+ case XML_REGEXP_OTHER_NA:
+ case XML_REGEXP_BLOCK_NAME:
+ ret = xmlRegCheckCharacterRange(atom->type, codepoint, 0, 0, 0,
+ (const xmlChar *)atom->valuep); /* valuep is the block name */
+ if (atom->neg) /* negation is applied here, the call above gets neg = 0 */
+ ret = !ret;
+ break;
+ }
+ return(ret);
+}
+
+/************************************************************************
+ * *
+ * Saving and restoring state of an execution context *
+ * *
+ ************************************************************************/
+
+#ifdef DEBUG_REGEXP_EXEC
+static void /* debug helper: print state, transition, index and pending input */
+xmlFARegDebugExec(xmlRegExecCtxtPtr exec) {
+ printf("state: %d:%d:idx %d", exec->state->no, exec->transno, exec->index);
+ if (exec->inputStack != NULL) {
+ int i;
+ printf(": ");
+ for (i = 0;(i < 3) && (i < exec->inputStackNr);i++) /* 3 newest tokens */
+ printf("%s ", exec->inputStack[exec->inputStackNr - (i + 1)].value);
+ } else {
+ printf(": %s", &(exec->inputString[exec->index]));
+ }
+ printf("\n");
+}
+#endif
+
+static void /* push (state, index, next transition, counters) on the rollback stack */
+xmlFARegExecSave(xmlRegExecCtxtPtr exec) {
+#ifdef DEBUG_REGEXP_EXEC
+ printf("saving ");
+ exec->transno++;
+ xmlFARegDebugExec(exec);
+ exec->transno--;
+#endif
+
+ if (exec->maxRollbacks == 0) { /* first save: create the stack */
+ exec->maxRollbacks = 4;
+ exec->rollbacks = (xmlRegExecRollback *) xmlMalloc(exec->maxRollbacks *
+ sizeof(xmlRegExecRollback));
+ if (exec->rollbacks == NULL) {
+ fprintf(stderr, "exec save: allocation failed");
+ exec->maxRollbacks = 0;
+ return; /* NOTE(review): exec->status is left untouched on this path */
+ }
+ memset(exec->rollbacks, 0,
+ exec->maxRollbacks * sizeof(xmlRegExecRollback));
+ } else if (exec->nbRollbacks >= exec->maxRollbacks) { /* full: double it */
+ xmlRegExecRollback *tmp;
+ int len = exec->maxRollbacks;
+
+ exec->maxRollbacks *= 2;
+ tmp = (xmlRegExecRollback *) xmlRealloc(exec->rollbacks,
+ exec->maxRollbacks * sizeof(xmlRegExecRollback));
+ if (tmp == NULL) {
+ fprintf(stderr, "exec save: allocation failed");
+ exec->maxRollbacks /= 2;
+ return; /* NOTE(review): exec->status is left untouched on this path */
+ }
+ exec->rollbacks = tmp;
+ tmp = &exec->rollbacks[len];
+ memset(tmp, 0, (exec->maxRollbacks - len) * sizeof(xmlRegExecRollback));
+ }
+ exec->rollbacks[exec->nbRollbacks].state = exec->state;
+ exec->rollbacks[exec->nbRollbacks].index = exec->index;
+ exec->rollbacks[exec->nbRollbacks].nextbranch = exec->transno + 1; /* resume after it */
+ if (exec->comp->nbCounters > 0) {
+ if (exec->rollbacks[exec->nbRollbacks].counts == NULL) { /* slot never used */
+ exec->rollbacks[exec->nbRollbacks].counts = (int *)
+ xmlMalloc(exec->comp->nbCounters * sizeof(int));
+ if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
+ fprintf(stderr, "exec save: allocation failed");
+ exec->status = -5;
+ return; /* the entry is not pushed: nbRollbacks is unchanged */
+ }
+ }
+ memcpy(exec->rollbacks[exec->nbRollbacks].counts, exec->counts,
+ exec->comp->nbCounters * sizeof(int)); /* snapshot of all counters */
+ }
+ exec->nbRollbacks++;
+}
+
+static void /* pop the last saved branch into @exec; status = -1 if none is left */
+xmlFARegExecRollBack(xmlRegExecCtxtPtr exec) {
+ if (exec->nbRollbacks <= 0) {
+ exec->status = -1; /* no saved branch left to try */
+#ifdef DEBUG_REGEXP_EXEC
+ printf("rollback failed on empty stack\n");
+#endif
+ return;
+ }
+ exec->nbRollbacks--;
+ exec->state = exec->rollbacks[exec->nbRollbacks].state;
+ exec->index = exec->rollbacks[exec->nbRollbacks].index;
+ exec->transno = exec->rollbacks[exec->nbRollbacks].nextbranch;
+ if (exec->comp->nbCounters > 0) {
+ if (exec->rollbacks[exec->nbRollbacks].counts == NULL) {
+ fprintf(stderr, "exec rollback: saved counters missing\n");
+ exec->status = -6; /* state, index and transno were already restored */
+ return;
+ }
+ memcpy(exec->counts, exec->rollbacks[exec->nbRollbacks].counts,
+ exec->comp->nbCounters * sizeof(int)); /* restore all counters */
+ }
+
+#ifdef DEBUG_REGEXP_EXEC
+ printf("restored ");
+ xmlFARegDebugExec(exec);
+#endif
+}
+
+/************************************************************************
+ * *
+ * Verifier, running an input against a compiled regexp *
+ * *
+ ************************************************************************/
+
+static int /* run @content against @comp: 1 match, 0 no match, < 0 error */
+xmlFARegExec(xmlRegexpPtr comp, const xmlChar *content) {
+ xmlRegExecCtxt execval;
+ xmlRegExecCtxtPtr exec = &execval; /* the context lives on the stack */
+ int ret, codepoint, len;
+
+ exec->inputString = content;
+ exec->index = 0;
+ exec->determinist = 1;
+ exec->maxRollbacks = 0;
+ exec->nbRollbacks = 0;
+ exec->rollbacks = NULL;
+ exec->status = 0;
+ exec->comp = comp;
+ exec->state = comp->states[0]; /* execution starts in state 0 */
+ exec->transno = 0;
+ exec->transcount = 0;
+ if (comp->nbCounters > 0) {
+ exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
+ if (exec->counts == NULL)
+ return(-1); /* allocation failure */
+ memset(exec->counts, 0, comp->nbCounters * sizeof(int));
+ } else
+ exec->counts = NULL;
+ while ((exec->status == 0) &&
+ ((exec->inputString[exec->index] != 0) ||
+ (exec->state->type != XML_REGEXP_FINAL_STATE))) { /* input left or not final */
+ xmlRegTransPtr trans;
+ xmlRegAtomPtr atom;
+
+ /*
+ * End of input on non-terminal state, rollback, however we may
+ * still have epsilon like transition for counted transitions
+ * on counters, in that case don't break too early.
+ */
+ if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
+ goto rollback;
+
+ exec->transcount = 0;
+ for (;exec->transno < exec->state->nbTrans;exec->transno++) {
+ trans = &exec->state->trans[exec->transno];
+ if (trans->to < 0) /* disabled transition */
+ continue;
+ atom = trans->atom;
+ ret = 0;
+ if (trans->count >= 0) {
+ int count;
+ xmlRegCounterPtr counter;
+
+ /*
+ * A counted transition.
+ */
+
+ count = exec->counts[trans->count];
+ counter = &exec->comp->counters[trans->count];
+#ifdef DEBUG_REGEXP_EXEC
+ printf("testing count %d: val %d, min %d, max %d\n",
+ trans->count, count, counter->min, counter->max);
+#endif
+ ret = ((count >= counter->min) && (count <= counter->max)); /* in [min, max] */
+ } else if (atom == NULL) {
+ fprintf(stderr, "epsilon transition left at runtime\n");
+ exec->status = -2;
+ break;
+ } else if (exec->inputString[exec->index] != 0) {
+ codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
+ ret = xmlRegCheckCharacter(atom, codepoint);
+ if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
+ xmlRegStatePtr to = comp->states[trans->to];
+
+ /*
+ * this is a multiple input sequence
+ */
+ if (exec->state->nbTrans > exec->transno + 1) { /* other branches remain */
+ xmlFARegExecSave(exec);
+ }
+ exec->transcount = 1;
+ do {
+ /*
+ * Try to progress as much as possible on the input
+ */
+ if (exec->transcount == atom->max) {
+ break;
+ }
+ exec->index += len;
+ /*
+ * End of input: stop here
+ */
+ if (exec->inputString[exec->index] == 0) {
+ exec->index -= len;
+ break;
+ }
+ if (exec->transcount >= atom->min) {
+ int transno = exec->transno;
+ xmlRegStatePtr state = exec->state;
+
+ /*
+ * The transition is acceptable save it
+ */
+ exec->transno = -1; /* trick: the save stores transno + 1, i.e. 0 */
+ exec->state = to;
+ xmlFARegExecSave(exec);
+ exec->transno = transno;
+ exec->state = state;
+ }
+ codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
+ len);
+ ret = xmlRegCheckCharacter(atom, codepoint);
+ exec->transcount++;
+ } while (ret == 1);
+ if (exec->transcount < atom->min)
+ ret = 0;
+
+ /*
+ * If the last check failed but one transition was found
+ * possible, rollback
+ */
+ if (ret < 0)
+ ret = 0;
+ if (ret == 0) {
+ goto rollback;
+ }
+ }
+ }
+ if (ret == 1) {
+ if (exec->state->nbTrans > exec->transno + 1) { /* keep the alternatives */
+ xmlFARegExecSave(exec);
+ }
+ if (trans->counter >= 0) {
+#ifdef DEBUG_REGEXP_EXEC
+ printf("Increasing count %d\n", trans->counter);
+#endif
+ exec->counts[trans->counter]++;
+ }
+#ifdef DEBUG_REGEXP_EXEC
+ printf("entering state %d\n", trans->to);
+#endif
+ exec->state = comp->states[trans->to];
+ exec->transno = 0;
+ if (trans->atom != NULL) {
+ exec->index += len; /* only transitions with an atom consume input */
+ }
+ goto progress;
+ } else if (ret < 0) {
+ exec->status = -4; /* the atom could not be tested */
+ break;
+ }
+ }
+ if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
+rollback:
+ /*
+ * Failed to find a way out
+ */
+ exec->determinist = 0;
+ xmlFARegExecRollBack(exec);
+ }
+progress:
+ continue;
+ }
+ if (exec->rollbacks != NULL) {
+ if (exec->counts != NULL) {
+ int i;
+
+ for (i = 0;i < exec->maxRollbacks;i++) /* every slot, used or not */
+ if (exec->rollbacks[i].counts != NULL)
+ xmlFree(exec->rollbacks[i].counts);
+ }
+ xmlFree(exec->rollbacks);
+ }
+ if (exec->counts != NULL)
+ xmlFree(exec->counts);
+ if (exec->status == 0)
+ return(1); /* loop ended normally: input consumed in a final state */
+ if (exec->status == -1)
+ return(0); /* rollback stack exhausted: no match */
+ return(exec->status); /* any other status is reported as is */
+}
+
+/************************************************************************
+ * *
+ * Progressive interface to the verifier one atom at a time *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlRegNewExecCtxt:
+ * @comp: a precompiled regular expression
+ * @callback: a callback function used for handling progress while matching
+ * @data: the context data associated to the callback in this context
+ *
+ * Build a context used for progressive evaluation of a regexp.
+ * Returns the new context, or NULL if @comp is NULL or an allocation failed.
+ */
+xmlRegExecCtxtPtr
+xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
+ xmlRegExecCtxtPtr exec;
+
+ if (comp == NULL)
+ return(NULL);
+ exec = (xmlRegExecCtxtPtr) xmlMalloc(sizeof(xmlRegExecCtxt));
+ if (exec == NULL) {
+ return(NULL);
+ }
+ memset(exec, 0, sizeof(xmlRegExecCtxt)); /* every field starts zeroed */
+ exec->inputString = NULL;
+ exec->index = 0;
+ exec->determinist = 1;
+ exec->maxRollbacks = 0;
+ exec->nbRollbacks = 0;
+ exec->rollbacks = NULL;
+ exec->status = 0;
+ exec->comp = comp; /* the pointer is stored, @comp is not copied */
+ exec->state = comp->states[0]; /* execution starts in state 0 */
+ exec->transno = 0;
+ exec->transcount = 0;
+ exec->callback = callback;
+ exec->data = data;
+ if (comp->nbCounters > 0) {
+ exec->counts = (int *) xmlMalloc(comp->nbCounters * sizeof(int));
+ if (exec->counts == NULL) {
+ xmlFree(exec);
+ return(NULL);
+ }
+ memset(exec->counts, 0, comp->nbCounters * sizeof(int)); /* all counters at 0 */
+ } else
+ exec->counts = NULL;
+ exec->inputStackMax = 0;
+ exec->inputStackNr = 0;
+ exec->inputStack = NULL;
+ return(exec);
+}
+
+/**
+ * xmlRegFreeExecCtxt:
+ * @exec: a regular expression evaluation context, may be NULL
+ *
+ * Release a regular expression evaluation context and everything it owns.
+ */
+void
+xmlRegFreeExecCtxt(xmlRegExecCtxtPtr exec) {
+ int idx;
+
+ if (exec == NULL)
+ return;
+
+ /* counter snapshots are only released when the context has counters */
+ if ((exec->rollbacks != NULL) && (exec->counts != NULL)) {
+ for (idx = 0;idx < exec->maxRollbacks;idx++) {
+ int *saved = exec->rollbacks[idx].counts;
+ if (saved != NULL)
+ xmlFree(saved);
+ }
+ }
+ if (exec->rollbacks != NULL)
+ xmlFree(exec->rollbacks);
+ if (exec->counts != NULL)
+ xmlFree(exec->counts);
+ if (exec->inputStack != NULL) {
+ for (idx = 0;idx < exec->inputStackNr;idx++)
+ xmlFree(exec->inputStack[idx].value);
+ xmlFree(exec->inputStack);
+ }
+ xmlFree(exec);
+}
+
+static void /* append a copy of @value, with @data, to the input token stack */
+xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
+ void *data) {
+#ifdef DEBUG_PUSH
+ printf("saving value: %d:%s\n", exec->inputStackNr, value);
+#endif
+ if (exec->inputStackMax == 0) { /* first token: create the stack */
+ exec->inputStackMax = 4;
+ exec->inputStack = (xmlRegInputTokenPtr)
+ xmlMalloc(exec->inputStackMax * sizeof(xmlRegInputToken));
+ if (exec->inputStack == NULL) {
+ fprintf(stderr, "push input: allocation failed");
+ exec->inputStackMax = 0;
+ return; /* NOTE(review): token dropped, exec->status is not set */
+ }
+ } else if (exec->inputStackNr + 1 >= exec->inputStackMax) { /* +1: terminator slot */
+ xmlRegInputTokenPtr tmp;
+
+ exec->inputStackMax *= 2;
+ tmp = (xmlRegInputTokenPtr) xmlRealloc(exec->inputStack,
+ exec->inputStackMax * sizeof(xmlRegInputToken));
+ if (tmp == NULL) {
+ fprintf(stderr, "push input: allocation failed");
+ exec->inputStackMax /= 2;
+ return; /* NOTE(review): token dropped, exec->status is not set */
+ }
+ exec->inputStack = tmp;
+ }
+ exec->inputStack[exec->inputStackNr].value = xmlStrdup(value); /* result unchecked */
+ exec->inputStack[exec->inputStackNr].data = data;
+ exec->inputStackNr++;
+ exec->inputStack[exec->inputStackNr].value = NULL; /* terminator entry */
+ exec->inputStack[exec->inputStackNr].data = NULL;
+}
+
+
+/**
+ * xmlRegExecPushString:
+ * @exec: a regexp execution context
+ * @value: a string token input, or NULL to signal the end of the input
+ * @data: data associated to the token to reuse in callbacks
+ *
+ * Push one input token in the execution context
+ *
+ * Returns: 1 if the regexp reached a final state, 0 if non-final, and
+ * a negative value in case of error.
+ */
+int
+xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
+ void *data) {
+ xmlRegTransPtr trans;
+ xmlRegAtomPtr atom;
+ int ret;
+ int final = 0; /* set to 1 when the caller signals the end of input */
+
+ if (exec == NULL)
+ return(-1);
+ if (exec->status != 0) /* a previous error is sticky */
+ return(exec->status);
+
+ if (value == NULL) {
+ if (exec->state->type == XML_REGEXP_FINAL_STATE)
+ return(1);
+ final = 1;
+ }
+
+#ifdef DEBUG_PUSH
+ printf("value pushed: %s\n", value);
+#endif
+ /*
+ * If we have an active rollback stack push the new value there
+ * and get back to where we were left
+ */
+ if ((value != NULL) && (exec->inputStackNr > 0)) {
+ xmlFARegExecSaveInputString(exec, value, data);
+ value = exec->inputStack[exec->index].value; /* token at the current index */
+ data = exec->inputStack[exec->index].data;
+#ifdef DEBUG_PUSH
+ printf("value loaded: %s\n", value);
+#endif
+ }
+
+ while ((exec->status == 0) &&
+ ((value != NULL) ||
+ ((final == 1) &&
+ (exec->state->type != XML_REGEXP_FINAL_STATE)))) {
+
+ /*
+ * End of input on non-terminal state: rollback.
+ * NOTE(review): unlike xmlFARegExec() no check on exec->counts is made,
+ * so counted transitions are not tried once value is NULL.
+ */
+ if (value == NULL)
+ goto rollback;
+
+ exec->transcount = 0;
+ for (;exec->transno < exec->state->nbTrans;exec->transno++) {
+ trans = &exec->state->trans[exec->transno];
+ if (trans->to < 0) /* disabled transition */
+ continue;
+ atom = trans->atom;
+ ret = 0;
+ if (trans->count >= 0) {
+ int count;
+ xmlRegCounterPtr counter;
+
+ /*
+ * A counted transition.
+ */
+
+ count = exec->counts[trans->count];
+ counter = &exec->comp->counters[trans->count];
+#ifdef DEBUG_PUSH
+ printf("testing count %d: val %d, min %d, max %d\n",
+ trans->count, count, counter->min, counter->max);
+#endif
+ ret = ((count >= counter->min) && (count <= counter->max)); /* in [min, max] */
+ } else if (atom == NULL) {
+ fprintf(stderr, "epsilon transition left at runtime\n");
+ exec->status = -2;
+ break;
+ } else if (value != NULL) {
+ ret = xmlStrEqual(value, atom->valuep); /* tokens match by string equality */
+ if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
+ xmlRegStatePtr to = exec->comp->states[trans->to];
+
+ /*
+ * this is a multiple input sequence
+ */
+ if (exec->state->nbTrans > exec->transno + 1) {
+ if (exec->inputStackNr <= 0) { /* token not yet on the input stack */
+ xmlFARegExecSaveInputString(exec, value, data);
+ }
+ xmlFARegExecSave(exec);
+ }
+ exec->transcount = 1;
+ do {
+ /*
+ * Try to progress as much as possible on the input
+ */
+ if (exec->transcount == atom->max) {
+ break;
+ }
+ exec->index++;
+ value = exec->inputStack[exec->index].value; /* NOTE(review): inputStack not checked for NULL */
+ data = exec->inputStack[exec->index].data;
+#ifdef DEBUG_PUSH
+ printf("value loaded: %s\n", value);
+#endif
+
+ /*
+ * End of input: stop here
+ */
+ if (value == NULL) {
+ exec->index --;
+ break;
+ }
+ if (exec->transcount >= atom->min) {
+ int transno = exec->transno;
+ xmlRegStatePtr state = exec->state;
+
+ /*
+ * The transition is acceptable save it
+ */
+ exec->transno = -1; /* trick: the save stores transno + 1, i.e. 0 */
+ exec->state = to;
+ if (exec->inputStackNr <= 0) {
+ xmlFARegExecSaveInputString(exec, value, data);
+ }
+ xmlFARegExecSave(exec);
+ exec->transno = transno;
+ exec->state = state;
+ }
+ ret = xmlStrEqual(value, atom->valuep);
+ exec->transcount++;
+ } while (ret == 1);
+ if (exec->transcount < atom->min)
+ ret = 0;
+
+ /*
+ * If the last check failed but one transition was found
+ * possible, rollback
+ */
+ if (ret < 0)
+ ret = 0;
+ if (ret == 0) {
+ goto rollback;
+ }
+ }
+ }
+ if (ret == 1) {
+ if ((exec->callback != NULL) && (atom != NULL)) { /* report the matched token */
+ exec->callback(exec->data, atom->valuep,
+ atom->data, data);
+ }
+ if (exec->state->nbTrans > exec->transno + 1) { /* keep the alternatives */
+ if (exec->inputStackNr <= 0) {
+ xmlFARegExecSaveInputString(exec, value, data);
+ }
+ xmlFARegExecSave(exec);
+ }
+ if (trans->counter >= 0) {
+#ifdef DEBUG_PUSH
+ printf("Increasing count %d\n", trans->counter);
+#endif
+ exec->counts[trans->counter]++;
+ }
+#ifdef DEBUG_PUSH
+ printf("entering state %d\n", trans->to);
+#endif
+ exec->state = exec->comp->states[trans->to];
+ exec->transno = 0;
+ if (trans->atom != NULL) { /* the token was consumed: load the next one */
+ if (exec->inputStack != NULL) {
+ exec->index++;
+ if (exec->index < exec->inputStackNr) {
+ value = exec->inputStack[exec->index].value;
+ data = exec->inputStack[exec->index].data;
+#ifdef DEBUG_PUSH
+ printf("value loaded: %s\n", value);
+#endif
+ } else {
+ value = NULL;
+ data = NULL;
+#ifdef DEBUG_PUSH
+ printf("end of input\n");
+#endif
+ }
+ } else { /* no input stack: the pushed token was the only one */
+ value = NULL;
+ data = NULL;
+#ifdef DEBUG_PUSH
+ printf("end of input\n");
+#endif
+ }
+ }
+ goto progress;
+ } else if (ret < 0) {
+ exec->status = -4;
+ break;
+ }
+ }
+ if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
+rollback:
+ /*
+ * Failed to find a way out
+ */
+ exec->determinist = 0;
+ xmlFARegExecRollBack(exec);
+ if (exec->status == 0) { /* a branch was restored: reload its token */
+ value = exec->inputStack[exec->index].value;
+ data = exec->inputStack[exec->index].data;
+#ifdef DEBUG_PUSH
+ printf("value loaded: %s\n", value);
+#endif
+ }
+ }
+progress:
+ continue;
+ }
+ if (exec->status == 0) {
+ return(exec->state->type == XML_REGEXP_FINAL_STATE);
+ }
+ return(exec->status); /* includes -1 set by a rollback on an empty stack */
+}
+
+#if 0 /* compiled out: character level variant of the push interface */
+static int
+xmlRegExecPushChar(xmlRegExecCtxtPtr exec, int UCS) { /* NOTE(review): UCS is never read below */
+ xmlRegTransPtr trans;
+ xmlRegAtomPtr atom;
+ int ret;
+ int codepoint, len;
+
+ if (exec == NULL)
+ return(-1);
+ if (exec->status != 0)
+ return(exec->status);
+
+ while ((exec->status == 0) &&
+ ((exec->inputString[exec->index] != 0) ||
+ (exec->state->type != XML_REGEXP_FINAL_STATE))) {
+
+ /*
+ * End of input on non-terminal state, rollback, however we may
+ * still have epsilon like transition for counted transitions
+ * on counters, in that case don't break too early.
+ */
+ if ((exec->inputString[exec->index] == 0) && (exec->counts == NULL))
+ goto rollback;
+
+ exec->transcount = 0;
+ for (;exec->transno < exec->state->nbTrans;exec->transno++) {
+ trans = &exec->state->trans[exec->transno];
+ if (trans->to < 0) /* disabled transition */
+ continue;
+ atom = trans->atom;
+ ret = 0;
+ if (trans->count >= 0) {
+ int count;
+ xmlRegCounterPtr counter;
+
+ /*
+ * A counted transition.
+ */
+
+ count = exec->counts[trans->count];
+ counter = &exec->comp->counters[trans->count];
+#ifdef DEBUG_REGEXP_EXEC
+ printf("testing count %d: val %d, min %d, max %d\n",
+ trans->count, count, counter->min, counter->max);
+#endif
+ ret = ((count >= counter->min) && (count <= counter->max));
+ } else if (atom == NULL) {
+ fprintf(stderr, "epsilon transition left at runtime\n");
+ exec->status = -2;
+ break;
+ } else if (exec->inputString[exec->index] != 0) {
+ codepoint = CUR_SCHAR(&(exec->inputString[exec->index]), len);
+ ret = xmlRegCheckCharacter(atom, codepoint);
+ if ((ret == 1) && (atom->min > 0) && (atom->max > 0)) {
+ xmlRegStatePtr to = exec->comp->states[trans->to];
+
+ /*
+ * this is a multiple input sequence
+ */
+ if (exec->state->nbTrans > exec->transno + 1) {
+ xmlFARegExecSave(exec);
+ }
+ exec->transcount = 1;
+ do {
+ /*
+ * Try to progress as much as possible on the input
+ */
+ if (exec->transcount == atom->max) {
+ break;
+ }
+ exec->index += len;
+ /*
+ * End of input: stop here
+ */
+ if (exec->inputString[exec->index] == 0) {
+ exec->index -= len;
+ break;
+ }
+ if (exec->transcount >= atom->min) {
+ int transno = exec->transno;
+ xmlRegStatePtr state = exec->state;
+
+ /*
+ * The transition is acceptable save it
+ */
+ exec->transno = -1; /* trick: the save stores transno + 1, i.e. 0 */
+ exec->state = to;
+ xmlFARegExecSave(exec);
+ exec->transno = transno;
+ exec->state = state;
+ }
+ codepoint = CUR_SCHAR(&(exec->inputString[exec->index]),
+ len);
+ ret = xmlRegCheckCharacter(atom, codepoint);
+ exec->transcount++;
+ } while (ret == 1);
+ if (exec->transcount < atom->min)
+ ret = 0;
+
+ /*
+ * If the last check failed but one transition was found
+ * possible, rollback
+ */
+ if (ret < 0)
+ ret = 0;
+ if (ret == 0) {
+ goto rollback;
+ }
+ }
+ }
+ if (ret == 1) {
+ if (exec->state->nbTrans > exec->transno + 1) {
+ xmlFARegExecSave(exec);
+ }
+ if (trans->counter >= 0) {
+#ifdef DEBUG_REGEXP_EXEC
+ printf("Increasing count %d\n", trans->counter);
+#endif
+ exec->counts[trans->counter]++;
+ }
+#ifdef DEBUG_REGEXP_EXEC
+ printf("entering state %d\n", trans->to);
+#endif
+ exec->state = exec->comp->states[trans->to];
+ exec->transno = 0;
+ if (trans->atom != NULL) {
+ exec->index += len;
+ }
+ goto progress;
+ } else if (ret < 0) {
+ exec->status = -4;
+ break;
+ }
+ }
+ if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
+rollback:
+ /*
+ * Failed to find a way out
+ */
+ exec->determinist = 0;
+ xmlFARegExecRollBack(exec);
+ }
+progress:
+ continue;
+ }
+} /* NOTE(review): falls off the end without returning a value */
+#endif
+/************************************************************************
+ * *
+ * Parser for the Schemas Datatype Regular Expressions *
+ * http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#regexs *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlFAIsChar:
+ * @ctxt: a regexp parser context
+ * [10] Char ::= [^.\?*+()|#x5B#x5D]
+ * Returns the char at the parsing position, or -1 if it is no Char (or 0)
+ */
+static int
+xmlFAIsChar(xmlRegParserCtxtPtr ctxt) {
+ int len;
+ int c = CUR_SCHAR(ctxt->cur, len);
+
+ switch (c) {
+ case 0: case '.': case '\\': case '?': case '*': case '+':
+ case '(': case ')': case '|': case 0x5B: case 0x5D:
+ return(-1);
+ default:
+ return(c);
+ }
+}
+
+/**
+ * xmlFAParseCharProp:
+ * @ctxt: a regexp parser context
+ * Parse a charProp into ctxt->atom, or into a new range of a RANGES atom.
+ * [27] charProp ::= IsCategory | IsBlock
+ * [28] IsCategory ::= Letters | Marks | Numbers | Punctuation |
+ * Separators | Symbols | Others
+ * [29] Letters ::= 'L' [ultmo]?
+ * [30] Marks ::= 'M' [nce]?
+ * [31] Numbers ::= 'N' [dlo]?
+ * [32] Punctuation ::= 'P' [cdseifo]?
+ * [33] Separators ::= 'Z' [slp]?
+ * [34] Symbols ::= 'S' [mcko]?
+ * [35] Others ::= 'C' [cfon]?
+ * [36] IsBlock ::= 'Is' [a-zA-Z0-9#x2D]+
+ */
+static void
+xmlFAParseCharProp(xmlRegParserCtxtPtr ctxt) {
+ int cur;
+ xmlRegAtomType type = 0;
+ xmlChar *blockName = NULL;
+
+ cur = CUR;
+ if (cur == 'L') {
+ NEXT;
+ cur = CUR;
+ if (cur == 'u') {
+ NEXT;
+ type = XML_REGEXP_LETTER_UPPERCASE;
+ } else if (cur == 'l') {
+ NEXT;
+ type = XML_REGEXP_LETTER_LOWERCASE;
+ } else if (cur == 't') {
+ NEXT;
+ type = XML_REGEXP_LETTER_TITLECASE;
+ } else if (cur == 'm') {
+ NEXT;
+ type = XML_REGEXP_LETTER_MODIFIER;
+ } else if (cur == 'o') {
+ NEXT;
+ type = XML_REGEXP_LETTER_OTHERS;
+ } else {
+ type = XML_REGEXP_LETTER;
+ }
+ } else if (cur == 'M') {
+ NEXT;
+ cur = CUR;
+ if (cur == 'n') {
+ NEXT;
+ /* nonspacing */
+ type = XML_REGEXP_MARK_NONSPACING;
+ } else if (cur == 'c') {
+ NEXT;
+ /* spacing combining */
+ type = XML_REGEXP_MARK_SPACECOMBINING;
+ } else if (cur == 'e') {
+ NEXT;
+ /* enclosing */
+ type = XML_REGEXP_MARK_ENCLOSING;
+ } else {
+ /* all marks */
+ type = XML_REGEXP_MARK;
+ }
+ } else if (cur == 'N') {
+ NEXT;
+ cur = CUR;
+ if (cur == 'd') {
+ NEXT;
+ /* digital */
+ type = XML_REGEXP_NUMBER_DECIMAL;
+ } else if (cur == 'l') {
+ NEXT;
+ /* letter */
+ type = XML_REGEXP_NUMBER_LETTER;
+ } else if (cur == 'o') {
+ NEXT;
+ /* other */
+ type = XML_REGEXP_NUMBER_OTHERS;
+ } else {
+ /* all numbers */
+ type = XML_REGEXP_NUMBER;
+ }
+ } else if (cur == 'P') {
+ NEXT;
+ cur = CUR;
+ if (cur == 'c') {
+ NEXT;
+ /* connector */
+ type = XML_REGEXP_PUNCT_CONNECTOR;
+ } else if (cur == 'd') {
+ NEXT;
+ /* dash */
+ type = XML_REGEXP_PUNCT_DASH;
+ } else if (cur == 's') {
+ NEXT;
+ /* open */
+ type = XML_REGEXP_PUNCT_OPEN;
+ } else if (cur == 'e') {
+ NEXT;
+ /* close */
+ type = XML_REGEXP_PUNCT_CLOSE;
+ } else if (cur == 'i') {
+ NEXT;
+ /* initial quote */
+ type = XML_REGEXP_PUNCT_INITQUOTE;
+ } else if (cur == 'f') {
+ NEXT;
+ /* final quote */
+ type = XML_REGEXP_PUNCT_FINQUOTE;
+ } else if (cur == 'o') {
+ NEXT;
+ /* other */
+ type = XML_REGEXP_PUNCT_OTHERS;
+ } else {
+ /* all punctuation */
+ type = XML_REGEXP_PUNCT;
+ }
+ } else if (cur == 'Z') {
+ NEXT;
+ cur = CUR;
+ if (cur == 's') {
+ NEXT;
+ /* space */
+ type = XML_REGEXP_SEPAR_SPACE;
+ } else if (cur == 'l') {
+ NEXT;
+ /* line */
+ type = XML_REGEXP_SEPAR_LINE;
+ } else if (cur == 'p') {
+ NEXT;
+ /* paragraph */
+ type = XML_REGEXP_SEPAR_PARA;
+ } else {
+ /* all separators */
+ type = XML_REGEXP_SEPAR;
+ }
+ } else if (cur == 'S') {
+ NEXT;
+ cur = CUR;
+ if (cur == 'm') {
+ NEXT;
+ type = XML_REGEXP_SYMBOL_MATH;
+ /* math */
+ } else if (cur == 'c') {
+ NEXT;
+ type = XML_REGEXP_SYMBOL_CURRENCY;
+ /* currency */
+ } else if (cur == 'k') {
+ NEXT;
+ type = XML_REGEXP_SYMBOL_MODIFIER;
+ /* modifiers */
+ } else if (cur == 'o') {
+ NEXT;
+ type = XML_REGEXP_SYMBOL_OTHERS;
+ /* other */
+ } else {
+ /* all symbols */
+ type = XML_REGEXP_SYMBOL;
+ }
+ } else if (cur == 'C') {
+ NEXT;
+ cur = CUR;
+ if (cur == 'c') {
+ NEXT;
+ /* control */
+ type = XML_REGEXP_OTHER_CONTROL;
+ } else if (cur == 'f') {
+ NEXT;
+ /* format */
+ type = XML_REGEXP_OTHER_FORMAT;
+ } else if (cur == 'o') {
+ NEXT;
+ /* private use */
+ type = XML_REGEXP_OTHER_PRIVATE;
+ } else if (cur == 'n') {
+ NEXT;
+ /* not assigned */
+ type = XML_REGEXP_OTHER_NA;
+ } else {
+ /* all others */
+ type = XML_REGEXP_OTHER;
+ }
+ } else if (cur == 'I') {
+ const xmlChar *start;
+ NEXT;
+ cur = CUR;
+ if (cur != 's') {
+ ERROR("IsXXXX expected");
+ return;
+ }
+ NEXT;
+ start = ctxt->cur;
+ cur = CUR;
+ /* [36]: consume the run of [a-zA-Z0-9#x2D] forming the block name */
+ while (((cur >= 'a') && (cur <= 'z')) ||
+ ((cur >= 'A') && (cur <= 'Z')) ||
+ ((cur >= '0') && (cur <= '9')) ||
+ (cur == 0x2D)) {
+ NEXT;
+ cur = CUR;
+ }
+ type = XML_REGEXP_BLOCK_NAME;
+ blockName = xmlStrndup(start, ctxt->cur - start);
+ } else {
+ ERROR("Unknown char property");
+ return;
+ }
+ /* blockName is heap allocated: release it below when nothing stored it */
+ if (ctxt->atom == NULL) {
+ ctxt->atom = xmlRegNewAtom(ctxt, type);
+ if (ctxt->atom != NULL) {
+ ctxt->atom->valuep = blockName;
+ blockName = NULL; /* now owned by the atom */
+ }
+ } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
+ xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
+ type, 0, 0, blockName);
+ blockName = NULL; /* may now be referenced by the range: never free it here */
+ }
+ if (blockName != NULL) /* atom creation failed, or the atom is no RANGES */
+ xmlFree(blockName);
+}
+
+/**
+ * xmlFAParseCharClassEsc:
+ * @ctxt:  a regexp parser context
+ *
+ * Parse a character class escape at the current input position.
+ * When no atom is under construction a new atom is created for the
+ * escape; when the current atom is an XML_REGEXP_RANGES atom (the escape
+ * sits inside a character group) the escape is added to it as a range.
+ * Errors are reported through ERROR() and leave the function early.
+ *
+ * [23] charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
+ * [24] SingleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
+ * [25] catEsc ::= '\p{' charProp '}'
+ * [26] complEsc ::= '\P{' charProp '}'
+ * [37] MultiCharEsc ::= '.' | ('\' [sSiIcCdDwW])
+ */
+static void
+xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
+    int cur;
+
+    if (CUR == '.') {
+        if (ctxt->atom == NULL) {
+            ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_ANYCHAR);
+        } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
+            xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
+                               XML_REGEXP_ANYCHAR, 0, 0, NULL);
+        }
+        NEXT;
+        return;
+    }
+    if (CUR != '\\') {
+        ERROR("Escaped sequence: expecting \\");
+        return;
+    }
+    NEXT;
+    cur = CUR;
+    if ((cur == 'p') || (cur == 'P')) {
+        /* catEsc and complEsc share everything but the negation */
+        NEXT;
+        if (CUR != '{') {
+            ERROR("Expecting '{'");
+            return;
+        }
+        NEXT;
+        xmlFAParseCharProp(ctxt);
+        /*
+         * xmlFAParseCharProp() may have failed before creating an atom
+         * (syntax error or allocation failure), so the atom must be
+         * checked before it is flagged as negated.
+         */
+        if ((cur == 'P') && (ctxt->atom != NULL))
+            ctxt->atom->neg = 1;
+        if (CUR != '}') {
+            ERROR("Expecting '}'");
+            return;
+        }
+        NEXT;
+    } else if ((cur == 'n') || (cur == 'r') || (cur == 't') || (cur == '\\') ||
+               (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
+               (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
+               (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
+               (cur == 0x5E)) {
+        /*
+         * \n, \r and \t denote the control characters (same mapping as
+         * in xmlFAParseCharRange), every other single character escape
+         * stands for the character itself.
+         */
+        int val = cur;
+
+        if (cur == 'n')
+            val = 0xA;
+        else if (cur == 'r')
+            val = 0xD;
+        else if (cur == 't')
+            val = 0x9;
+        if (ctxt->atom == NULL) {
+            ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
+            if (ctxt->atom != NULL)
+                ctxt->atom->codepoint = val;
+        } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
+            xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
+                               XML_REGEXP_CHARVAL, val, val, NULL);
+        }
+        NEXT;
+    } else if ((cur == 's') || (cur == 'S') || (cur == 'i') || (cur == 'I') ||
+               (cur == 'c') || (cur == 'C') || (cur == 'd') || (cur == 'D') ||
+               (cur == 'w') || (cur == 'W')) {
+        xmlRegAtomType type;
+
+        switch (cur) {
+            case 's':
+                type = XML_REGEXP_ANYSPACE;
+                break;
+            case 'S':
+                type = XML_REGEXP_NOTSPACE;
+                break;
+            case 'i':
+                type = XML_REGEXP_INITNAME;
+                break;
+            case 'I':
+                type = XML_REGEXP_NOTINITNAME;
+                break;
+            case 'c':
+                type = XML_REGEXP_NAMECHAR;
+                break;
+            case 'C':
+                type = XML_REGEXP_NOTNAMECHAR;
+                break;
+            case 'd':
+                type = XML_REGEXP_DECIMAL;
+                break;
+            case 'D':
+                type = XML_REGEXP_NOTDECIMAL;
+                break;
+            case 'w':
+                type = XML_REGEXP_REALCHAR;
+                break;
+            case 'W':
+            default:
+                /* only 'W' can reach this point, the default label just
+                 * guarantees that type is always initialized */
+                type = XML_REGEXP_NOTREALCHAR;
+                break;
+        }
+        NEXT;
+        if (ctxt->atom == NULL) {
+            ctxt->atom = xmlRegNewAtom(ctxt, type);
+        } else if (ctxt->atom->type == XML_REGEXP_RANGES) {
+            xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
+                               type, 0, 0, NULL);
+        }
+    } else {
+        /* the grammar allows no other character after a backslash */
+        ERROR("Wrong escape sequence, misuse of character '\\'");
+    }
+}
+
+/**
+ * xmlFAParseCharRef:
+ * @ctxt:  a regexp parser context
+ *
+ * Parse a numeric character reference at the current input position.
+ * Both the decimal and the hexadecimal forms are handled, hexadecimal
+ * digits being accepted in lower and upper case.
+ *
+ * [19] XmlCharRef ::= ( '&#' [0-9]+ ';' ) | ( '&#x' [0-9a-fA-F]+ ';' )
+ *
+ * Returns the value of the referenced character, or -1 if the input does
+ *         not start with "&#" or if the reference is malformed (in which
+ *         case the error is also reported through ERROR()).
+ */
+static int
+xmlFAParseCharRef(xmlRegParserCtxtPtr ctxt) {
+    int ret = 0, cur;
+
+    if ((CUR != '&') || (NXT(1) != '#'))
+        return(-1);
+    NEXT;
+    NEXT;
+    cur = CUR;
+    if (cur == 'x') {
+        NEXT;
+        cur = CUR;
+        if (((cur >= '0') && (cur <= '9')) ||
+            ((cur >= 'a') && (cur <= 'f')) ||
+            ((cur >= 'A') && (cur <= 'F'))) {
+            /*
+             * The loop has to accept the same digit set as the test
+             * above, lower case digits included, otherwise references
+             * such as &#xa; are never consumed.
+             */
+            while (((cur >= '0') && (cur <= '9')) ||
+                   ((cur >= 'a') && (cur <= 'f')) ||
+                   ((cur >= 'A') && (cur <= 'F'))) {
+                if ((cur >= '0') && (cur <= '9'))
+                    ret = ret * 16 + cur - '0';
+                else if ((cur >= 'a') && (cur <= 'f'))
+                    ret = ret * 16 + 10 + (cur - 'a');
+                else
+                    ret = ret * 16 + 10 + (cur - 'A');
+                NEXT;
+                cur = CUR;
+            }
+        } else {
+            ERROR("Char ref: expecting [0-9A-F]");
+            return(-1);
+        }
+    } else {
+        if ((cur >= '0') && (cur <= '9')) {
+            while ((cur >= '0') && (cur <= '9')) {
+                ret = ret * 10 + cur - '0';
+                NEXT;
+                cur = CUR;
+            }
+        } else {
+            ERROR("Char ref: expecting [0-9]");
+            return(-1);
+        }
+    }
+    if (cur != ';') {
+        ERROR("Char ref: expecting ';'");
+        return(-1);
+    } else {
+        NEXT;
+    }
+    return(ret);
+}
+
+/**
+ * xmlFAParseCharRange:
+ * @ctxt:  a regexp parser context
+ *
+ * Parse one character, character reference or character range inside a
+ * character group and add it to the current atom (ctxt->atom) with the
+ * current negation flag (ctxt->neg).  Reaching the end of the expression
+ * is reported as an error without moving the cursor past the terminator.
+ *
+ * [17] charRange ::= seRange | XmlCharRef | XmlCharIncDash
+ * [18] seRange ::= charOrEsc '-' charOrEsc
+ * [20] charOrEsc ::= XmlChar | SingleCharEsc
+ * [21] XmlChar ::= [^\#x2D#x5B#x5D]
+ * [22] XmlCharIncDash ::= [^\#x5B#x5D]
+ */
+static void
+xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
+    int cur;
+    int start = -1;
+    int end = -1;
+
+    if ((CUR == '&') && (NXT(1) == '#')) {
+        end = start = xmlFAParseCharRef(ctxt);
+        /* a malformed reference was already reported, its -1 error
+         * value must not be registered as a character */
+        if (start >= 0)
+            xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
+                               XML_REGEXP_CHARVAL, start, end, NULL);
+        return;
+    }
+    cur = CUR;
+    if (cur == 0) {
+        /* unterminated group: never step over the end of the string */
+        ERROR("Expecting ']'");
+        return;
+    }
+    if (cur == '\\') {
+        NEXT;
+        cur = CUR;
+        switch (cur) {
+            case 'n': start = 0xA; break;
+            case 'r': start = 0xD; break;
+            case 't': start = 0x9; break;
+            case '\\': case '|': case '.': case '-': case '^': case '?':
+            case '*': case '+': case '{': case '}': case '(': case ')':
+            case '[': case ']':
+                start = cur; break;
+            default:
+                ERROR("Invalid escape value");
+                return;
+        }
+        end = start;
+    } else if ((cur != 0x5B) && (cur != 0x5D)) {
+        end = start = cur;
+    } else {
+        ERROR("Expecting a char range");
+        return;
+    }
+    NEXT;
+    /* a '-' (escaped or not) is consumed here without being added */
+    if (start == '-') {
+        return;
+    }
+    cur = CUR;
+    if (cur != '-') {
+        /* single character, no range operator follows */
+        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
+                           XML_REGEXP_CHARVAL, start, end, NULL);
+        return;
+    }
+    NEXT;
+    cur = CUR;
+    if (cur == 0) {
+        /* the expression ends right after the '-' of a range */
+        ERROR("Expecting the end of a char range");
+        return;
+    }
+    if (cur == '\\') {
+        NEXT;
+        cur = CUR;
+        switch (cur) {
+            case 'n': end = 0xA; break;
+            case 'r': end = 0xD; break;
+            case 't': end = 0x9; break;
+            case '\\': case '|': case '.': case '-': case '^': case '?':
+            case '*': case '+': case '{': case '}': case '(': case ')':
+            case '[': case ']':
+                end = cur; break;
+            default:
+                ERROR("Invalid escape value");
+                return;
+        }
+    } else if ((cur != 0x5B) && (cur != 0x5D)) {
+        end = cur;
+    } else {
+        ERROR("Expecting the end of a char range");
+        return;
+    }
+    NEXT;
+    /* TODO check that the values are acceptable character ranges for XML */
+    if (end < start) {
+        ERROR("End of range is before start of range");
+    } else {
+        xmlRegAtomAddRange(ctxt, ctxt->atom, ctxt->neg,
+                           XML_REGEXP_CHARVAL, start, end, NULL);
+    }
+    return;
+}
+
+/**
+ * xmlFAParsePosCharGroup:
+ * @ctxt:  a regexp parser context
+ *
+ * Parse a run of character ranges and character class escapes.  At least
+ * one item is parsed; the run stops in front of ']', '^' or '-' or as
+ * soon as an error has been recorded in the context.
+ *
+ * [14] posCharGroup ::= ( charRange | charClassEsc )+
+ */
+static void
+xmlFAParsePosCharGroup(xmlRegParserCtxtPtr ctxt) {
+    for (;;) {
+        if ((CUR != '\\') && (CUR != '.')) {
+            xmlFAParseCharRange(ctxt);
+        } else {
+            xmlFAParseCharClassEsc(ctxt);
+        }
+        /* group delimiters and errors end the run */
+        if ((CUR == ']') || (CUR == '^') || (CUR == '-') ||
+            (ctxt->error != 0))
+            break;
+    }
+}
+
+/**
+ * xmlFAParseCharGroup:
+ * @ctxt:  a regexp parser context
+ *
+ * Parse the content of a character class expression up to, but not
+ * including, its closing ']'.  The negation flag of the context is
+ * toggled while a negated group or a subtracted class is parsed and is
+ * put back to its entry value before returning.
+ *
+ * [13] charGroup ::= posCharGroup | negCharGroup | charClassSub
+ * [15] negCharGroup ::= '^' posCharGroup
+ * [16] charClassSub ::= ( posCharGroup | negCharGroup ) '-' charClassExpr
+ * [12] charClassExpr ::= '[' charGroup ']'
+ */
+static void
+xmlFAParseCharGroup(xmlRegParserCtxtPtr ctxt) {
+    int entryNeg = ctxt->neg;
+
+    while ((CUR != ']') && (ctxt->error == 0)) {
+        if (CUR == '^') {
+            /* negated group: invert the flag for its content only */
+            int outerNeg = ctxt->neg;
+
+            NEXT;
+            ctxt->neg = !outerNeg;
+            xmlFAParsePosCharGroup(ctxt);
+            ctxt->neg = outerNeg;
+        } else if (CUR == '-') {
+            /* class subtraction: a nested '[' charGroup ']' with the
+             * flag inverted, this always ends the current group */
+            NEXT;
+            ctxt->neg = !ctxt->neg;
+            if (CUR == '[') {
+                NEXT;
+                xmlFAParseCharGroup(ctxt);
+                if (CUR != ']') {
+                    ERROR("charClassExpr: ']' expected");
+                } else {
+                    NEXT;
+                }
+            } else {
+                ERROR("charClassExpr: '[' expected");
+            }
+            break;
+        } else if (CUR != ']') {
+            xmlFAParsePosCharGroup(ctxt);
+        }
+    }
+    ctxt->neg = entryNeg;
+}
+
+/**
+ * xmlFAParseCharClass:
+ * @ctxt:  a regexp parser context
+ *
+ * Parse a character class: either a bracketed expression, for which a
+ * new XML_REGEXP_RANGES atom is created and filled, or a character class
+ * escape.
+ *
+ * [11] charClass ::= charClassEsc | charClassExpr
+ * [12] charClassExpr ::= '[' charGroup ']'
+ */
+static void
+xmlFAParseCharClass(xmlRegParserCtxtPtr ctxt) {
+    if (CUR != '[') {
+        xmlFAParseCharClassEsc(ctxt);
+        return;
+    }
+
+    NEXT;
+    ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_RANGES);
+    if (ctxt->atom == NULL)
+        return;
+    xmlFAParseCharGroup(ctxt);
+    if (CUR != ']') {
+        ERROR("xmlFAParseCharClass: ']' expected");
+    } else {
+        NEXT;
+    }
+}
+
+/**
+ * xmlFAParseQuantExact:
+ * @ctxt:  a regexp parser context
+ *
+ * Parse a run of decimal digits at the current input position.
+ *
+ * [8] QuantExact ::= [0-9]+
+ *
+ * Returns the value of the number, or -1 if no digit was found.
+ */
+static int
+xmlFAParseQuantExact(xmlRegParserCtxtPtr ctxt) {
+    int value = 0;
+    int seen = 0;
+
+    while ((CUR >= '0') && (CUR <= '9')) {
+        seen = 1;
+        value = value * 10 + (CUR - '0');
+        NEXT;
+    }
+    return(seen ? value : -1);
+}
+
+/**
+ * xmlFAParseQuantifier:
+ * @ctxt:  a regexp parser context
+ *
+ * Parse an optional quantifier and record it on the current atom, if
+ * there is one.
+ *
+ * [4] quantifier ::= [?*+] | ( '{' quantity '}' )
+ * [5] quantity ::= quantRange | quantMin | QuantExact
+ * [6] quantRange ::= QuantExact ',' QuantExact
+ * [7] quantMin ::= QuantExact ','
+ * [8] QuantExact ::= [0-9]+
+ *
+ * NOTE(review): a maximum that is missing or 0 is replaced by the
+ * minimum, so the quantMin form "{n,}" ends up as exactly n occurrences.
+ *
+ * Returns 1 if a quantifier was consumed, 0 otherwise.
+ */
+static int
+xmlFAParseQuantifier(xmlRegParserCtxtPtr ctxt) {
+    int cur = CUR;
+    int min = 0, max = 0;
+
+    switch (cur) {
+        case '?':
+            if (ctxt->atom != NULL)
+                ctxt->atom->quant = XML_REGEXP_QUANT_OPT;
+            NEXT;
+            return(1);
+        case '*':
+            if (ctxt->atom != NULL)
+                ctxt->atom->quant = XML_REGEXP_QUANT_MULT;
+            NEXT;
+            return(1);
+        case '+':
+            if (ctxt->atom != NULL)
+                ctxt->atom->quant = XML_REGEXP_QUANT_PLUS;
+            NEXT;
+            return(1);
+        case '{':
+            break;
+        default:
+            return(0);
+    }
+
+    /* '{' quantity '}' */
+    NEXT;
+    cur = xmlFAParseQuantExact(ctxt);
+    if (cur >= 0)
+        min = cur;
+    if (CUR == ',') {
+        NEXT;
+        cur = xmlFAParseQuantExact(ctxt);
+        if (cur >= 0)
+            max = cur;
+    }
+    if (CUR != '}') {
+        ERROR("Unterminated quantifier");
+    } else {
+        NEXT;
+    }
+    if (max == 0)
+        max = min;
+    if (ctxt->atom != NULL) {
+        ctxt->atom->quant = XML_REGEXP_QUANT_RANGE;
+        ctxt->atom->min = min;
+        ctxt->atom->max = max;
+    }
+    return(1);
+}
+
+/**
+ * xmlFAParseAtom:
+ * @ctxt:  a regexp parser context
+ *
+ * Parse one atom and leave it in ctxt->atom: a plain character, a
+ * parenthesized sub-expression or a character class.
+ *
+ * [9] atom ::= Char | charClass | ( '(' regExp ')' )
+ *
+ * Returns 1 if an atom was parsed, 0 if the input does not start an atom
+ *         and -1 if the atom could not be allocated.
+ */
+static int
+xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
+    int codepoint, len;
+
+    codepoint = xmlFAIsChar(ctxt);
+    if (codepoint > 0) {
+        /* plain character */
+        ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
+        if (ctxt->atom == NULL)
+            return(-1);
+        codepoint = CUR_SCHAR(ctxt->cur, len);
+        ctxt->atom->codepoint = codepoint;
+        NEXTL(len);
+        return(1);
+    }
+
+    /* branch separator, end of input or end of group: no atom here */
+    if ((CUR == '|') || (CUR == 0) || (CUR == ')'))
+        return(0);
+
+    if (CUR == '(') {
+        xmlRegStatePtr subStart, savedEnd;
+
+        NEXT;
+        xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
+        subStart = ctxt->state;
+        savedEnd = ctxt->end;
+        ctxt->end = NULL;
+        ctxt->atom = NULL;
+        xmlFAParseRegExp(ctxt, 0);
+        if (CUR != ')') {
+            ERROR("xmlFAParseAtom: expecting ')'");
+        } else {
+            NEXT;
+        }
+        /* the sub-expression is represented by its entry/exit states */
+        ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_SUBREG);
+        if (ctxt->atom == NULL)
+            return(-1);
+        ctxt->atom->start = subStart;
+        ctxt->atom->stop = ctxt->state;
+        ctxt->end = savedEnd;
+        return(1);
+    }
+
+    if ((CUR == '[') || (CUR == '\\') || (CUR == '.')) {
+        xmlFAParseCharClass(ctxt);
+        return(1);
+    }
+    return(0);
+}
+
+/**
+ * xmlFAParsePiece:
+ * @ctxt:  a regexp parser context
+ *
+ * Parse an atom followed by an optional quantifier.  The resulting atom
+ * is left in ctxt->atom.
+ *
+ * [3] piece ::= atom quantifier?
+ *
+ * Returns 0 if no atom could be parsed at the current position, 1
+ *         otherwise.
+ */
+static int
+xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) {
+    ctxt->atom = NULL;
+    if (xmlFAParseAtom(ctxt) == 0)
+        return(0);
+    /* the atom parser claimed success (or failed) without an atom */
+    if (ctxt->atom == NULL) {
+        ERROR("internal: no atom generated");
+    }
+    xmlFAParseQuantifier(ctxt);
+    return(1);
+}
+
+/**
+ * xmlFAParseBranch:
+ * @ctxt: a regexp parser context
+ * @first: non-zero if this is the first branch of the enclosing regExp
+ *
+ * Parse a sequence of pieces and generate the matching transitions.
+ * For a first branch every piece gets its transition as soon as it is
+ * parsed, towards a new target (NULL is passed as target state).  For
+ * the following branches the generation is delayed by one piece so that
+ * the transition of the last piece can be directed to ctxt->end.
+ *
+ * [2] branch ::= piece*
+ */
+static void
+xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, int first) {
+ xmlRegStatePtr previous;
+ xmlRegAtomPtr prevatom = NULL;
+ int ret;
+
+ previous = ctxt->state;
+ ret = xmlFAParsePiece(ctxt);
+ if (ret != 0) {
+ if (first) {
+ xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom);
+ /* presumably ctxt->state is now the target of the new transition - confirm in xmlFAGenerateTransitions */
+ previous = ctxt->state;
+ } else {
+ /* keep the atom, its transition is generated once the next piece is known */
+ prevatom = ctxt->atom;
+ }
+ ctxt->atom = NULL;
+ }
+ while ((ret != 0) && (ctxt->error == 0)) {
+ ret = xmlFAParsePiece(ctxt);
+ if (ret != 0) {
+ if (first) {
+ xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom);
+ } else {
+ /* the delayed atom is not the last one: chain it to a new target */
+ xmlFAGenerateTransitions(ctxt, previous, NULL, prevatom);
+ prevatom = ctxt->atom;
+ }
+ previous = ctxt->state;
+ ctxt->atom = NULL;
+ }
+ }
+ if (!first) {
+ /*
+ * connect the last delayed atom to the shared end state
+ * NOTE(review): prevatom is still NULL here when the branch is empty
+ */
+ xmlFAGenerateTransitions(ctxt, previous, ctxt->end, prevatom);
+ }
+}
+
+/**
+ * xmlFAParseRegExp:
+ * @ctxt: a regexp parser context
+ * @top: non-zero when parsing the top-level expression
+ *
+ * Parse a list of branches separated by '|'.  Every alternative is
+ * parsed starting from the state that was current on entry; the
+ * alternatives after the first one are given the state reached by the
+ * first branch as their end state.
+ *
+ * [1] regExp ::= branch ( '|' branch )*
+ */
+static void
+xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) {
+ xmlRegStatePtr start, end, oldend;
+
+ oldend = ctxt->end;
+
+ start = ctxt->state;
+ /* the branch is handled as a "first" branch only when no end state is set yet */
+ xmlFAParseBranch(ctxt, (ctxt->end == NULL));
+ if (CUR != '|') {
+ /* single branch: its last state becomes the end state */
+ ctxt->end = ctxt->state;
+ return;
+ }
+ end = ctxt->state;
+ while ((CUR == '|') && (ctxt->error == 0)) {
+ NEXT;
+ /* rewind to the common start and aim at the common end */
+ ctxt->state = start;
+ ctxt->end = end;
+ xmlFAParseBranch(ctxt, 0);
+ }
+ /* nested expressions give the end state of the caller back */
+ if (!top)
+ ctxt->end = oldend;
+}
+
+/************************************************************************
+ * *
+ * The basic API *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlRegexpPrint:
+ * @output: the file for the output debug
+ * @regexp: the compiled regexp
+ *
+ * Dump a compiled regular expression for debugging purposes: the source
+ * string followed by the lists of atoms, states and counters.  A NULL
+ * @regexp is printed as "NULL".
+ */
+void
+xmlRegexpPrint(FILE *output, xmlRegexpPtr regexp) {
+    int idx;
+
+    fprintf(output, " regexp: ");
+    if (regexp == NULL) {
+        fprintf(output, "NULL\n");
+        return;
+    }
+    fprintf(output, "'%s' ", regexp->string);
+    fprintf(output, "\n");
+
+    /* atoms, each one prefixed by its index */
+    fprintf(output, "%d atoms:\n", regexp->nbAtoms);
+    idx = 0;
+    while (idx < regexp->nbAtoms) {
+        fprintf(output, " %02d ", idx);
+        xmlRegPrintAtom(output, regexp->atoms[idx]);
+        idx++;
+    }
+
+    /* states */
+    fprintf(output, "%d states:", regexp->nbStates);
+    fprintf(output, "\n");
+    idx = 0;
+    while (idx < regexp->nbStates) {
+        xmlRegPrintState(output, regexp->states[idx]);
+        idx++;
+    }
+
+    /* counters with their bounds */
+    fprintf(output, "%d counters:\n", regexp->nbCounters);
+    idx = 0;
+    while (idx < regexp->nbCounters) {
+        fprintf(output, " %d: min %d max %d\n", idx,
+                regexp->counters[idx].min, regexp->counters[idx].max);
+        idx++;
+    }
+}
+
+/**
+ * xmlRegexpCompile:
+ * @regexp:  a regular expression string
+ *
+ * Parses a regular expression conforming to XML Schemas Part 2 Datatype
+ * Appendix F and build an automata suitable for testing strings against
+ * that regular expression
+ *
+ * Returns the compiled expression or NULL in case of error
+ */
+xmlRegexpPtr
+xmlRegexpCompile(const xmlChar *regexp) {
+    xmlRegexpPtr ret;
+    xmlRegParserCtxtPtr ctxt;
+
+    ctxt = xmlRegNewParserCtxt(regexp);
+    if (ctxt == NULL)
+        return(NULL);
+
+    /* initialize the parser */
+    ctxt->end = NULL;
+    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
+    if (ctxt->start == NULL) {
+        /* without an initial state nothing can be built */
+        xmlRegFreeParserCtxt(ctxt);
+        return(NULL);
+    }
+    xmlRegStatePush(ctxt, ctxt->start);
+
+    /* parse the expression building an automata */
+    xmlFAParseRegExp(ctxt, 1);
+    if (CUR != 0) {
+        ERROR("xmlFAParseRegExp: extra characters");
+    }
+    /*
+     * Give up before touching the automata if the parsing failed: its
+     * states may be incomplete and the result is discarded anyway.
+     */
+    if ((ctxt->error != 0) || (ctxt->state == NULL)) {
+        xmlRegFreeParserCtxt(ctxt);
+        return(NULL);
+    }
+    ctxt->end = ctxt->state;
+    ctxt->start->type = XML_REGEXP_START_STATE;
+    ctxt->end->type = XML_REGEXP_FINAL_STATE;
+
+    /* remove the Epsilon except for counted transitions */
+    xmlFAEliminateEpsilonTransitions(ctxt);
+
+    if (ctxt->error != 0) {
+        xmlRegFreeParserCtxt(ctxt);
+        return(NULL);
+    }
+    ret = xmlRegEpxFromParse(ctxt);
+    xmlRegFreeParserCtxt(ctxt);
+    return(ret);
+}
+
+/**
+ * xmlRegexpExec:
+ * @comp: the compiled regular expression
+ * @content: the value to check against the regular expression
+ *
+ * Check whether @content is matched by the regular expression.
+ *
+ * Returns 1 if it matches, 0 if not and a negative value in case of
+ *         error (a NULL argument yields -1)
+ */
+int
+xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
+    if ((comp != NULL) && (content != NULL))
+        return(xmlFARegExec(comp, content));
+    return(-1);
+}
+
+/**
+ * xmlRegFreeRegexp:
+ * @regexp: the regexp
+ *
+ * Release a compiled regexp: its source string, every state and atom
+ * with the arrays holding them, the counters and the structure itself.
+ * A NULL @regexp is ignored.
+ */
+void
+xmlRegFreeRegexp(xmlRegexpPtr regexp) {
+    int idx;
+
+    if (regexp == NULL)
+        return;
+
+    if (regexp->string != NULL)
+        xmlFree(regexp->string);
+
+    if (regexp->states != NULL) {
+        idx = 0;
+        while (idx < regexp->nbStates) {
+            xmlRegFreeState(regexp->states[idx]);
+            idx++;
+        }
+        xmlFree(regexp->states);
+    }
+
+    if (regexp->atoms != NULL) {
+        idx = 0;
+        while (idx < regexp->nbAtoms) {
+            xmlRegFreeAtom(regexp->atoms[idx]);
+            idx++;
+        }
+        xmlFree(regexp->atoms);
+    }
+
+    if (regexp->counters != NULL)
+        xmlFree(regexp->counters);
+
+    xmlFree(regexp);
+}
+
+#ifdef LIBXML_AUTOMATA_ENABLED
+/************************************************************************
+ * *
+ * The Automata interface *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlNewAutomata:
+ *
+ * Create a new automata, made of a parser context without input string
+ * and a single initial state.
+ *
+ * Returns the new object or NULL in case of failure
+ */
+xmlAutomataPtr
+xmlNewAutomata(void) {
+    xmlAutomataPtr ctxt;
+
+    ctxt = xmlRegNewParserCtxt(NULL);
+    if (ctxt == NULL)
+        return(NULL);
+
+    /* initialize the parser */
+    ctxt->end = NULL;
+    ctxt->start = ctxt->state = xmlRegNewState(ctxt);
+    if (ctxt->start == NULL) {
+        /* an automata without initial state is unusable, report the
+         * allocation failure instead of handing it to the caller */
+        xmlRegFreeParserCtxt(ctxt);
+        return(NULL);
+    }
+    xmlRegStatePush(ctxt, ctxt->start);
+
+    return(ctxt);
+}
+
+/**
+ * xmlFreeAutomata:
+ * @am: an automata
+ *
+ * Release an automata; a NULL @am is ignored.
+ */
+void
+xmlFreeAutomata(xmlAutomataPtr am) {
+    if (am != NULL)
+        xmlRegFreeParserCtxt(am);
+}
+
+/**
+ * xmlAutomataGetInitState:
+ * @am: an automata
+ *
+ * Returns the initial state of the automata, or NULL if @am is NULL
+ */
+xmlAutomataStatePtr
+xmlAutomataGetInitState(xmlAutomataPtr am) {
+    return((am != NULL) ? am->start : NULL);
+}
+
+/**
+ * xmlAutomataSetFinalState:
+ * @am: an automata
+ * @state: a state in this automata
+ *
+ * Flag @state as a final state of the automata.
+ *
+ * Returns 0 on success, -1 if one of the arguments is NULL
+ */
+int
+xmlAutomataSetFinalState(xmlAutomataPtr am, xmlAutomataStatePtr state) {
+    if ((am != NULL) && (state != NULL)) {
+        state->type = XML_REGEXP_FINAL_STATE;
+        return(0);
+    }
+    return(-1);
+}
+
+/**
+ * xmlAutomataNewTransition:
+ * @am: an automata
+ * @from: the starting point of the transition
+ * @to: the target point of the transition or NULL
+ * @token: the input string associated to that transition
+ * @data: data passed to the callback function if the transition is activated
+ *
+ * If @to is NULL, this creates first a new target state in the automata
+ * and then adds a transition from the @from state to the target state
+ * activated by the value of @token
+ *
+ * Returns the target state or NULL in case of error
+ */
+xmlAutomataStatePtr
+xmlAutomataNewTransition(xmlAutomataPtr am, xmlAutomataStatePtr from,
+                         xmlAutomataStatePtr to, const xmlChar *token,
+                         void *data) {
+    xmlRegAtomPtr atom;
+
+    if ((am == NULL) || (from == NULL) || (token == NULL))
+        return(NULL);
+    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
+    /* the allocation must be checked before the atom is filled in */
+    if (atom == NULL)
+        return(NULL);
+    atom->data = data;
+    atom->valuep = xmlStrdup(token);
+
+    xmlFAGenerateTransitions(am, from, to, atom);
+    /* when no target was given the new one is the current state */
+    if (to == NULL)
+        return(am->state);
+    return(to);
+}
+
+/**
+ * xmlAutomataNewCountTrans:
+ * @am: an automata
+ * @from: the starting point of the transition
+ * @to: the target point of the transition or NULL
+ * @token: the input string associated to that transition
+ * @min: the minimum successive occurences of token
+ * @max: the maximum successive occurences of token
+ * @data: user data stored on the atom of the transition
+ *
+ * If @to is NULL, this creates first a new target state in the automata
+ * and then adds a transition from the @from state to the target state
+ * activated by a succession of input of value @token and whose number
+ * is between @min and @max.  A @min of 0 is handled as a minimum of 1
+ * plus an epsilon transition from @from to the target state.
+ *
+ * Returns the target state or NULL in case of error
+ */
+xmlAutomataStatePtr
+xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
+                         xmlAutomataStatePtr to, const xmlChar *token,
+                         int min, int max, void *data) {
+    xmlRegAtomPtr counted;
+
+    /* argument checking: pointers first, then the bounds */
+    if ((am == NULL) || (from == NULL) || (token == NULL) ||
+        (min < 0) || (max < min) || (max < 1))
+        return(NULL);
+
+    counted = xmlRegNewAtom(am, XML_REGEXP_STRING);
+    if (counted == NULL)
+        return(NULL);
+    counted->valuep = xmlStrdup(token);
+    counted->data = data;
+    counted->min = (min == 0) ? 1 : min;
+    counted->max = max;
+
+    xmlFAGenerateTransitions(am, from, to, counted);
+    if (to == NULL)
+        to = am->state;
+    if (to == NULL)
+        return(NULL);
+    /* zero occurrence allowed: the target can be reached directly */
+    if (min == 0)
+        xmlFAGenerateEpsilonTransition(am, from, to);
+    return(to);
+}
+
+/**
+ * xmlAutomataNewState:
+ * @am: an automata
+ *
+ * Create a new disconnected state in the automata
+ *
+ * Returns the new state or NULL in case of error
+ */
+xmlAutomataStatePtr
+xmlAutomataNewState(xmlAutomataPtr am) {
+    xmlAutomataStatePtr to;
+
+    if (am == NULL)
+        return(NULL);
+    to = xmlRegNewState(am);
+    /* never register a state which could not be allocated */
+    if (to == NULL)
+        return(NULL);
+    xmlRegStatePush(am, to);
+    return(to);
+}
+
+/**
+ * xmlAutomataNewEpsilon:
+ * @am: an automata
+ * @from: the starting point of the transition
+ * @to: the target point of the transition or NULL
+ *
+ * If @to is NULL, this creates first a new target state in the automata
+ * and then adds an epsilon transition from the @from state to the
+ * target state
+ *
+ * Returns the target state or NULL in case of error
+ */
+xmlAutomataStatePtr
+xmlAutomataNewEpsilon(xmlAutomataPtr am, xmlAutomataStatePtr from,
+                      xmlAutomataStatePtr to) {
+    if ((am == NULL) || (from == NULL))
+        return(NULL);
+    xmlFAGenerateEpsilonTransition(am, from, to);
+    /* an explicit target is returned as is, otherwise the new one */
+    if (to != NULL)
+        return(to);
+    return(am->state);
+}
+
+#if 0
+int xmlAutomataNewCounter (xmlAutomataPtr am);
+#endif
+
+/**
+ * xmlAutomataCompile:
+ * @am: an automata
+ *
+ * Compile the automata into a Reg Exp ready for being executed.
+ * The automata should be freed after this point.
+ *
+ * Returns the compiled regexp or NULL in case of error
+ */
+xmlRegexpPtr
+xmlAutomataCompile(xmlAutomataPtr am) {
+    xmlRegexpPtr ret;
+
+    /* same NULL handling as the other automata entry points */
+    if (am == NULL)
+        return(NULL);
+    xmlFAEliminateEpsilonTransitions(am);
+    ret = xmlRegEpxFromParse(am);
+
+    return(ret);
+}
+#endif /* LIBXML_AUTOMATA_ENABLED */
+#endif /* LIBXML_REGEXP_ENABLED */
diff --git a/xmlschemas.c b/xmlschemas.c
new file mode 100644
index 0000000..f211095
--- /dev/null
+++ b/xmlschemas.c
@@ -0,0 +1,4941 @@
+/*
+ * xmlschemas.c : implementation of the XML Schema handling and
+ * schema validity checking
+ *
+ * See Copyright for the status of this software.
+ *
+ * Daniel Veillard <veillard@redhat.com>
+ */
+
+#define IN_LIBXML
+#include "libxml.h"
+
+#ifdef LIBXML_SCHEMAS_ENABLED
+
+#include <string.h>
+#include <libxml/xmlmemory.h>
+#include <libxml/parser.h>
+#include <libxml/parserInternals.h>
+#include <libxml/hash.h>
+
+#include <libxml/xmlschemas.h>
+#include <libxml/schemasInternals.h>
+#include <libxml/xmlschemastypes.h>
+#include <libxml/xmlautomata.h>
+#include <libxml/xmlregexp.h>
+
+#define DEBUG 1 /* very verbose output */
+/* #define DEBUG_CONTENT 1 */
+/* #define DEBUG_AUTOMATA 1 */
+
+/* Large occurrence bound (2^30). NOTE(review): presumably stands for an unbounded maxOccurs - confirm at the use sites */
+#define UNBOUNDED (1 << 30)
+/*
+ * TODO: marker for unimplemented code paths, it reports the file and
+ * line where it is expanded through xmlGenericError.  The expansion
+ * already ends with a ';'.
+ */
+#define TODO \
+ xmlGenericError(xmlGenericErrorContext, \
+ "Unimplemented block at %s:%d\n", \
+ __FILE__, __LINE__);
+
+/*
+ * The XML Schemas namespaces: xmlSchemaNs is the namespace of the schema
+ * vocabulary itself (it is the one tested by IS_SCHEMA), and
+ * xmlSchemaInstanceNs is the XMLSchema-instance namespace.
+ */
+static const xmlChar *xmlSchemaNs = (const xmlChar *)
+ "http://www.w3.org/2001/XMLSchema";
+
+static const xmlChar *xmlSchemaInstanceNs = (const xmlChar *)
+ "http://www.w3.org/2001/XMLSchema-instance";
+
+/*
+ * IS_SCHEMA: true when @node is a non-NULL node bound to the XML Schemas
+ * namespace whose name is @type.  The arguments are parenthesized so
+ * that arbitrary expressions can be passed; @node is evaluated several
+ * times and must therefore be free of side effects.
+ */
+#define IS_SCHEMA(node, type) \
+    (((node) != NULL) && ((node)->ns != NULL) && \
+     (xmlStrEqual((node)->name, (const xmlChar *) (type))) && \
+     (xmlStrEqual((node)->ns->href, xmlSchemaNs)))
+
+#define XML_SCHEMAS_PARSE_ERROR 1
+
+/*
+ * Context used while a schema document is parsed and compiled: error
+ * reporting callbacks, the schema being built and the automata state
+ * used for the content models.
+ */
+struct _xmlSchemaParserCtxt {
+ void *userData; /* user specific data block */
+ xmlSchemaValidityErrorFunc error; /* the callback in case of errors */
+ xmlSchemaValidityWarningFunc warning;/* the callback in case of warning */
+
+ xmlSchemaPtr schema; /* The schema in use */
+ xmlChar *container; /* the current element, group, ... */
+ int counter; /* NOTE(review): usage not visible here - confirm */
+
+ xmlChar *URL; /* presumably the location of the schema document - confirm */
+ xmlDocPtr doc; /* presumably the parsed schema document - confirm */
+
+ /*
+ * Used to build complex element content models
+ */
+ xmlAutomataPtr am;
+ xmlAutomataStatePtr start;
+ xmlAutomataStatePtr end;
+ xmlAutomataStatePtr state;
+};
+
+
+/* Values for the state field of xmlSchemaAttrState (presumably - confirm at the use sites) */
+#define XML_SCHEMAS_ATTR_UNKNOWN 1
+#define XML_SCHEMAS_ATTR_CHECKED 2
+
+/* Pairs an attribute of the instance document with a processing state */
+typedef struct _xmlSchemaAttrState xmlSchemaAttrState;
+typedef xmlSchemaAttrState *xmlSchemaAttrStatePtr;
+struct _xmlSchemaAttrState {
+ xmlAttrPtr attr; /* the attribute */
+ int state; /* presumably one of XML_SCHEMAS_ATTR_* - confirm */
+};
+
+/**
+ * xmlSchemaValidCtxt:
+ *
+ * A Schemas validation context: error reporting callbacks, the schema
+ * validated against, the input being checked and the current
+ * validation state.
+ */
+
+struct _xmlSchemaValidCtxt {
+ void *userData; /* user specific data block */
+ xmlSchemaValidityErrorFunc error; /* the callback in case of errors */
+ xmlSchemaValidityWarningFunc warning;/* the callback in case of warning */
+
+ xmlSchemaPtr schema; /* The schema in use */
+ xmlDocPtr doc;
+ xmlParserInputBufferPtr input;
+ xmlCharEncoding enc;
+ xmlSAXHandlerPtr sax;
+ void *user_data;
+
+ xmlDocPtr myDoc;
+ int err; /* NOTE(review): presumably the last validation error code - confirm */
+
+ xmlNodePtr node; /* presumably the node being validated - confirm */
+ xmlSchemaTypePtr type; /* presumably the type validated against - confirm */
+
+ xmlRegExecCtxtPtr regexp;
+ xmlSchemaValPtr value;
+
+ /* NOTE(review): looks like a growable array of attribute states (count, base index, allocated size, storage) - confirm */
+ int attrNr;
+ int attrBase;
+ int attrMax;
+ xmlSchemaAttrStatePtr attr;
+};
+
+
+/************************************************************************
+ * *
+ * Some predeclarations *
+ * *
+ ************************************************************************/
+/* Forward declaration so that the function can be referenced ahead of its definition */
+static int xmlSchemaValidateSimpleValue(xmlSchemaValidCtxtPtr ctxt,
+ xmlSchemaTypePtr type,
+ xmlChar *value);
+
+/************************************************************************
+ * *
+ * Allocation functions *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlSchemaNewSchema:
+ * @ctxt:  a schema validation context (optional)
+ *
+ * Allocate a new, zero filled Schema structure.  An allocation failure
+ * is reported through the error callback of @ctxt when there is one.
+ *
+ * Returns the newly allocated structure or NULL in case of error
+ */
+static xmlSchemaPtr
+xmlSchemaNewSchema(xmlSchemaParserCtxtPtr ctxt)
+{
+    xmlSchemaPtr schema = (xmlSchemaPtr) xmlMalloc(sizeof(xmlSchema));
+
+    if (schema != NULL) {
+        memset(schema, 0, sizeof(xmlSchema));
+        return (schema);
+    }
+    if ((ctxt != NULL) && (ctxt->error != NULL))
+        ctxt->error(ctxt->userData, "Out of memory\n");
+    return (NULL);
+}
+
+/**
+ * xmlSchemaNewFacet:
+ * @ctxt:  a schema validation context (optional)
+ *
+ * Allocate a new, zero filled Facet structure.  An allocation failure
+ * is reported through the error callback of @ctxt when there is one.
+ *
+ * Returns the newly allocated structure or NULL in case of error
+ */
+static xmlSchemaFacetPtr
+xmlSchemaNewFacet(xmlSchemaParserCtxtPtr ctxt)
+{
+    xmlSchemaFacetPtr facet;
+
+    facet = (xmlSchemaFacetPtr) xmlMalloc(sizeof(xmlSchemaFacet));
+    if (facet != NULL) {
+        memset(facet, 0, sizeof(xmlSchemaFacet));
+        return (facet);
+    }
+    if ((ctxt != NULL) && (ctxt->error != NULL))
+        ctxt->error(ctxt->userData, "Out of memory\n");
+    return (NULL);
+}
+
+/**
+ * xmlSchemaNewAnnot:
+ * @ctxt:  a schema validation context (optional)
+ * @node:  a node
+ *
+ * Allocate a new annotation structure whose content points to @node,
+ * every other field being zeroed.  An allocation failure is reported
+ * through the error callback of @ctxt when there is one.
+ *
+ * Returns the newly allocated structure or NULL in case of error
+ */
+static xmlSchemaAnnotPtr
+xmlSchemaNewAnnot(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node)
+{
+    xmlSchemaAnnotPtr annot;
+
+    annot = (xmlSchemaAnnotPtr) xmlMalloc(sizeof(xmlSchemaAnnot));
+    if (annot != NULL) {
+        memset(annot, 0, sizeof(xmlSchemaAnnot));
+        annot->content = node;
+        return (annot);
+    }
+    if ((ctxt != NULL) && (ctxt->error != NULL))
+        ctxt->error(ctxt->userData, "Out of memory\n");
+    return (NULL);
+}
+
+/**
+ * xmlSchemaFreeNotation:
+ * @nota:  a schema notation structure
+ *
+ * Deallocate a Schema Notation structure and its name; NULL is ignored.
+ */
+static void
+xmlSchemaFreeNotation(xmlSchemaNotationPtr nota)
+{
+    if (nota != NULL) {
+        if (nota->name != NULL)
+            xmlFree((xmlChar *) nota->name);
+        xmlFree(nota);
+    }
+}
+
+/**
+ * xmlSchemaFreeAttribute:
+ * @attr:  a schema attribute structure
+ *
+ * Deallocate a Schema Attribute structure together with the strings it
+ * holds (name, reference and type names with their namespaces); NULL
+ * is ignored.
+ */
+static void
+xmlSchemaFreeAttribute(xmlSchemaAttributePtr attr)
+{
+    if (attr != NULL) {
+        if (attr->name != NULL)
+            xmlFree((xmlChar *) attr->name);
+        if (attr->ref != NULL)
+            xmlFree((xmlChar *) attr->ref);
+        if (attr->refNs != NULL)
+            xmlFree((xmlChar *) attr->refNs);
+        if (attr->typeName != NULL)
+            xmlFree((xmlChar *) attr->typeName);
+        if (attr->typeNs != NULL)
+            xmlFree((xmlChar *) attr->typeNs);
+        xmlFree(attr);
+    }
+}
+
+/**
+ * xmlSchemaFreeAttributeGroup:
+ * @attr:  a schema attribute group structure
+ *
+ * Deallocate a Schema Attribute Group structure and its name; NULL is
+ * ignored.
+ */
+static void
+xmlSchemaFreeAttributeGroup(xmlSchemaAttributeGroupPtr attr)
+{
+    if (attr != NULL) {
+        if (attr->name != NULL)
+            xmlFree((xmlChar *) attr->name);
+        xmlFree(attr);
+    }
+}
+
+/**
+ * xmlSchemaFreeElement:
+ * @elem:  a schema element structure
+ *
+ * Deallocate a Schema Element structure: the strings it holds and its
+ * compiled content model; NULL is ignored.
+ */
+static void
+xmlSchemaFreeElement(xmlSchemaElementPtr elem)
+{
+    if (elem != NULL) {
+        if (elem->name != NULL)
+            xmlFree((xmlChar *) elem->name);
+        if (elem->namedType != NULL)
+            xmlFree((xmlChar *) elem->namedType);
+        if (elem->namedTypeNs != NULL)
+            xmlFree((xmlChar *) elem->namedTypeNs);
+        if (elem->ref != NULL)
+            xmlFree((xmlChar *) elem->ref);
+        if (elem->refNs != NULL)
+            xmlFree((xmlChar *) elem->refNs);
+        /* the content model is a compiled regexp */
+        if (elem->contModel != NULL)
+            xmlRegFreeRegexp(elem->contModel);
+        xmlFree(elem);
+    }
+}
+
+/**
+ * xmlSchemaFreeFacet:
+ * @facet:  a schema facet structure
+ *
+ * Deallocate a Schema Facet structure: its lexical value and id, its
+ * computed value and its compiled regexp; NULL is ignored.
+ */
+static void
+xmlSchemaFreeFacet(xmlSchemaFacetPtr facet)
+{
+    if (facet != NULL) {
+        if (facet->value != NULL)
+            xmlFree((xmlChar *) facet->value);
+        if (facet->id != NULL)
+            xmlFree((xmlChar *) facet->id);
+        if (facet->val != NULL)
+            xmlSchemaFreeValue(facet->val);
+        if (facet->regexp != NULL)
+            xmlRegFreeRegexp(facet->regexp);
+        xmlFree(facet);
+    }
+}
+
+/**
+ * xmlSchemaFreeType:
+ * @type:  a schema type structure
+ *
+ * Deallocate a Schema Type structure: its strings, its annotation and
+ * the whole list of its facets; NULL is ignored.
+ */
+void
+xmlSchemaFreeType(xmlSchemaTypePtr type)
+{
+    xmlSchemaFacetPtr facet, next;
+
+    if (type == NULL)
+        return;
+
+    if (type->name != NULL)
+        xmlFree((xmlChar *) type->name);
+    if (type->base != NULL)
+        xmlFree((xmlChar *) type->base);
+    if (type->baseNs != NULL)
+        xmlFree((xmlChar *) type->baseNs);
+    /* NOTE(review): the annotation is released with a plain xmlFree() */
+    if (type->annot != NULL)
+        xmlFree((xmlChar *) type->annot);
+
+    /* walk the facet list, fetching the link before each free */
+    for (facet = type->facets; facet != NULL; facet = next) {
+        next = facet->next;
+        xmlSchemaFreeFacet(facet);
+    }
+    xmlFree(type);
+}
+
+/**
+ * xmlSchemaFreeAnnot:
+ * @annot:  a schema annotation structure
+ *
+ * Deallocate an annotation structure; the node it points to is left
+ * untouched and NULL is ignored.
+ */
+static void
+xmlSchemaFreeAnnot(xmlSchemaAnnotPtr annot)
+{
+    if (annot != NULL)
+        xmlFree(annot);
+}
+
+/**
+ * xmlSchemaFree:
+ * @schema:  a schema structure
+ *
+ * Deallocate a Schema structure: its name, the tables of declarations
+ * with all their entries, its annotation and the document it was built
+ * from; NULL is ignored.
+ */
+void
+xmlSchemaFree(xmlSchemaPtr schema)
+{
+    if (schema != NULL) {
+        if (schema->name != NULL)
+            xmlFree((xmlChar *) schema->name);
+
+        /* each table is released with the deallocator of its entries */
+        if (schema->notaDecl != NULL)
+            xmlHashFree(schema->notaDecl,
+                        (xmlHashDeallocator) xmlSchemaFreeNotation);
+        if (schema->attrDecl != NULL)
+            xmlHashFree(schema->attrDecl,
+                        (xmlHashDeallocator) xmlSchemaFreeAttribute);
+        if (schema->attrgrpDecl != NULL)
+            xmlHashFree(schema->attrgrpDecl,
+                        (xmlHashDeallocator) xmlSchemaFreeAttributeGroup);
+        if (schema->elemDecl != NULL)
+            xmlHashFree(schema->elemDecl,
+                        (xmlHashDeallocator) xmlSchemaFreeElement);
+        if (schema->typeDecl != NULL)
+            xmlHashFree(schema->typeDecl,
+                        (xmlHashDeallocator) xmlSchemaFreeType);
+
+        if (schema->annot != NULL)
+            xmlSchemaFreeAnnot(schema->annot);
+        if (schema->doc != NULL)
+            xmlFreeDoc(schema->doc);
+
+        xmlFree(schema);
+    }
+}
+
+/************************************************************************
+ * *
+ * Error functions *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlSchemaErrorContext:
+ * @ctxt:  the parsing context
+ * @schema:  the schema being built
+ * @node:  the node being processed
+ * @child:  the child being processed
+ *
+ * Report, through the error callback of @ctxt, where an error occurred:
+ * the kind of error followed by whatever is known among the file, the
+ * line and the element name.  @child takes precedence over @node when
+ * both are given.  Nothing is done without a context and a callback.
+ */
+static void
+xmlSchemaErrorContext(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                      xmlNodePtr node, xmlNodePtr child)
+{
+    int line = 0;
+    const xmlChar *file = NULL;
+    const xmlChar *name = NULL;
+    const char *type = "error";
+
+    if ((ctxt == NULL) || (ctxt->error == NULL))
+        return;
+
+    if (child != NULL)
+        node = child;
+
+    if (node != NULL) {
+        if ((node->type == XML_DOCUMENT_NODE) ||
+            (node->type == XML_HTML_DOCUMENT_NODE)) {
+            xmlDocPtr doc = (xmlDocPtr) node;
+
+            file = doc->URL;
+        } else {
+            /*
+             * Try to find contextual informations to report.
+             * The line number is read from the content pointer of an
+             * element node; the conversion goes through long so that
+             * the pointer is not cast directly to a narrower int.
+             */
+            if (node->type == XML_ELEMENT_NODE) {
+                line = (int) (long) node->content;
+            } else if ((node->prev != NULL) &&
+                       (node->prev->type == XML_ELEMENT_NODE)) {
+                line = (int) (long) node->prev->content;
+            } else if ((node->parent != NULL) &&
+                       (node->parent->type == XML_ELEMENT_NODE)) {
+                line = (int) (long) node->parent->content;
+            }
+            if ((node->doc != NULL) && (node->doc->URL != NULL))
+                file = node->doc->URL;
+            if (node->name != NULL)
+                name = node->name;
+        }
+    }
+
+    /* ctxt cannot be NULL at this point, so the first case always applies */
+    if (ctxt != NULL)
+        type = "compilation error";
+    else if (schema != NULL)
+        type = "runtime error";
+
+    /* print the most detailed message the available data allows */
+    if ((file != NULL) && (line != 0) && (name != NULL))
+        ctxt->error(ctxt->userData, "%s: file %s line %d element %s\n",
+                    type, file, line, name);
+    else if ((file != NULL) && (name != NULL))
+        ctxt->error(ctxt->userData, "%s: file %s element %s\n",
+                    type, file, name);
+    else if ((file != NULL) && (line != 0))
+        ctxt->error(ctxt->userData, "%s: file %s line %d\n", type, file, line);
+    else if (file != NULL)
+        ctxt->error(ctxt->userData, "%s: file %s\n", type, file);
+    else if (name != NULL)
+        ctxt->error(ctxt->userData, "%s: element %s\n", type, name);
+    else
+        ctxt->error(ctxt->userData, "%s\n", type);
+}
+
+/************************************************************************
+ * *
+ * Debug functions *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlSchemaElementDump:
+ * @elem: an element
+ * @output: the file output
+ * @name: first hash key of the entry, not used
+ * @context: second hash key of the entry, not used
+ * @namespace: third hash key of the entry, printed when not NULL
+ *
+ * Dump the element declaration to @output: flags, ref, id, occurrence
+ * constraints, named type, substitution group and default value.
+ * The signature is the one expected by xmlHashScanFull().
+ */
+static void
+xmlSchemaElementDump(xmlSchemaElementPtr elem, FILE * output,
+                     ATTRIBUTE_UNUSED const xmlChar *name,
+                     ATTRIBUTE_UNUSED const xmlChar *context,
+                     ATTRIBUTE_UNUSED const xmlChar *namespace)
+{
+    if (elem == NULL)
+        return;
+
+    /* First line: name, namespace and the flags which are set */
+    fputs("Element ", output);
+    if (elem->flags & XML_SCHEMAS_ELEM_TOPLEVEL)
+        fputs("toplevel ", output);
+    fprintf(output, ": %s ", elem->name);
+    if (namespace != NULL)
+        fprintf(output, "namespace '%s' ", namespace);
+
+    if (elem->flags & XML_SCHEMAS_ELEM_NILLABLE)
+        fputs("nillable ", output);
+    if (elem->flags & XML_SCHEMAS_ELEM_GLOBAL)
+        fputs("global ", output);
+    if (elem->flags & XML_SCHEMAS_ELEM_DEFAULT)
+        fputs("default ", output);
+    if (elem->flags & XML_SCHEMAS_ELEM_FIXED)
+        fputs("fixed ", output);
+    if (elem->flags & XML_SCHEMAS_ELEM_ABSTRACT)
+        fputs("abstract ", output);
+    if (elem->flags & XML_SCHEMAS_ELEM_REF)
+        fprintf(output, "ref '%s' ", elem->ref);
+    if (elem->id != NULL)
+        fprintf(output, "id '%s' ", elem->id);
+    fputc('\n', output);
+
+    /* Occurrence constraints, only when they differ from 1..1 */
+    if ((elem->minOccurs != 1) || (elem->maxOccurs != 1)) {
+        fputs(" ", output);
+        if (elem->minOccurs != 1)
+            fprintf(output, "min: %d ", elem->minOccurs);
+        if (elem->maxOccurs >= UNBOUNDED)
+            fputs("max: unbounded\n", output);
+        else if (elem->maxOccurs != 1)
+            fprintf(output, "max: %d\n", elem->maxOccurs);
+        else
+            fputc('\n', output);
+    }
+
+    /* Named type, with its namespace if any */
+    if (elem->namedType != NULL) {
+        fprintf(output, " type: %s", elem->namedType);
+        if (elem->namedTypeNs == NULL)
+            fputc('\n', output);
+        else
+            fprintf(output, " ns %s\n", elem->namedTypeNs);
+    }
+
+    /* Substitution group, with its namespace if any */
+    if (elem->substGroup != NULL) {
+        fprintf(output, " substitutionGroup: %s", elem->substGroup);
+        if (elem->substGroupNs == NULL)
+            fputc('\n', output);
+        else
+            fprintf(output, " ns %s\n", elem->substGroupNs);
+    }
+
+    /* No end of line is written after the value */
+    if (elem->value != NULL)
+        fprintf(output, " default: %s", elem->value);
+}
+
+/**
+ * xmlSchemaAnnotDump:
+ * @output: the file output
+ * @annot: an annotation
+ *
+ * Dump the text content of the annotation to @output, or a
+ * placeholder line when no content could be extracted.
+ */
+static void
+xmlSchemaAnnotDump(FILE * output, xmlSchemaAnnotPtr annot)
+{
+    xmlChar *text;
+
+    if (annot == NULL)
+        return;
+
+    text = xmlNodeGetContent(annot->content);
+    if (text == NULL) {
+        fputs(" Annot: empty\n", output);
+        return;
+    }
+    fprintf(output, " Annot: %s\n", text);
+    /* xmlNodeGetContent() hands back a copy, release it */
+    xmlFree(text);
+}
+
+/**
+ * xmlSchemaTypeDump:
+ * @type: a type structure
+ * @output: the file output
+ *
+ * Dump a SchemaType structure: name, kind, base, content type,
+ * occurrence constraints, annotation and the names of the subtypes.
+ */
+static void
+xmlSchemaTypeDump(xmlSchemaTypePtr type, FILE * output)
+{
+    const char *label;
+    xmlSchemaTypePtr sub;
+
+    if (type == NULL) {
+        fputs("Type: NULL\n", output);
+        return;
+    }
+
+    fputs("Type: ", output);
+    if (type->name == NULL)
+        fputs("no name", output);
+    else
+        fprintf(output, "%s, ", type->name);
+
+    /* Kind of type, unknown values are printed numerically */
+    switch (type->type) {
+        case XML_SCHEMA_TYPE_BASIC:
+            label = "basic ";
+            break;
+        case XML_SCHEMA_TYPE_SIMPLE:
+            label = "simple ";
+            break;
+        case XML_SCHEMA_TYPE_COMPLEX:
+            label = "complex ";
+            break;
+        case XML_SCHEMA_TYPE_SEQUENCE:
+            label = "sequence ";
+            break;
+        case XML_SCHEMA_TYPE_CHOICE:
+            label = "choice ";
+            break;
+        case XML_SCHEMA_TYPE_ALL:
+            label = "all ";
+            break;
+        case XML_SCHEMA_TYPE_UR:
+            label = "ur ";
+            break;
+        case XML_SCHEMA_TYPE_RESTRICTION:
+            label = "restriction ";
+            break;
+        case XML_SCHEMA_TYPE_EXTENSION:
+            label = "extension ";
+            break;
+        default:
+            label = NULL;
+            break;
+    }
+    if (label != NULL)
+        fputs(label, output);
+    else
+        fprintf(output, "unknowntype%d ", type->type);
+
+    if (type->base != NULL)
+        fprintf(output, "base %s, ", type->base);
+
+    /* Content type, nothing is printed for values not listed here */
+    label = NULL;
+    switch (type->contentType) {
+        case XML_SCHEMA_CONTENT_UNKNOWN:
+            label = "unknown ";
+            break;
+        case XML_SCHEMA_CONTENT_EMPTY:
+            label = "empty ";
+            break;
+        case XML_SCHEMA_CONTENT_ELEMENTS:
+            label = "element ";
+            break;
+        case XML_SCHEMA_CONTENT_MIXED:
+            label = "mixed ";
+            break;
+        case XML_SCHEMA_CONTENT_MIXED_OR_ELEMENTS:
+            label = "mixed_or_elems ";
+            break;
+        case XML_SCHEMA_CONTENT_BASIC:
+            label = "basic ";
+            break;
+        case XML_SCHEMA_CONTENT_SIMPLE:
+            label = "simple ";
+            break;
+        default:
+            break;
+    }
+    if (label != NULL)
+        fputs(label, output);
+    fputc('\n', output);
+
+    /* Occurrence constraints, only when they differ from 1..1 */
+    if ((type->minOccurs != 1) || (type->maxOccurs != 1)) {
+        fputs(" ", output);
+        if (type->minOccurs != 1)
+            fprintf(output, "min: %d ", type->minOccurs);
+        if (type->maxOccurs >= UNBOUNDED)
+            fputs("max: unbounded\n", output);
+        else if (type->maxOccurs != 1)
+            fprintf(output, "max: %d\n", type->maxOccurs);
+        else
+            fputc('\n', output);
+    }
+
+    if (type->annot != NULL)
+        xmlSchemaAnnotDump(output, type->annot);
+
+    /* Names of the subtypes, following the next links */
+    if (type->subtypes != NULL) {
+        fputs(" subtypes: ", output);
+        for (sub = type->subtypes; sub != NULL; sub = sub->next)
+            fprintf(output, "%s ", sub->name);
+        fputc('\n', output);
+    }
+}
+
+/**
+ * xmlSchemaDump:
+ * @output: the file output
+ * @schema: a schema structure
+ *
+ * Dump a Schema structure: its name, target namespace and annotation,
+ * then every type declaration followed by every element declaration.
+ */
+void
+xmlSchemaDump(FILE * output, xmlSchemaPtr schema)
+{
+    if (schema == NULL) {
+        fputs("Schemas: NULL\n", output);
+        return;
+    }
+
+    fputs("Schemas: ", output);
+    if (schema->name == NULL)
+        fputs("no name, ", output);
+    else
+        fprintf(output, "%s, ", schema->name);
+    if (schema->targetNamespace == NULL)
+        fputs("no target namespace", output);
+    else
+        fprintf(output, "%s", schema->targetNamespace);
+    fputc('\n', output);
+
+    if (schema->annot != NULL)
+        xmlSchemaAnnotDump(output, schema->annot);
+
+    /* The dump routines are used as hash scanners, @output is the data */
+    xmlHashScan(schema->typeDecl, (xmlHashScanner) xmlSchemaTypeDump,
+                output);
+    xmlHashScanFull(schema->elemDecl,
+                    (xmlHashScannerFull) xmlSchemaElementDump, output);
+}
+
+/************************************************************************
+ * *
+ * Parsing functions *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlSchemaGetType:
+ * @schema: the schemas context
+ * @name: the type name
+ * @namespace: the type namespace
+ *
+ * Lookup a type in the schemas or the predefined types. The types
+ * declared by @schema are searched first, if @schema is not NULL.
+ *
+ * Returns the type found, or NULL if @name is NULL or no type matches
+ */
+static xmlSchemaTypePtr
+xmlSchemaGetType(xmlSchemaPtr schema, const xmlChar * name,
+                 const xmlChar * namespace) {
+    xmlSchemaTypePtr found = NULL;
+
+    if (name == NULL)
+        return(NULL);
+
+    /* Declarations of the schema take precedence */
+    if (schema != NULL)
+        found = xmlHashLookup2(schema->typeDecl, name, namespace);
+    if (found != NULL)
+        return(found);
+
+    found = xmlSchemaGetPredefinedType(name, namespace);
+#ifdef DEBUG
+    if (found == NULL) {
+        if (namespace != NULL)
+            fprintf(stderr, "Unable to lookup type %s:%s", name, namespace);
+        else
+            fprintf(stderr, "Unable to lookup type %s", name);
+    }
+#endif
+    return(found);
+}
+
+/************************************************************************
+ * *
+ * Parsing functions *
+ * *
+ ************************************************************************/
+
+/*
+ * IS_BLANK_NODE: true when @n is a text node whose content is NULL or
+ * made only of blank chars, as decided by xmlSchemaIsBlank().
+ * @n is evaluated twice and must not be NULL.
+ */
+#define IS_BLANK_NODE(n) \
+ (((n)->type == XML_TEXT_NODE) && (xmlSchemaIsBlank((n)->content)))
+
+/**
+ * xmlSchemaIsBlank:
+ * @str: a string
+ *
+ * Check if a string is ignorable, i.e. contains nothing but blanks.
+ *
+ * Returns 1 if the string is NULL or made of blanks chars, 0 otherwise
+ */
+static int
+xmlSchemaIsBlank(xmlChar *str) {
+    const xmlChar *cur;
+
+    if (str == NULL)
+        return(1);
+    /* Stop at the first char which is not a blank */
+    for (cur = str; *cur != 0; cur++) {
+        if (!(IS_BLANK(*cur)))
+            return(0);
+    }
+    return(1);
+}
+
+/**
+ * xmlSchemaAddNotation:
+ * @ctxt: a schema validation context
+ * @schema: the schema being built
+ * @name: the item name
+ *
+ * Add an XML schema Notation declaration, registered in the notation
+ * table of @schema under @name and the schema target namespace.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the new structure or NULL in case of error
+ */
+static xmlSchemaNotationPtr
+xmlSchemaAddNotation(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                     const xmlChar * name)
+{
+    xmlSchemaNotationPtr nota;
+
+    if ((ctxt == NULL) || (schema == NULL) || (name == NULL))
+        return (NULL);
+
+    /* The table is created the first time a notation is added */
+    if (schema->notaDecl == NULL) {
+        schema->notaDecl = xmlHashCreate(10);
+        if (schema->notaDecl == NULL)
+            return (NULL);
+    }
+
+    nota = (xmlSchemaNotationPtr) xmlMalloc(sizeof(xmlSchemaNotation));
+    if (nota == NULL) {
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData, "Out of memory\n");
+        return (NULL);
+    }
+    memset(nota, 0, sizeof(xmlSchemaNotation));
+    nota->name = xmlStrdup(name);
+
+    if (xmlHashAddEntry2(schema->notaDecl, name, schema->targetNamespace,
+                         nota) != 0) {
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData, "Could not add notation %s\n",
+                        name);
+        /* The table did not take the entry, release it here */
+        xmlFree((char *) nota->name);
+        xmlFree(nota);
+        return (NULL);
+    }
+    return (nota);
+}
+
+
+
+/**
+ * xmlSchemaAddAttribute:
+ * @ctxt: a schema validation context
+ * @schema: the schema being built
+ * @name: the item name
+ *
+ * Add an XML schema Attribute declaration, registered in the attribute
+ * table of @schema under @name, the schema target namespace and the
+ * current container of @ctxt.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the new structure or NULL in case of error
+ */
+static xmlSchemaAttributePtr
+xmlSchemaAddAttribute(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                      const xmlChar * name)
+{
+    xmlSchemaAttributePtr attr;
+
+    if ((ctxt == NULL) || (schema == NULL) || (name == NULL))
+        return (NULL);
+
+    /* The table is created the first time an attribute is added */
+    if (schema->attrDecl == NULL) {
+        schema->attrDecl = xmlHashCreate(10);
+        if (schema->attrDecl == NULL)
+            return (NULL);
+    }
+
+    attr = (xmlSchemaAttributePtr) xmlMalloc(sizeof(xmlSchemaAttribute));
+    if (attr == NULL) {
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData, "Out of memory\n");
+        return (NULL);
+    }
+    memset(attr, 0, sizeof(xmlSchemaAttribute));
+    attr->name = xmlStrdup(name);
+
+    if (xmlHashAddEntry3(schema->attrDecl, name,
+                         schema->targetNamespace, ctxt->container,
+                         attr) != 0) {
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData, "Could not add attribute %s\n",
+                        name);
+        /* The table did not take the entry, release it here */
+        xmlFree((char *) attr->name);
+        xmlFree(attr);
+        return (NULL);
+    }
+    return (attr);
+}
+
+/**
+ * xmlSchemaAddAttributeGroup:
+ * @ctxt: a schema validation context
+ * @schema: the schema being built
+ * @name: the item name
+ *
+ * Add an XML schema Attribute Group declaration, registered in the
+ * attribute group table of @schema under @name, the schema target
+ * namespace and the current container of @ctxt.
+ *
+ * Returns the new structure or NULL in case of error
+ */
+static xmlSchemaAttributeGroupPtr
+xmlSchemaAddAttributeGroup(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                           const xmlChar * name)
+{
+    xmlSchemaAttributeGroupPtr group;
+
+    if ((ctxt == NULL) || (schema == NULL) || (name == NULL))
+        return (NULL);
+
+    /* The table is created the first time a group is added */
+    if (schema->attrgrpDecl == NULL) {
+        schema->attrgrpDecl = xmlHashCreate(10);
+        if (schema->attrgrpDecl == NULL)
+            return (NULL);
+    }
+
+    group = (xmlSchemaAttributeGroupPtr)
+            xmlMalloc(sizeof(xmlSchemaAttributeGroup));
+    if (group == NULL) {
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData, "Out of memory\n");
+        return (NULL);
+    }
+    memset(group, 0, sizeof(xmlSchemaAttributeGroup));
+    group->name = xmlStrdup(name);
+
+    if (xmlHashAddEntry3(schema->attrgrpDecl, name,
+                         schema->targetNamespace, ctxt->container,
+                         group) != 0) {
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData, "Could not add attribute group %s\n",
+                        name);
+        /* The table did not take the entry, release it here */
+        xmlFree((char *) group->name);
+        xmlFree(group);
+        return (NULL);
+    }
+    return (group);
+}
+
+/**
+ * xmlSchemaAddElement:
+ * @ctxt: a schema validation context
+ * @schema: the schema being built
+ * @name: the element name
+ * @namespace: the element namespace
+ *
+ * Add an XML schema Element declaration, registered in the element
+ * table of @schema under @name, @namespace and the current container
+ * of @ctxt. If that key is refused, a second registration is tried
+ * with a generated private key built from the counter of @ctxt.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the new structure or NULL in case of error
+ */
+static xmlSchemaElementPtr
+xmlSchemaAddElement(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                    const xmlChar * name, const xmlChar * namespace)
+{
+    xmlSchemaElementPtr elem;
+    char buf[100];
+
+    if ((ctxt == NULL) || (schema == NULL) || (name == NULL))
+        return (NULL);
+
+    /* The table is created the first time an element is added */
+    if (schema->elemDecl == NULL) {
+        schema->elemDecl = xmlHashCreate(10);
+        if (schema->elemDecl == NULL)
+            return (NULL);
+    }
+
+    elem = (xmlSchemaElementPtr) xmlMalloc(sizeof(xmlSchemaElement));
+    if (elem == NULL) {
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData, "Out of memory\n");
+        return (NULL);
+    }
+    memset(elem, 0, sizeof(xmlSchemaElement));
+    elem->name = xmlStrdup(name);
+
+    if (xmlHashAddEntry3(schema->elemDecl, name,
+                         namespace, ctxt->container, elem) == 0)
+        return (elem);
+
+    /*
+     * The regular key was refused, retry with a generated private key.
+     * Note that the counter is consumed even if this fails too.
+     */
+    snprintf(buf, 99, "privatieelem%d", ctxt->counter++ + 1);
+    if (xmlHashAddEntry3(schema->elemDecl, name, (xmlChar *) buf,
+                         namespace, elem) == 0)
+        return (elem);
+
+    if (ctxt->error != NULL)
+        ctxt->error(ctxt->userData, "Could not add element %s\n",
+                    name);
+    xmlFree((char *) elem->name);
+    xmlFree(elem);
+    return (NULL);
+}
+
+/**
+ * xmlSchemaAddType:
+ * @ctxt: a schema validation context
+ * @schema: the schema being built
+ * @name: the item name
+ *
+ * Add an XML schema Type definition, registered in the type table of
+ * @schema under @name and the schema target namespace. The new type
+ * starts with minOccurs and maxOccurs both set to 1.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the new structure or NULL in case of error
+ */
+static xmlSchemaTypePtr
+xmlSchemaAddType(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                 const xmlChar * name)
+{
+    xmlSchemaTypePtr type;
+
+    if ((ctxt == NULL) || (schema == NULL) || (name == NULL))
+        return (NULL);
+
+    /* The table is created the first time a type is added */
+    if (schema->typeDecl == NULL) {
+        schema->typeDecl = xmlHashCreate(10);
+        if (schema->typeDecl == NULL)
+            return (NULL);
+    }
+
+    type = (xmlSchemaTypePtr) xmlMalloc(sizeof(xmlSchemaType));
+    if (type == NULL) {
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData, "Out of memory\n");
+        return (NULL);
+    }
+    memset(type, 0, sizeof(xmlSchemaType));
+    type->name = xmlStrdup(name);
+
+    if (xmlHashAddEntry2(schema->typeDecl, name, schema->targetNamespace,
+                         type) != 0) {
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData, "Could not add type %s\n", name);
+        /* The table did not take the entry, release it here */
+        xmlFree((char *) type->name);
+        xmlFree(type);
+        return (NULL);
+    }
+
+    /* Default occurrence constraints */
+    type->minOccurs = 1;
+    type->maxOccurs = 1;
+
+    return (type);
+}
+
+/************************************************************************
+ * *
+ * Utilities for parsing *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlGetQNameProp:
+ * @ctxt: a schema validation context
+ * @node: a subtree containing XML Schema informations
+ * @name: the attribute name
+ * @namespace: the result namespace if any, may be NULL
+ *
+ * Extract a QName Attribute value. When the value carries a prefix,
+ * the prefix is resolved against the namespaces in scope on @node and
+ * an error is reported through @ctxt if it is not bound.
+ *
+ * Returns the NCName or NULL if not found, and also update @namespace
+ * with the namespace URI. Both the returned string and the string
+ * stored in @namespace are allocated copies.
+ */
+static xmlChar *
+xmlGetQNameProp(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node,
+                const char *name,
+                xmlChar **namespace) {
+    xmlChar *val, *ret, *prefix;
+    xmlNsPtr ns;
+
+
+    if (namespace != NULL)
+        *namespace = NULL;
+    val = xmlGetProp(node, (const xmlChar *) name);
+    if (val == NULL)
+        return(NULL);
+
+    /* No local part was split off: the whole value is returned */
+    ret = xmlSplitQName2(val, &prefix);
+    if (ret == NULL)
+        return(val);
+    xmlFree(val);
+
+    ns = xmlSearchNs(node->doc, node, prefix);
+    if (ns == NULL) {
+        xmlSchemaErrorContext(ctxt, NULL, node, NULL);
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData,
+                        "Attribute %s: the QName prefix %s is undefined\n",
+                        name, prefix);
+    } else if (namespace != NULL) {
+        /* @namespace is optional, only store the URI when requested */
+        *namespace = xmlStrdup(ns->href);
+    }
+    xmlFree(prefix);
+    return(ret);
+}
+
+/**
+ * xmlGetMaxOccurs:
+ * @ctxt: a schema validation context
+ * @node: a subtree containing XML Schema informations
+ *
+ * Get the maxOccurs property. The value is either the string
+ * "unbounded" or a decimal number which may be surrounded by blanks;
+ * anything else is reported as an error through @ctxt.
+ *
+ * Returns the default (1) if not found or invalid, UNBOUNDED for
+ * "unbounded", or the value
+ */
+static int
+xmlGetMaxOccurs(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node) {
+    xmlChar *val, *cur;
+    int ret = 0;
+
+    val = xmlGetProp(node, (const xmlChar *) "maxOccurs");
+    if (val == NULL)
+        return(1);
+
+    if (xmlStrEqual(val, (const xmlChar *) "unbounded")) {
+        xmlFree(val);
+        return(UNBOUNDED); /* encoding it with -1 might be another option */
+    }
+
+    /* Parse [blanks] digits [blanks] */
+    cur = val;
+    while (IS_BLANK(*cur)) cur++;
+    while ((*cur >= '0') && (*cur <= '9')) {
+        ret = ret * 10 + (*cur - '0');
+        cur++;
+    }
+    while (IS_BLANK(*cur)) cur++;
+    if (*cur != 0) {
+        /* Trailing garbage: report it against the right attribute */
+        xmlSchemaErrorContext(ctxt, NULL, node, NULL);
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData, "invalid value for maxOccurs: %s\n",
+                        val);
+        xmlFree(val);
+        return(1);
+    }
+    xmlFree(val);
+    return(ret);
+}
+
+/**
+ * xmlGetMinOccurs:
+ * @ctxt: a schema validation context
+ * @node: a subtree containing XML Schema informations
+ *
+ * Get the minOccurs property, a decimal number which may be
+ * surrounded by blanks; anything else is reported as an error
+ * through @ctxt.
+ *
+ * Returns the default (1) if not found or invalid, or the value
+ */
+static int
+xmlGetMinOccurs(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node) {
+    xmlChar *val;
+    const xmlChar *p;
+    int occurs = 0;
+
+    val = xmlGetProp(node, (const xmlChar *) "minOccurs");
+    if (val == NULL)
+        return(1);
+
+    /* Parse [blanks] digits [blanks] */
+    p = val;
+    while (IS_BLANK(*p))
+        p++;
+    for (; (*p >= '0') && (*p <= '9'); p++)
+        occurs = occurs * 10 + (*p - '0');
+    while (IS_BLANK(*p))
+        p++;
+
+    if (*p != 0) {
+        /* Something is left after the number: fall back to the default */
+        xmlSchemaErrorContext(ctxt, NULL, node, NULL);
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData, "invalid value for minOccurs: %s\n",
+                        val);
+        occurs = 1;
+    }
+    xmlFree(val);
+    return(occurs);
+}
+
+/**
+ * xmlGetBooleanProp:
+ * @ctxt: a schema validation context
+ * @node: a subtree containing XML Schema informations
+ * @name: the attribute name
+ * @def: the default value
+ *
+ * Get if a boolean property is set. The legal literals of the
+ * XML Schema boolean datatype are accepted: "true", "false", "1"
+ * and "0". Any other value is reported as an error through @ctxt and
+ * the default is kept.
+ *
+ * Returns the default if not found or invalid, 0 if found to be false,
+ * 1 if found to be true
+ */
+static int
+xmlGetBooleanProp(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node,
+                  const char *name, int def) {
+    xmlChar *val;
+
+    val = xmlGetProp(node, (const xmlChar *) name);
+    if (val == NULL)
+        return(def);
+
+    /*
+     * XML Schema Part 2, 3.2.2.1 Lexical representation: a boolean
+     * can have the following legal literals {true, false, 1, 0}.
+     */
+    if ((xmlStrEqual(val, BAD_CAST"true")) ||
+        (xmlStrEqual(val, BAD_CAST"1")))
+        def = 1;
+    else if ((xmlStrEqual(val, BAD_CAST"false")) ||
+             (xmlStrEqual(val, BAD_CAST"0")))
+        def = 0;
+    else {
+        xmlSchemaErrorContext(ctxt, NULL, node, NULL);
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData,
+                        "Attribute %s: the value %s is not boolean\n",
+                        name, val);
+    }
+    xmlFree(val);
+    return(def);
+}
+
+/************************************************************************
+ * *
+ * Schema extraction from an Infoset *
+ * *
+ ************************************************************************/
+/*
+ * Forward declarations of the parsing routines, so that they can be
+ * called before the point where they are defined.
+ */
+static xmlSchemaTypePtr xmlSchemaParseSimpleType(xmlSchemaParserCtxtPtr
+ ctxt, xmlSchemaPtr schema,
+ xmlNodePtr node);
+static xmlSchemaTypePtr xmlSchemaParseComplexType(xmlSchemaParserCtxtPtr ctxt,
+ xmlSchemaPtr schema,
+ xmlNodePtr node);
+static xmlSchemaTypePtr xmlSchemaParseRestriction(xmlSchemaParserCtxtPtr ctxt,
+ xmlSchemaPtr schema,
+ xmlNodePtr node,
+ int simple);
+static xmlSchemaTypePtr xmlSchemaParseSequence(xmlSchemaParserCtxtPtr ctxt,
+ xmlSchemaPtr schema,
+ xmlNodePtr node);
+static xmlSchemaTypePtr xmlSchemaParseAll(xmlSchemaParserCtxtPtr ctxt,
+ xmlSchemaPtr schema,
+ xmlNodePtr node);
+static xmlSchemaAttributePtr xmlSchemaParseAttribute(xmlSchemaParserCtxtPtr
+ ctxt,
+ xmlSchemaPtr schema,
+ xmlNodePtr node);
+static xmlSchemaAttributeGroupPtr
+xmlSchemaParseAttributeGroup(xmlSchemaParserCtxtPtr ctxt,
+ xmlSchemaPtr schema, xmlNodePtr node);
+static xmlSchemaTypePtr xmlSchemaParseChoice(xmlSchemaParserCtxtPtr ctxt,
+ xmlSchemaPtr schema,
+ xmlNodePtr node);
+static xmlSchemaTypePtr xmlSchemaParseList(xmlSchemaParserCtxtPtr ctxt,
+ xmlSchemaPtr schema,
+ xmlNodePtr node);
+static xmlSchemaAttributePtr
+xmlSchemaParseAnyAttribute(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+ xmlNodePtr node);
+
+/**
+ * xmlSchemaParseAttrDecls:
+ * @ctxt: a schema validation context
+ * @schema: the schema being built
+ * @child: the first node which may hold an attribute declaration
+ * @type: the hosting type
+ *
+ * parse a XML schema attrDecls declaration corresponding to
+ * <!ENTITY % attrDecls
+ * '((%attribute;| %attributeGroup;)*,(%anyAttribute;)?)'>
+ * The declarations parsed are chained through their next field and
+ * the head of the chain is stored in the attributes of @type.
+ *
+ * Returns the first sibling which is not part of the attrDecls
+ */
+static xmlNodePtr
+xmlSchemaParseAttrDecls(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                        xmlNodePtr child, xmlSchemaTypePtr type)
+{
+    xmlSchemaAttributePtr tail = NULL;
+    xmlSchemaAttributePtr attr;
+
+    /* (attribute | attributeGroup)* */
+    for (; (IS_SCHEMA(child, "attribute")) ||
+           (IS_SCHEMA(child, "attributeGroup")); child = child->next) {
+        if (IS_SCHEMA(child, "attribute"))
+            attr = xmlSchemaParseAttribute(ctxt, schema, child);
+        else
+            /* groups are chained in the same list as attributes */
+            attr = (xmlSchemaAttributePtr)
+                   xmlSchemaParseAttributeGroup(ctxt, schema, child);
+        if (attr == NULL)
+            continue;
+        if (tail != NULL)
+            tail->next = attr;
+        else
+            type->attributes = attr;
+        tail = attr;
+    }
+
+    /* (anyAttribute)? */
+    if (IS_SCHEMA(child, "anyAttribute")) {
+        attr = xmlSchemaParseAnyAttribute(ctxt, schema, child);
+        if (attr != NULL) {
+            if (tail != NULL)
+                tail->next = attr;
+            else
+                type->attributes = attr;
+            tail = attr;
+        }
+        child = child->next;
+    }
+    return(child);
+}
+
+/**
+ * xmlSchemaParseAnnotation:
+ * @ctxt: a schema validation context
+ * @schema: the schema being built
+ * @node: a subtree containing XML Schema informations
+ *
+ * parse a XML schema Annotation declaration
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the annotation built by xmlSchemaNewAnnot() for @node, or
+ * NULL if one of the arguments is NULL.
+ */
+static xmlSchemaAnnotPtr
+xmlSchemaParseAnnotation(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                         xmlNodePtr node)
+{
+    if ((ctxt == NULL) || (schema == NULL) || (node == NULL))
+        return (NULL);
+
+    return (xmlSchemaNewAnnot(ctxt, node));
+}
+
+/**
+ * xmlSchemaParseFacet:
+ * @ctxt: a schema validation context
+ * @schema: the schema being built
+ * @node: a subtree containing XML Schema informations
+ *
+ * parse a XML schema Facet declaration. The facet kind is derived
+ * from the element name of @node and its value from the mandatory
+ * "value" attribute; an optional annotation child is parsed too.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the new facet structure or NULL in case of error
+ */
+static xmlSchemaFacetPtr
+xmlSchemaParseFacet(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                    xmlNodePtr node)
+{
+    xmlSchemaFacetPtr facet;
+    xmlNodePtr child = NULL;
+    xmlChar *value;
+
+    if ((ctxt == NULL) || (schema == NULL) || (node == NULL))
+        return (NULL);
+
+    facet = xmlSchemaNewFacet(ctxt);
+    if (facet == NULL)
+        return (NULL);
+    facet->node = node;
+    value = xmlGetProp(node, (const xmlChar *) "value");
+    if (value == NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, child);
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData, "Facet %s has no value\n", node->name);
+        xmlSchemaFreeFacet(facet);
+        return (NULL);
+    }
+    if (IS_SCHEMA(node, "minInclusive")) {
+        facet->type = XML_SCHEMA_FACET_MININCLUSIVE;
+    } else if (IS_SCHEMA(node, "minExclusive")) {
+        facet->type = XML_SCHEMA_FACET_MINEXCLUSIVE;
+    } else if (IS_SCHEMA(node, "maxInclusive")) {
+        facet->type = XML_SCHEMA_FACET_MAXINCLUSIVE;
+    } else if (IS_SCHEMA(node, "maxExclusive")) {
+        facet->type = XML_SCHEMA_FACET_MAXEXCLUSIVE;
+    } else if (IS_SCHEMA(node, "totalDigits")) {
+        facet->type = XML_SCHEMA_FACET_TOTALDIGITS;
+    } else if (IS_SCHEMA(node, "fractionDigits")) {
+        facet->type = XML_SCHEMA_FACET_FRACTIONDIGITS;
+    } else if (IS_SCHEMA(node, "pattern")) {
+        facet->type = XML_SCHEMA_FACET_PATTERN;
+    } else if (IS_SCHEMA(node, "enumeration")) {
+        facet->type = XML_SCHEMA_FACET_ENUMERATION;
+    } else if (IS_SCHEMA(node, "whiteSpace")) {
+        facet->type = XML_SCHEMA_FACET_WHITESPACE;
+    } else if (IS_SCHEMA(node, "length")) {
+        facet->type = XML_SCHEMA_FACET_LENGTH;
+    } else if (IS_SCHEMA(node, "maxLength")) {
+        facet->type = XML_SCHEMA_FACET_MAXLENGTH;
+    } else if (IS_SCHEMA(node, "minLength")) {
+        facet->type = XML_SCHEMA_FACET_MINLENGTH;
+    } else {
+        xmlSchemaErrorContext(ctxt, schema, node, child);
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData, "Unknown facet type %s\n", node->name);
+        /*
+         * The value has not been handed over to the facet yet, so it
+         * has to be released here or it would leak.
+         */
+        xmlFree(value);
+        xmlSchemaFreeFacet(facet);
+        return(NULL);
+    }
+    facet->id = xmlGetProp(node, (const xmlChar *) "id");
+    /* From here on the facet holds the value string */
+    facet->value = value;
+    child = node->children;
+
+    if (IS_SCHEMA(child, "annotation")) {
+        facet->annot = xmlSchemaParseAnnotation(ctxt, schema, child);
+        child = child->next;
+    }
+    /* Nothing but the annotation is allowed; the facet is still returned */
+    if (child != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, child);
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData,
+                        "Facet %s has unexpected child content\n",
+                        node->name);
+    }
+    return (facet);
+}
+
+/**
+ * xmlSchemaParseAny:
+ * @ctxt: a schema validation context
+ * @schema: the schema being built
+ * @node: a subtree containing XML Schema informations
+ *
+ * parse a XML schema Any declaration. The type is registered under a
+ * generated name built from the counter of @ctxt, and carries the
+ * minOccurs/maxOccurs of @node and its optional annotation.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the new type structure or NULL in case of error
+ */
+static xmlSchemaTypePtr
+xmlSchemaParseAny(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                  xmlNodePtr node)
+{
+    xmlSchemaTypePtr type;
+    xmlNodePtr child = NULL;
+    xmlChar name[30];
+
+    if ((ctxt == NULL) || (schema == NULL) || (node == NULL))
+        return (NULL);
+    snprintf((char *)name, 30, "any %d", ctxt->counter++ + 1);
+    type = xmlSchemaAddType(ctxt, schema, name);
+    if (type == NULL)
+        return (NULL);
+    type->node = node;
+    type->type = XML_SCHEMA_TYPE_ANY;
+    child = node->children;
+    type->minOccurs = xmlGetMinOccurs(ctxt, node);
+    type->maxOccurs = xmlGetMaxOccurs(ctxt, node);
+
+    if (IS_SCHEMA(child, "annotation")) {
+        type->annot = xmlSchemaParseAnnotation(ctxt, schema, child);
+        child = child->next;
+    }
+    /* Nothing but the annotation is allowed; the type is still returned */
+    if (child != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, child);
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData,
+                        "any %s has unexpected content\n",
+                        type->name);
+    }
+
+    return (type);
+}
+
+/**
+ * xmlSchemaParseNotation:
+ * @ctxt: a schema validation context
+ * @schema: the schema being built
+ * @node: a subtree containing XML Schema informations
+ *
+ * parse a XML schema Notation declaration. The "name" attribute is
+ * mandatory; an optional annotation child is parsed too.
+ *
+ * Returns the new structure or NULL in case of error
+ */
+static xmlSchemaNotationPtr
+xmlSchemaParseNotation(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                       xmlNodePtr node)
+{
+    xmlChar *name;
+    xmlSchemaNotationPtr ret;
+    xmlNodePtr child = NULL;
+
+    if ((ctxt == NULL) || (schema == NULL) || (node == NULL))
+        return (NULL);
+    name = xmlGetProp(node, (const xmlChar *) "name");
+    if (name == NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, child);
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData, "Notation has no name\n");
+        return (NULL);
+    }
+    ret = xmlSchemaAddNotation(ctxt, schema, name);
+    if (ret == NULL) {
+        xmlFree(name);
+        return (NULL);
+    }
+    child = node->children;
+    if (IS_SCHEMA(child, "annotation")) {
+        ret->annot = xmlSchemaParseAnnotation(ctxt, schema, child);
+        child = child->next;
+    }
+    /* Nothing but the annotation is allowed; the notation is still returned */
+    if (child != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, child);
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData,
+                        "notation %s has unexpected content\n",
+                        name);
+    }
+
+    /*
+     * xmlSchemaAddNotation() stored its own copy of the name, the
+     * string returned by xmlGetProp() has to be released here.
+     */
+    xmlFree(name);
+    return (ret);
+}
+
+/**
+ * xmlSchemaParseAnyAttribute:
+ * @ctxt: a schema validation context
+ * @schema: the schema being built
+ * @node: a subtree containing XML Schema informations
+ *
+ * parse a XML schema AnyAttribute declaration. The attribute is
+ * registered under a generated name built from the counter of @ctxt;
+ * its occurs field records the processContents mode, strict being
+ * used when the attribute is missing or not recognized.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns an attribute def structure or NULL
+ */
+static xmlSchemaAttributePtr
+xmlSchemaParseAnyAttribute(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                           xmlNodePtr node)
+{
+    xmlChar *mode;
+    xmlSchemaAttributePtr ret;
+    xmlNodePtr child = NULL;
+    char name[100];
+
+    if ((ctxt == NULL) || (schema == NULL) || (node == NULL))
+        return (NULL);
+
+    snprintf(name, 99, "anyattr %d", ctxt->counter++ + 1);
+    ret = xmlSchemaAddAttribute(ctxt, schema, (xmlChar *)name);
+    if (ret == NULL)
+        return (NULL);
+
+    ret->id = xmlGetProp(node, (const xmlChar *) "id");
+
+    /* strict unless processContents asks for something else */
+    ret->occurs = XML_SCHEMAS_ANYATTR_STRICT;
+    mode = xmlGetProp(node, (const xmlChar *) "processContents");
+    if (mode != NULL) {
+        if (xmlStrEqual(mode, (const xmlChar *)"skip")) {
+            ret->occurs = XML_SCHEMAS_ANYATTR_SKIP;
+        } else if (xmlStrEqual(mode, (const xmlChar *)"lax")) {
+            ret->occurs = XML_SCHEMAS_ANYATTR_LAX;
+        } else if (!xmlStrEqual(mode, (const xmlChar *)"strict")) {
+            xmlSchemaErrorContext(ctxt, schema, node, child);
+            if (ctxt->error != NULL)
+                ctxt->error(ctxt->userData,
+                "anyAttribute has unexpected content for processContents: %s\n",
+                            mode);
+        }
+        xmlFree(mode);
+    }
+
+    child = node->children;
+    if (IS_SCHEMA(child, "annotation")) {
+        ret->annot = xmlSchemaParseAnnotation(ctxt, schema, child);
+        child = child->next;
+    }
+    /* Nothing but the annotation is allowed; the result is still returned */
+    if (child != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, child);
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                        "anyAttribute %s has unexpected content\n",
+                        name);
+    }
+
+    return (ret);
+}
+
+
+
+/**
+ * xmlSchemaParseAttribute:
+ * @ctxt: a schema validation context
+ * @schema: the schema being built
+ * @node: a subtree containing XML Schema informations
+ *
+ * parse a XML schema Attribute declaration. The attribute needs
+ * either a "name" or a "ref"; with only a "ref" it is registered
+ * under a generated name built from the counter of @ctxt. The "type"
+ * QName, an optional annotation and an optional simpleType child are
+ * parsed too.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the attribute declaration or NULL in case of error.
+ */
+static xmlSchemaAttributePtr
+xmlSchemaParseAttribute(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                        xmlNodePtr node)
+{
+    xmlChar *name, *refNs = NULL, *ref = NULL;
+    xmlSchemaAttributePtr ret;
+    xmlNodePtr child = NULL;
+
+    if ((ctxt == NULL) || (schema == NULL) || (node == NULL))
+        return (NULL);
+    name = xmlGetProp(node, (const xmlChar *) "name");
+    if (name == NULL) {
+        char buf[100];
+
+        ref = xmlGetQNameProp(ctxt, node, "ref", &refNs);
+        if (ref == NULL) {
+            xmlSchemaErrorContext(ctxt, schema, node, child);
+            if ((ctxt != NULL) && (ctxt->error != NULL))
+                ctxt->error(ctxt->userData, "Attribute has no name nor ref\n");
+            return (NULL);
+        }
+        snprintf(buf, 99, "anonattr%d", ctxt->counter++ + 1);
+        name = xmlStrdup((xmlChar *) buf);
+    }
+    ret = xmlSchemaAddAttribute(ctxt, schema, name);
+    if (ret == NULL) {
+        xmlFree(name);
+        if (ref != NULL)
+            xmlFree(ref);
+        /* the namespace copy from xmlGetQNameProp() must not leak */
+        if (refNs != NULL)
+            xmlFree(refNs);
+        return (NULL);
+    }
+    /* The declaration takes ownership of ref and refNs */
+    ret->ref = ref;
+    ret->refNs = refNs;
+    ret->typeName = xmlGetQNameProp(ctxt, node, "type", &(ret->typeNs));
+    child = node->children;
+    if (IS_SCHEMA(child, "annotation")) {
+        ret->annot = xmlSchemaParseAnnotation(ctxt, schema, child);
+        child = child->next;
+    }
+    if (IS_SCHEMA(child, "simpleType")) {
+        ret->base = xmlSchemaParseSimpleType(ctxt, schema, child);
+        child = child->next;
+    }
+    /* Nothing else is allowed; the declaration is still returned */
+    if (child != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, child);
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData,
+                        "attribute %s has unexpected content\n",
+                        name);
+    }
+
+    /*
+     * name is still needed by the error message above, so it is only
+     * released once the parsing is done.
+     */
+    xmlFree(name);
+    return (ret);
+}
+
+/**
+ * xmlSchemaParseAttributeGroup:
+ * @ctxt:  a schema validation context
+ * @schema:  the schema being built
+ * @node:  a subtree containing XML Schema informations
+ *
+ * Parse an XML Schema attribute group declaration. The group is
+ * registered under its "name" attribute or, when only a "ref" is
+ * present, under a generated "anonattrgroupN" name. While the children
+ * are parsed, ctxt->container points to that name; the previous
+ * container is restored before returning.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the attribute group or NULL in case of error.
+ */
+static xmlSchemaAttributeGroupPtr
+xmlSchemaParseAttributeGroup(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                             xmlNodePtr node)
+{
+    xmlChar *name, *refNs = NULL, *ref = NULL;
+    xmlSchemaAttributeGroupPtr ret;
+    xmlSchemaAttributePtr last = NULL, attr;
+    xmlNodePtr child = NULL;
+    xmlChar *oldcontainer;
+
+    if ((ctxt == NULL) || (schema == NULL) || (node == NULL))
+        return (NULL);
+    oldcontainer = ctxt->container;
+    name = xmlGetProp(node, (const xmlChar *) "name");
+    if (name == NULL) {
+        char buf[100];
+
+        /* No explicit name: a "ref" is then mandatory. */
+        ref = xmlGetQNameProp(ctxt, node, "ref", &refNs);
+        if (ref == NULL) {
+            xmlSchemaErrorContext(ctxt, schema, node, child);
+            if ((ctxt != NULL) && (ctxt->error != NULL))
+                ctxt->error(ctxt->userData,
+                            "AttributeGroup has no name nor ref\n");
+            return (NULL);
+        }
+        snprintf(buf, 99, "anonattrgroup%d", ctxt->counter++ + 1);
+        name = xmlStrdup((xmlChar *) buf);
+    }
+    ret = xmlSchemaAddAttributeGroup(ctxt, schema, name);
+    if (ret == NULL) {
+        xmlFree(name);
+        if (ref != NULL)
+            xmlFree(ref);
+        return (NULL);
+    }
+    ret->ref = ref;
+    ret->refNs = refNs;
+    child = node->children;
+    ctxt->container = name;
+    if (IS_SCHEMA(child, "annotation")) {
+        ret->annot = xmlSchemaParseAnnotation(ctxt, schema, child);
+        child = child->next;
+    }
+    /* Chain every nested attribute / attributeGroup onto ret->attributes. */
+    while ((IS_SCHEMA(child, "attribute")) ||
+           (IS_SCHEMA(child, "attributeGroup"))) {
+        attr = NULL;
+        if (IS_SCHEMA(child, "attribute")) {
+            attr = xmlSchemaParseAttribute(ctxt, schema, child);
+        } else if (IS_SCHEMA(child, "attributeGroup")) {
+            attr = (xmlSchemaAttributePtr)
+                xmlSchemaParseAttributeGroup(ctxt, schema, child);
+        }
+        if (attr != NULL) {
+            if (last == NULL) {
+                ret->attributes = attr;
+                last = attr;
+            } else {
+                last->next = attr;
+                last = attr;
+            }
+        }
+        child = child->next;
+    }
+    if (IS_SCHEMA(child, "anyAttribute")) {
+        TODO
+        child = child->next;
+    }
+    if (child != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, child);
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData,
+                        "attribute group %s has unexpected content\n",
+                        name);
+    }
+
+    ctxt->container = oldcontainer;
+    /*
+     * The container no longer points at name and all diagnostics have
+     * been emitted: release the local copy instead of leaking it.
+     */
+    xmlFree(name);
+    return (ret);
+}
+
+/**
+ * xmlSchemaParseElement:
+ * @ctxt:  a schema validation context
+ * @schema:  the schema being built
+ * @node:  a subtree containing XML Schema informations
+ * @toplevel:  non-zero when the declaration is flagged as top-level
+ *
+ * Parse an XML Schema element declaration. The element is registered
+ * under its "name" attribute or, when only a "ref" is present, under a
+ * generated "anonelemN" name, in the namespace given by its own
+ * "targetNamespace" attribute or else by the schema's one.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the element declaration or NULL in case of error.
+ */
+static xmlSchemaElementPtr
+xmlSchemaParseElement(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                      xmlNodePtr node, int toplevel)
+{
+    xmlChar *name, *refNs = NULL, *ref = NULL, *namespace, *fixed;
+    xmlSchemaElementPtr ret;
+    xmlNodePtr child = NULL;
+    xmlChar *oldcontainer;
+
+    if ((ctxt == NULL) || (schema == NULL) || (node == NULL))
+        return (NULL);
+    oldcontainer = ctxt->container;
+    name = xmlGetProp(node, (const xmlChar *) "name");
+    if (name == NULL) {
+        char buf[100];
+
+        /* No explicit name: a "ref" is then mandatory. */
+        ref = xmlGetQNameProp(ctxt, node, "ref", &refNs);
+        if (ref == NULL) {
+            xmlSchemaErrorContext(ctxt, schema, node, child);
+            if ((ctxt != NULL) && (ctxt->error != NULL))
+                ctxt->error(ctxt->userData, "Element has no name nor ref\n");
+            return (NULL);
+        }
+        snprintf(buf, 99, "anonelem%d", ctxt->counter++ + 1);
+        name = xmlStrdup((xmlChar *) buf);
+    }
+    namespace = xmlGetProp(node, (const xmlChar *) "targetNamespace");
+    if (namespace == NULL)
+        ret =
+            xmlSchemaAddElement(ctxt, schema, name,
+                                schema->targetNamespace);
+    else
+        ret = xmlSchemaAddElement(ctxt, schema, name, namespace);
+    if (namespace != NULL)
+        xmlFree(namespace);
+    if (ret == NULL) {
+        xmlFree(name);
+        if (ref != NULL)
+            xmlFree(ref);
+        return (NULL);
+    }
+    ret->type = XML_SCHEMA_TYPE_ELEMENT;
+    ret->ref = ref;
+    ret->refNs = refNs;
+    if (ref != NULL)
+        ret->flags |= XML_SCHEMAS_ELEM_REF;
+    if (toplevel)
+        ret->flags |= XML_SCHEMAS_ELEM_TOPLEVEL;
+    if (xmlGetBooleanProp(ctxt, node, "nillable", 0))
+        ret->flags |= XML_SCHEMAS_ELEM_NILLABLE;
+    /* "abstract" has its own flag; it must not mark the element nillable. */
+    if (xmlGetBooleanProp(ctxt, node, "abstract", 0))
+        ret->flags |= XML_SCHEMAS_ELEM_ABSTRACT;
+    ctxt->container = name;
+
+    ret->id = xmlGetProp(node, BAD_CAST "id");
+    ret->namedType = xmlGetQNameProp(ctxt, node, "type", &(ret->namedTypeNs));
+    ret->substGroup = xmlGetQNameProp(ctxt, node, "substitutionGroup",
+                                      &(ret->substGroupNs));
+    fixed = xmlGetProp(node, BAD_CAST "fixed");
+    ret->minOccurs = xmlGetMinOccurs(ctxt, node);
+    ret->maxOccurs = xmlGetMaxOccurs(ctxt, node);
+
+    /* "default" and "fixed" are mutually exclusive; "default" wins here. */
+    ret->value = xmlGetProp(node, BAD_CAST "default");
+    if ((ret->value != NULL) && (fixed != NULL)) {
+        xmlSchemaErrorContext(ctxt, schema, node, child);
+        /* The error callback is optional, as everywhere else in the parser. */
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData,
+                        "Element %s has both default and fixed\n",
+                        ret->name);
+        xmlFree(fixed);
+    } else if (fixed != NULL) {
+        ret->flags |= XML_SCHEMAS_ELEM_FIXED;
+        ret->value = fixed;
+    }
+
+    child = node->children;
+    if (IS_SCHEMA(child, "annotation")) {
+        ret->annot = xmlSchemaParseAnnotation(ctxt, schema, child);
+        child = child->next;
+    }
+    if (IS_SCHEMA(child, "complexType")) {
+        ret->subtypes = xmlSchemaParseComplexType(ctxt, schema, child);
+        child = child->next;
+    } else if (IS_SCHEMA(child, "simpleType")) {
+        ret->subtypes = xmlSchemaParseSimpleType(ctxt, schema, child);
+        child = child->next;
+    }
+    /* Identity constraints are recognized but not processed yet. */
+    while ((IS_SCHEMA(child, "unique")) ||
+           (IS_SCHEMA(child, "key")) ||
+           (IS_SCHEMA(child, "keyref"))) {
+        TODO
+        child = child->next;
+    }
+    if (child != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, child);
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData,
+                        "element %s has unexpected content\n",
+                        name);
+    }
+
+    ctxt->container = oldcontainer;
+    xmlFree(name);
+    return (ret);
+}
+
+/**
+ * xmlSchemaParseUnion:
+ * @ctxt:  a schema validation context
+ * @schema:  the schema being built
+ * @node:  a subtree containing XML Schema informations
+ *
+ * Parse an XML Schema union definition: an anonymous type named
+ * "union N" is registered, its "memberTypes" attribute is kept in the
+ * ref field and the nested simpleType children are chained as subtypes.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the type definition or NULL in case of error.
+ */
+static xmlSchemaTypePtr
+xmlSchemaParseUnion(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                    xmlNodePtr node)
+{
+    xmlSchemaTypePtr result, member, tail = NULL;
+    xmlNodePtr cur;
+    xmlChar label[30];
+
+    if (!ctxt || !schema || !node)
+        return (NULL);
+
+    snprintf((char *) label, 30, "union %d", ctxt->counter++ + 1);
+    result = xmlSchemaAddType(ctxt, schema, label);
+    if (result == NULL)
+        return (NULL);
+
+    result->node = node;
+    /* NOTE(review): tagged as LIST although this is a union - confirm intent. */
+    result->type = XML_SCHEMA_TYPE_LIST;
+    result->id = xmlGetProp(node, BAD_CAST "id");
+    result->ref = xmlGetProp(node, BAD_CAST "memberTypes");
+
+    cur = node->children;
+    if (IS_SCHEMA(cur, "annotation")) {
+        result->annot = xmlSchemaParseAnnotation(ctxt, schema, cur);
+        cur = cur->next;
+    }
+
+    /* Append each successfully parsed member type to the subtypes list. */
+    for (; IS_SCHEMA(cur, "simpleType"); cur = cur->next) {
+        member = (xmlSchemaTypePtr)
+            xmlSchemaParseSimpleType(ctxt, schema, cur);
+        if (member == NULL)
+            continue;
+        if (tail != NULL)
+            tail->next = member;
+        else
+            result->subtypes = member;
+        member->next = NULL;
+        tail = member;
+    }
+
+    /* Anything left over is not allowed inside a union. */
+    if (cur != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, cur);
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                        "Union %s has unexpected content\n",
+                        result->name);
+    }
+    return (result);
+}
+
+/**
+ * xmlSchemaParseList:
+ * @ctxt:  a schema validation context
+ * @schema:  the schema being built
+ * @node:  a subtree containing XML Schema informations
+ *
+ * Parse an XML Schema list definition: an anonymous type named
+ * "list N" is registered and an optional nested simpleType becomes
+ * its subtype.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the type definition or NULL in case of error.
+ */
+static xmlSchemaTypePtr
+xmlSchemaParseList(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                   xmlNodePtr node)
+{
+    xmlSchemaTypePtr result;
+    xmlNodePtr cur;
+    xmlChar label[30];
+
+    if (!ctxt || !schema || !node)
+        return (NULL);
+
+    snprintf((char *) label, 30, "list %d", ctxt->counter++ + 1);
+    result = xmlSchemaAddType(ctxt, schema, label);
+    if (result == NULL)
+        return (NULL);
+
+    result->node = node;
+    result->type = XML_SCHEMA_TYPE_LIST;
+    result->id = xmlGetProp(node, BAD_CAST "id");
+    result->ref = xmlGetQNameProp(ctxt, node, "ref", &(result->refNs));
+
+    cur = node->children;
+    if (IS_SCHEMA(cur, "annotation")) {
+        result->annot = xmlSchemaParseAnnotation(ctxt, schema, cur);
+        cur = cur->next;
+    }
+    if (IS_SCHEMA(cur, "simpleType")) {
+        result->subtypes = (xmlSchemaTypePtr)
+            xmlSchemaParseSimpleType(ctxt, schema, cur);
+        cur = cur->next;
+    }
+
+    /* Anything left over is not allowed inside a list. */
+    if (cur != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, cur);
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                        "List %s has unexpected content\n",
+                        result->name);
+    }
+    return (result);
+}
+/**
+ * xmlSchemaParseSimpleType:
+ * @ctxt:  a schema validation context
+ * @schema:  the schema being built
+ * @node:  a subtree containing XML Schema informations
+ *
+ * Parse an XML Schema simple type definition. The type is registered
+ * under its "name" attribute, or under a generated "simpletypeN" name
+ * when it is anonymous; its restriction, list or union child, if any,
+ * becomes the subtype.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the type definition or NULL in case of error.
+ */
+static xmlSchemaTypePtr
+xmlSchemaParseSimpleType(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                         xmlNodePtr node)
+{
+    xmlSchemaTypePtr result;
+    xmlNodePtr cur = NULL;
+    xmlChar *typeName;
+
+    if (!ctxt || !schema || !node)
+        return (NULL);
+
+    typeName = xmlGetProp(node, (const xmlChar *) "name");
+    if (typeName == NULL) {
+        char generated[100];
+
+        snprintf(generated, 99, "simpletype%d", ctxt->counter++ + 1);
+        typeName = xmlStrdup((xmlChar *) generated);
+    }
+    /* Still NULL here means the generated name could not be duplicated. */
+    if (typeName == NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, cur);
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData, "simpleType has no name\n");
+        return (NULL);
+    }
+
+    result = xmlSchemaAddType(ctxt, schema, typeName);
+    xmlFree(typeName);
+    if (result == NULL)
+        return (NULL);
+
+    result->node = node;
+    result->type = XML_SCHEMA_TYPE_SIMPLE;
+    result->id = xmlGetProp(node, BAD_CAST "id");
+
+    cur = node->children;
+    if (IS_SCHEMA(cur, "annotation")) {
+        result->annot = xmlSchemaParseAnnotation(ctxt, schema, cur);
+        cur = cur->next;
+    }
+
+    /* At most one of restriction / list / union defines the content. */
+    if (IS_SCHEMA(cur, "restriction")) {
+        result->subtypes = (xmlSchemaTypePtr)
+            xmlSchemaParseRestriction(ctxt, schema, cur, 1);
+        cur = cur->next;
+    } else if (IS_SCHEMA(cur, "list")) {
+        result->subtypes = (xmlSchemaTypePtr)
+            xmlSchemaParseList(ctxt, schema, cur);
+        cur = cur->next;
+    } else if (IS_SCHEMA(cur, "union")) {
+        result->subtypes = (xmlSchemaTypePtr)
+            xmlSchemaParseUnion(ctxt, schema, cur);
+        cur = cur->next;
+    } else {
+        result->subtypes = NULL;
+    }
+
+    if (cur != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, cur);
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                        "SimpleType %s has unexpected content\n",
+                        result->name);
+    }
+
+    return (result);
+}
+
+
+/**
+ * xmlSchemaParseGroup:
+ * @ctxt:  a schema validation context
+ * @schema:  the schema being built
+ * @node:  a subtree containing XML Schema informations
+ *
+ * Parse an XML Schema group definition. The group is registered under
+ * its "name" attribute or, when only a "ref" is present, under a
+ * generated "anongroupN" name; its all, choice or sequence child, if
+ * any, becomes the subtype.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the type definition or NULL in case of error.
+ */
+static xmlSchemaTypePtr
+xmlSchemaParseGroup(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                    xmlNodePtr node)
+{
+    xmlSchemaTypePtr type, subtype;
+    xmlNodePtr child = NULL;
+    xmlChar *name, *ref = NULL, *refNs = NULL;
+
+    if ((ctxt == NULL) || (schema == NULL) || (node == NULL))
+        return (NULL);
+
+
+    name = xmlGetProp(node, (const xmlChar *) "name");
+    if (name == NULL) {
+        char buf[100];
+
+        /* No explicit name: a "ref" is then mandatory. */
+        ref = xmlGetQNameProp(ctxt, node, "ref", &refNs);
+        if (ref == NULL) {
+            xmlSchemaErrorContext(ctxt, schema, node, child);
+            if ((ctxt != NULL) && (ctxt->error != NULL))
+                ctxt->error(ctxt->userData, "Group has no name nor ref\n");
+            return (NULL);
+        }
+        snprintf(buf, 99, "anongroup%d", ctxt->counter++ + 1);
+        name = xmlStrdup((xmlChar *) buf);
+    }
+    type = xmlSchemaAddType(ctxt, schema, name);
+    /*
+     * name is not referenced past this point (diagnostics use
+     * type->name), so release it right away as xmlSchemaParseSimpleType
+     * does, instead of leaking it.
+     */
+    xmlFree(name);
+    if (type == NULL) {
+        /* Nobody took ownership of ref, drop it as the sibling parsers do. */
+        if (ref != NULL)
+            xmlFree(ref);
+        return (NULL);
+    }
+    type->node = node;
+    type->type = XML_SCHEMA_TYPE_GROUP;
+    type->id = xmlGetProp(node, BAD_CAST "id");
+    type->ref = ref;
+    type->refNs = refNs;
+    type->minOccurs = xmlGetMinOccurs(ctxt, node);
+    type->maxOccurs = xmlGetMaxOccurs(ctxt, node);
+
+    child = node->children;
+    if (IS_SCHEMA(child, "annotation")) {
+        type->annot = xmlSchemaParseAnnotation(ctxt, schema, child);
+        child = child->next;
+    }
+    subtype = NULL;
+    if (IS_SCHEMA(child, "all")) {
+        subtype = (xmlSchemaTypePtr)
+            xmlSchemaParseAll(ctxt, schema, child);
+        child = child->next;
+    } else if (IS_SCHEMA(child, "choice")) {
+        subtype = xmlSchemaParseChoice(ctxt, schema, child);
+        child = child->next;
+    } else if (IS_SCHEMA(child, "sequence")) {
+        subtype = (xmlSchemaTypePtr)
+            xmlSchemaParseSequence(ctxt, schema, child);
+        child = child->next;
+    }
+    if (subtype != NULL)
+        type->subtypes = subtype;
+    if (child != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, child);
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData,
+                        "Group %s has unexpected content\n",
+                        type->name);
+    }
+
+    return (type);
+}
+
+/**
+ * xmlSchemaParseAll:
+ * @ctxt:  a schema validation context
+ * @schema:  the schema being built
+ * @node:  a subtree containing XML Schema informations
+ *
+ * Parse an XML Schema "all" definition: an anonymous type named
+ * "allN" is registered and its element children are chained as
+ * subtypes.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the type definition or NULL in case of error.
+ */
+static xmlSchemaTypePtr
+xmlSchemaParseAll(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                  xmlNodePtr node)
+{
+    xmlSchemaTypePtr result, item, tail = NULL;
+    xmlNodePtr cur;
+    xmlChar label[30];
+
+    if (!ctxt || !schema || !node)
+        return (NULL);
+
+    snprintf((char *) label, 30, "all%d", ctxt->counter++ + 1);
+    result = xmlSchemaAddType(ctxt, schema, label);
+    if (result == NULL)
+        return (NULL);
+
+    result->node = node;
+    /* NOTE(review): tagged as SEQUENCE although this is an "all" - confirm intent. */
+    result->type = XML_SCHEMA_TYPE_SEQUENCE;
+    result->id = xmlGetProp(node, BAD_CAST "id");
+    result->minOccurs = xmlGetMinOccurs(ctxt, node);
+    result->maxOccurs = xmlGetMaxOccurs(ctxt, node);
+
+    cur = node->children;
+    if (IS_SCHEMA(cur, "annotation")) {
+        result->annot = xmlSchemaParseAnnotation(ctxt, schema, cur);
+        cur = cur->next;
+    }
+
+    /* Append each successfully parsed element to the subtypes list. */
+    for (; IS_SCHEMA(cur, "element"); cur = cur->next) {
+        item = (xmlSchemaTypePtr)
+            xmlSchemaParseElement(ctxt, schema, cur, 0);
+        if (item == NULL)
+            continue;
+        if (tail != NULL)
+            tail->next = item;
+        else
+            result->subtypes = item;
+        item->next = NULL;
+        tail = item;
+    }
+
+    if (cur != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, cur);
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                        "All %s has unexpected content\n",
+                        result->name);
+    }
+
+    return (result);
+}
+
+/**
+ * xmlSchemaParseChoice:
+ * @ctxt:  a schema validation context
+ * @schema:  the schema being built
+ * @node:  a subtree containing XML Schema informations
+ *
+ * Parse an XML Schema choice definition: an anonymous type named
+ * "choice N" is registered and its element, group, any, sequence and
+ * choice children are chained as subtypes.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the type definition or NULL in case of error.
+ */
+static xmlSchemaTypePtr
+xmlSchemaParseChoice(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                     xmlNodePtr node)
+{
+    xmlSchemaTypePtr result, item, tail = NULL;
+    xmlNodePtr cur;
+    xmlChar label[30];
+
+    if (!ctxt || !schema || !node)
+        return (NULL);
+
+    snprintf((char *) label, 30, "choice %d", ctxt->counter++ + 1);
+    result = xmlSchemaAddType(ctxt, schema, label);
+    if (result == NULL)
+        return (NULL);
+
+    result->node = node;
+    result->type = XML_SCHEMA_TYPE_CHOICE;
+    result->id = xmlGetProp(node, BAD_CAST "id");
+    result->minOccurs = xmlGetMinOccurs(ctxt, node);
+    result->maxOccurs = xmlGetMaxOccurs(ctxt, node);
+
+    cur = node->children;
+    if (IS_SCHEMA(cur, "annotation")) {
+        result->annot = xmlSchemaParseAnnotation(ctxt, schema, cur);
+        cur = cur->next;
+    }
+
+    /* Consume particles until a child of another kind (or the end) is met. */
+    for (;; cur = cur->next) {
+        if (IS_SCHEMA(cur, "element"))
+            item = (xmlSchemaTypePtr)
+                xmlSchemaParseElement(ctxt, schema, cur, 0);
+        else if (IS_SCHEMA(cur, "group"))
+            item = xmlSchemaParseGroup(ctxt, schema, cur);
+        else if (IS_SCHEMA(cur, "any"))
+            item = xmlSchemaParseAny(ctxt, schema, cur);
+        else if (IS_SCHEMA(cur, "sequence"))
+            item = xmlSchemaParseSequence(ctxt, schema, cur);
+        else if (IS_SCHEMA(cur, "choice"))
+            item = xmlSchemaParseChoice(ctxt, schema, cur);
+        else
+            break;
+
+        if (item == NULL)
+            continue;
+        if (tail != NULL)
+            tail->next = item;
+        else
+            result->subtypes = item;
+        item->next = NULL;
+        tail = item;
+    }
+
+    if (cur != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, cur);
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                        "Choice %s has unexpected content\n",
+                        result->name);
+    }
+
+    return (result);
+}
+
+/**
+ * xmlSchemaParseSequence:
+ * @ctxt:  a schema validation context
+ * @schema:  the schema being built
+ * @node:  a subtree containing XML Schema informations
+ *
+ * Parse an XML Schema sequence definition: an anonymous type named
+ * "sequence N" is registered and its element, group, any, choice and
+ * sequence children are chained as subtypes.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the type definition or NULL in case of error.
+ */
+static xmlSchemaTypePtr
+xmlSchemaParseSequence(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                       xmlNodePtr node)
+{
+    xmlSchemaTypePtr result, item, tail = NULL;
+    xmlNodePtr cur;
+    xmlChar label[30];
+
+    if (!ctxt || !schema || !node)
+        return (NULL);
+
+    snprintf((char *) label, 30, "sequence %d", ctxt->counter++ + 1);
+    result = xmlSchemaAddType(ctxt, schema, label);
+    if (result == NULL)
+        return (NULL);
+
+    result->node = node;
+    result->type = XML_SCHEMA_TYPE_SEQUENCE;
+    result->id = xmlGetProp(node, BAD_CAST "id");
+    result->minOccurs = xmlGetMinOccurs(ctxt, node);
+    result->maxOccurs = xmlGetMaxOccurs(ctxt, node);
+
+    cur = node->children;
+    if (IS_SCHEMA(cur, "annotation")) {
+        result->annot = xmlSchemaParseAnnotation(ctxt, schema, cur);
+        cur = cur->next;
+    }
+
+    /* Consume particles until a child of another kind (or the end) is met. */
+    for (;; cur = cur->next) {
+        if (IS_SCHEMA(cur, "element"))
+            item = (xmlSchemaTypePtr)
+                xmlSchemaParseElement(ctxt, schema, cur, 0);
+        else if (IS_SCHEMA(cur, "group"))
+            item = xmlSchemaParseGroup(ctxt, schema, cur);
+        else if (IS_SCHEMA(cur, "any"))
+            item = xmlSchemaParseAny(ctxt, schema, cur);
+        else if (IS_SCHEMA(cur, "choice"))
+            item = xmlSchemaParseChoice(ctxt, schema, cur);
+        else if (IS_SCHEMA(cur, "sequence"))
+            item = xmlSchemaParseSequence(ctxt, schema, cur);
+        else
+            break;
+
+        if (item == NULL)
+            continue;
+        if (tail != NULL)
+            tail->next = item;
+        else
+            result->subtypes = item;
+        item->next = NULL;
+        tail = item;
+    }
+
+    if (cur != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, cur);
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                        "Sequence %s has unexpected content\n",
+                        result->name);
+    }
+
+    return (result);
+}
+
+/**
+ * xmlSchemaParseRestriction:
+ * @ctxt:  a schema validation context
+ * @schema:  the schema being built
+ * @node:  a subtree containing XML Schema informations
+ * @simple:  is that part of a simple type.
+ *
+ * Parse an XML Schema restriction definition: an anonymous type named
+ * "restriction N" is registered, then either one model group child
+ * (all, choice, sequence, group) or an optional simpleType followed by
+ * facets is collected, and finally the attribute declarations.
+ * A missing "base" is reported only when @simple is zero.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the type definition or NULL in case of error
+ */
+static xmlSchemaTypePtr
+xmlSchemaParseRestriction(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                          xmlNodePtr node, int simple)
+{
+    xmlSchemaTypePtr result;
+    xmlSchemaFacetPtr facet, facetTail = NULL;
+    xmlNodePtr cur = NULL;
+    xmlChar label[30];
+    xmlChar *savedContainer;
+
+    if (!ctxt || !schema || !node)
+        return (NULL);
+
+    savedContainer = ctxt->container;
+
+    snprintf((char *) label, 30, "restriction %d", ctxt->counter++ + 1);
+    result = xmlSchemaAddType(ctxt, schema, label);
+    if (result == NULL)
+        return (NULL);
+
+    result->node = node;
+    result->type = XML_SCHEMA_TYPE_RESTRICTION;
+    result->id = xmlGetProp(node, BAD_CAST "id");
+    result->base = xmlGetQNameProp(ctxt, node, "base", &(result->baseNs));
+    if ((result->base == NULL) && (simple == 0)) {
+        xmlSchemaErrorContext(ctxt, schema, node, cur);
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                        "Restriction %s has no base\n",
+                        result->name);
+    }
+    /* Children are parsed with this restriction as their container. */
+    ctxt->container = label;
+
+    cur = node->children;
+    if (IS_SCHEMA(cur, "annotation")) {
+        result->annot = xmlSchemaParseAnnotation(ctxt, schema, cur);
+        cur = cur->next;
+    }
+
+    if (IS_SCHEMA(cur, "all")) {
+        result->subtypes = (xmlSchemaTypePtr)
+            xmlSchemaParseAll(ctxt, schema, cur);
+        cur = cur->next;
+    } else if (IS_SCHEMA(cur, "choice")) {
+        result->subtypes = xmlSchemaParseChoice(ctxt, schema, cur);
+        cur = cur->next;
+    } else if (IS_SCHEMA(cur, "sequence")) {
+        result->subtypes = (xmlSchemaTypePtr)
+            xmlSchemaParseSequence(ctxt, schema, cur);
+        cur = cur->next;
+    } else if (IS_SCHEMA(cur, "group")) {
+        result->subtypes = (xmlSchemaTypePtr)
+            xmlSchemaParseGroup(ctxt, schema, cur);
+        cur = cur->next;
+    } else {
+        if (IS_SCHEMA(cur, "simpleType")) {
+            result->baseType = (xmlSchemaTypePtr)
+                xmlSchemaParseSimpleType(ctxt, schema, cur);
+            cur = cur->next;
+        }
+        /*
+         * Facets: every recognized facet child is parsed and the
+         * successful ones are chained onto result->facets.
+         */
+        for (;; cur = cur->next) {
+            if (!(IS_SCHEMA(cur, "minInclusive") ||
+                  IS_SCHEMA(cur, "minExclusive") ||
+                  IS_SCHEMA(cur, "maxInclusive") ||
+                  IS_SCHEMA(cur, "maxExclusive") ||
+                  IS_SCHEMA(cur, "totalDigits") ||
+                  IS_SCHEMA(cur, "fractionDigits") ||
+                  IS_SCHEMA(cur, "pattern") ||
+                  IS_SCHEMA(cur, "enumeration") ||
+                  IS_SCHEMA(cur, "whiteSpace") ||
+                  IS_SCHEMA(cur, "length") ||
+                  IS_SCHEMA(cur, "maxLength") ||
+                  IS_SCHEMA(cur, "minLength")))
+                break;
+
+            facet = xmlSchemaParseFacet(ctxt, schema, cur);
+            if (facet == NULL)
+                continue;
+            if (facetTail != NULL)
+                facetTail->next = facet;
+            else
+                result->facets = facet;
+            facet->next = NULL;
+            facetTail = facet;
+        }
+    }
+
+    /* Attribute declarations come last; the helper returns what is left. */
+    cur = xmlSchemaParseAttrDecls(ctxt, schema, cur, result);
+    if (cur != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, cur);
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                        "Restriction %s has unexpected content\n",
+                        result->name);
+    }
+    ctxt->container = savedContainer;
+    return (result);
+}
+
+/**
+ * xmlSchemaParseExtension:
+ * @ctxt:  a schema validation context
+ * @schema:  the schema being built
+ * @node:  a subtree containing XML Schema informations
+ *
+ * Parse an XML Schema extension definition: an anonymous type named
+ * "extension N" is registered, an optional model group child (all,
+ * choice, sequence, group) becomes its subtype and the attribute
+ * declarations are then collected. A missing "base" is reported.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the type definition or NULL in case of error
+ */
+static xmlSchemaTypePtr
+xmlSchemaParseExtension(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                        xmlNodePtr node)
+{
+    xmlSchemaTypePtr result, content = NULL;
+    xmlNodePtr cur = NULL;
+    xmlChar label[30];
+    xmlChar *savedContainer;
+
+    if (!ctxt || !schema || !node)
+        return (NULL);
+
+    savedContainer = ctxt->container;
+
+    snprintf((char *) label, 30, "extension %d", ctxt->counter++ + 1);
+    result = xmlSchemaAddType(ctxt, schema, label);
+    if (result == NULL)
+        return (NULL);
+
+    result->node = node;
+    result->type = XML_SCHEMA_TYPE_EXTENSION;
+    result->id = xmlGetProp(node, BAD_CAST "id");
+    /* Children are parsed with this extension as their container. */
+    ctxt->container = label;
+
+    result->base = xmlGetQNameProp(ctxt, node, "base", &(result->baseNs));
+    if (result->base == NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, cur);
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                        "Extension %s has no base\n",
+                        result->name);
+    }
+
+    cur = node->children;
+    if (IS_SCHEMA(cur, "annotation")) {
+        result->annot = xmlSchemaParseAnnotation(ctxt, schema, cur);
+        cur = cur->next;
+    }
+
+    if (IS_SCHEMA(cur, "all")) {
+        content = xmlSchemaParseAll(ctxt, schema, cur);
+        cur = cur->next;
+    } else if (IS_SCHEMA(cur, "choice")) {
+        content = xmlSchemaParseChoice(ctxt, schema, cur);
+        cur = cur->next;
+    } else if (IS_SCHEMA(cur, "sequence")) {
+        content = xmlSchemaParseSequence(ctxt, schema, cur);
+        cur = cur->next;
+    } else if (IS_SCHEMA(cur, "group")) {
+        content = xmlSchemaParseGroup(ctxt, schema, cur);
+        cur = cur->next;
+    }
+    /* The subtypes field is only written when a model group was parsed. */
+    if (content != NULL)
+        result->subtypes = content;
+
+    cur = xmlSchemaParseAttrDecls(ctxt, schema, cur, result);
+    if (cur != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, cur);
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                        "Extension %s has unexpected content\n",
+                        result->name);
+    }
+    ctxt->container = savedContainer;
+    return (result);
+}
+
+/**
+ * xmlSchemaParseSimpleContent:
+ * @ctxt:  a schema validation context
+ * @schema:  the schema being built
+ * @node:  a subtree containing XML Schema informations
+ *
+ * Parse an XML Schema SimpleContent definition: an anonymous type named
+ * "simpleContent N" is registered and its restriction or extension
+ * child, if any, becomes the subtype.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the type definition or NULL in case of error
+ */
+static xmlSchemaTypePtr
+xmlSchemaParseSimpleContent(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                            xmlNodePtr node)
+{
+    xmlSchemaTypePtr type, subtype;
+    xmlNodePtr child = NULL;
+    xmlChar name[30];
+
+    if ((ctxt == NULL) || (schema == NULL) || (node == NULL))
+        return (NULL);
+
+
+    /*
+     * Use a simpleContent specific prefix so that diagnostics do not
+     * label this node as a complexContent one.
+     */
+    snprintf((char *)name, 30, "simpleContent %d", ctxt->counter++ + 1);
+    type = xmlSchemaAddType(ctxt, schema, name);
+    if (type == NULL)
+        return (NULL);
+    type->node = node;
+    type->type = XML_SCHEMA_TYPE_SIMPLE_CONTENT;
+    type->id = xmlGetProp(node, BAD_CAST "id");
+
+    child = node->children;
+    if (IS_SCHEMA(child, "annotation")) {
+        type->annot = xmlSchemaParseAnnotation(ctxt, schema, child);
+        child = child->next;
+    }
+    subtype = NULL;
+    if (IS_SCHEMA(child, "restriction")) {
+        subtype = (xmlSchemaTypePtr)
+            xmlSchemaParseRestriction(ctxt, schema, child, 0);
+        child = child->next;
+    } else if (IS_SCHEMA(child, "extension")) {
+        subtype = (xmlSchemaTypePtr)
+            xmlSchemaParseExtension(ctxt, schema, child);
+        child = child->next;
+    }
+    type->subtypes = subtype;
+    if (child != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, child);
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData,
+                        "SimpleContent %s has unexpected content\n",
+                        type->name);
+    }
+    return (type);
+}
+
+/**
+ * xmlSchemaParseComplexContent:
+ * @ctxt:  a schema validation context
+ * @schema:  the schema being built
+ * @node:  a subtree containing XML Schema informations
+ *
+ * Parse an XML Schema ComplexContent definition: an anonymous type
+ * named "complexContent N" is registered and its restriction or
+ * extension child, if any, becomes the subtype.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the type definition or NULL in case of error
+ */
+static xmlSchemaTypePtr
+xmlSchemaParseComplexContent(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                             xmlNodePtr node)
+{
+    xmlSchemaTypePtr result;
+    xmlNodePtr cur;
+    xmlChar label[30];
+
+    if (!ctxt || !schema || !node)
+        return (NULL);
+
+    snprintf((char *) label, 30, "complexContent %d", ctxt->counter++ + 1);
+    result = xmlSchemaAddType(ctxt, schema, label);
+    if (result == NULL)
+        return (NULL);
+
+    result->node = node;
+    result->type = XML_SCHEMA_TYPE_COMPLEX_CONTENT;
+    result->id = xmlGetProp(node, BAD_CAST "id");
+
+    cur = node->children;
+    if (IS_SCHEMA(cur, "annotation")) {
+        result->annot = xmlSchemaParseAnnotation(ctxt, schema, cur);
+        cur = cur->next;
+    }
+
+    /* The derivation is either a restriction or an extension, or absent. */
+    if (IS_SCHEMA(cur, "restriction")) {
+        result->subtypes = (xmlSchemaTypePtr)
+            xmlSchemaParseRestriction(ctxt, schema, cur, 0);
+        cur = cur->next;
+    } else if (IS_SCHEMA(cur, "extension")) {
+        result->subtypes = (xmlSchemaTypePtr)
+            xmlSchemaParseExtension(ctxt, schema, cur);
+        cur = cur->next;
+    } else {
+        result->subtypes = NULL;
+    }
+
+    if (cur != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, cur);
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                        "ComplexContent %s has unexpected content\n",
+                        result->name);
+    }
+    return (result);
+}
+
+/**
+ * xmlSchemaParseComplexType:
+ * @ctxt:  a schema validation context
+ * @schema:  the schema being built
+ * @node:  a subtree containing XML Schema informations
+ *
+ * Parse an XML Schema Complex Type definition. The type is registered
+ * under its "name" attribute, or under a generated "anontypeN" name
+ * when it is anonymous. Its content is either a simpleContent, a
+ * complexContent, or an optional model group (all, choice, sequence,
+ * group) followed by attribute declarations. While the children are
+ * parsed, ctxt->container points to the type name; the previous
+ * container is restored before returning.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the type definition or NULL in case of error
+ */
+static xmlSchemaTypePtr
+xmlSchemaParseComplexType(xmlSchemaParserCtxtPtr ctxt, xmlSchemaPtr schema,
+                          xmlNodePtr node)
+{
+    xmlSchemaTypePtr type, subtype;
+    xmlNodePtr child = NULL;
+    xmlChar *name;
+    xmlChar *oldcontainer;
+
+    if ((ctxt == NULL) || (schema == NULL) || (node == NULL))
+        return (NULL);
+
+    oldcontainer = ctxt->container;
+    name = xmlGetProp(node, (const xmlChar *) "name");
+    if (name == NULL) {
+        char buf[100];
+
+        snprintf(buf, 99, "anontype%d", ctxt->counter++ + 1);
+        name = xmlStrdup((xmlChar *) buf);
+    }
+    /* Still NULL here means the generated name could not be duplicated. */
+    if (name == NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, child);
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData, "complexType has no name\n");
+        return (NULL);
+    }
+    type = xmlSchemaAddType(ctxt, schema, name);
+    if (type == NULL) {
+        xmlFree(name);
+        return (NULL);
+    }
+    type->node = node;
+    type->type = XML_SCHEMA_TYPE_COMPLEX;
+    type->id = xmlGetProp(node, BAD_CAST "id");
+    ctxt->container = name;
+
+    child = node->children;
+    if (IS_SCHEMA(child, "annotation")) {
+        type->annot = xmlSchemaParseAnnotation(ctxt, schema, child);
+        child = child->next;
+    }
+    if (IS_SCHEMA(child, "simpleContent")) {
+        /*
+         * Attach the parsed simpleContent to the type, exactly as done
+         * for complexContent below; keeping it only in a local would
+         * silently drop it.
+         */
+        type->subtypes = xmlSchemaParseSimpleContent(ctxt, schema, child);
+        child = child->next;
+    } else if (IS_SCHEMA(child, "complexContent")) {
+        type->subtypes = xmlSchemaParseComplexContent(ctxt, schema, child);
+        child = child->next;
+    } else {
+        subtype = NULL;
+
+        if (IS_SCHEMA(child, "all")) {
+            subtype = xmlSchemaParseAll(ctxt, schema, child);
+            child = child->next;
+        } else if (IS_SCHEMA(child, "choice")) {
+            subtype = xmlSchemaParseChoice(ctxt, schema, child);
+            child = child->next;
+        } else if (IS_SCHEMA(child, "sequence")) {
+            subtype = xmlSchemaParseSequence(ctxt, schema, child);
+            child = child->next;
+        } else if (IS_SCHEMA(child, "group")) {
+            subtype = xmlSchemaParseGroup(ctxt, schema, child);
+            child = child->next;
+        }
+        if (subtype != NULL)
+            type->subtypes = subtype;
+        child = xmlSchemaParseAttrDecls(ctxt, schema, child, type);
+    }
+    if (child != NULL) {
+        xmlSchemaErrorContext(ctxt, schema, node, child);
+        if ((ctxt != NULL) && (ctxt->error != NULL))
+            ctxt->error(ctxt->userData,
+                        "ComplexType %s has unexpected content\n",
+                        type->name);
+    }
+    ctxt->container = oldcontainer;
+    xmlFree(name);
+    return (type);
+}
+
+
+/**
+ * xmlSchemaParseSchema:
+ * @ctxt: a schema parser context
+ * @node: the root element of the document, expected to be a schema element
+ *
+ * parse a XML schema definition (top-level attributes, then the children)
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the internal XML Schema structure built from the resource or
+ * NULL if an argument is NULL, @node is not a schema or allocation fails
+ */
+static xmlSchemaPtr
+xmlSchemaParseSchema(xmlSchemaParserCtxtPtr ctxt, xmlNodePtr node)
+{
+ xmlSchemaPtr schema = NULL;
+ xmlSchemaAnnotPtr annot;
+ xmlNodePtr child = NULL;
+ xmlChar *val;
+
+ if ((ctxt == NULL) || (node == NULL))
+ return (NULL);
+
+ if (IS_SCHEMA(node, "schema")) {
+ schema = xmlSchemaNewSchema(ctxt);
+ if (schema == NULL)
+ return(NULL);
+ schema->targetNamespace = xmlGetProp(node, BAD_CAST "targetNamespace");
+ schema->id = xmlGetProp(node, BAD_CAST "id");
+ schema->version = xmlGetProp(node, BAD_CAST "version");
+ val = xmlGetProp(node, BAD_CAST "elementFormDefault");
+ if (val != NULL) {
+ if (xmlStrEqual(val, BAD_CAST "qualified"))
+ schema->flags |= XML_SCHEMAS_QUALIF_ELEM;
+ else if (!xmlStrEqual(val, BAD_CAST "unqualified")) {
+ xmlSchemaErrorContext(ctxt, schema, node, child);
+ if ((ctxt != NULL) && (ctxt->error != NULL)) {
+ ctxt->error(ctxt->userData,
+ "Invalid value %s for elementFormDefault\n",
+ val);
+ }
+ }
+ xmlFree(val);
+ }
+ val = xmlGetProp(node, BAD_CAST "attributeFormDefault");
+ if (val != NULL) {
+ if (xmlStrEqual(val, BAD_CAST "qualified"))
+ schema->flags |= XML_SCHEMAS_QUALIF_ATTR;
+ else if (!xmlStrEqual(val, BAD_CAST "unqualified")) {
+ xmlSchemaErrorContext(ctxt, schema, node, child);
+ if ((ctxt != NULL) && (ctxt->error != NULL)) {
+ ctxt->error(ctxt->userData,
+ "Invalid value %s for attributeFormDefault\n",
+ val);
+ }
+ }
+ xmlFree(val);
+ }
+
+ child = node->children;
+ while ((IS_SCHEMA(child, "include")) || /* leading children first */
+ (IS_SCHEMA(child, "import")) ||
+ (IS_SCHEMA(child, "redefine")) ||
+ (IS_SCHEMA(child, "annotation"))) {
+ if (IS_SCHEMA(child, "annotation")) {
+ annot = xmlSchemaParseAnnotation(ctxt, schema, child);
+ if (schema->annot == NULL) /* only the first annotation is kept */
+ schema->annot = annot;
+ else
+ xmlSchemaFreeAnnot(annot);
+ } else if (IS_SCHEMA(child, "include")) {
+ TODO
+ } else if (IS_SCHEMA(child, "import")) {
+ TODO
+ } else if (IS_SCHEMA(child, "redefine")) {
+ TODO
+ }
+ child = child->next;
+ }
+ while (child != NULL) { /* then the global components */
+ if (IS_SCHEMA(child, "complexType")) {
+ xmlSchemaParseComplexType(ctxt, schema, child);
+ child = child->next;
+ } else if (IS_SCHEMA(child, "simpleType")) {
+ xmlSchemaParseSimpleType(ctxt, schema, child);
+ child = child->next;
+ } else if (IS_SCHEMA(child, "element")) {
+ xmlSchemaParseElement(ctxt, schema, child, 1);
+ child = child->next;
+ } else if (IS_SCHEMA(child, "attribute")) {
+ xmlSchemaParseAttribute(ctxt, schema, child);
+ child = child->next;
+ } else if (IS_SCHEMA(child, "attributeGroup")) {
+ xmlSchemaParseAttributeGroup(ctxt, schema, child);
+ child = child->next;
+ } else if (IS_SCHEMA(child, "group")) {
+ xmlSchemaParseGroup(ctxt, schema, child);
+ child = child->next;
+ } else if (IS_SCHEMA(child, "notation")) {
+ xmlSchemaParseNotation(ctxt, schema, child);
+ child = child->next;
+ } else {
+ xmlSchemaErrorContext(ctxt, schema, node, child);
+ if ((ctxt != NULL) && (ctxt->error != NULL))
+ ctxt->error(ctxt->userData,
+ "Schemas: unexpected element %s here \n",
+ child->name);
+ child = child->next; /* report and keep going */
+ }
+ while (IS_SCHEMA(child, "annotation")) {
+ annot = xmlSchemaParseAnnotation(ctxt, schema, child);
+ if (schema->annot == NULL)
+ schema->annot = annot;
+ else
+ xmlSchemaFreeAnnot(annot);
+ child = child->next;
+ }
+ }
+ }
+#ifdef DEBUG
+ if (schema == NULL)
+ xmlGenericError(xmlGenericErrorContext,
+ "xmlSchemaParse() failed\n");
+#endif
+
+ return (schema);
+}
+
+/************************************************************************
+ * *
+ * Validating using Schemas *
+ * *
+ ************************************************************************/
+
+/************************************************************************
+ * *
+ * Reading/Writing Schemas *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlSchemaNewParserCtxt:
+ * @URL: the location of the schema
+ *
+ * Create an XML Schemas parse context for that file/resource expected
+ * to contain an XML Schemas file. The context keeps its own copy of @URL.
+ *
+ * Returns the parser context or NULL if @URL is NULL or allocation fails
+ */
+xmlSchemaParserCtxtPtr
+xmlSchemaNewParserCtxt(const char *URL) {
+ xmlSchemaParserCtxtPtr ret;
+
+ if (URL == NULL)
+ return(NULL);
+
+ ret = (xmlSchemaParserCtxtPtr) xmlMalloc(sizeof(xmlSchemaParserCtxt));
+ if (ret == NULL) {
+ xmlGenericError(xmlGenericErrorContext,
+ "Failed to allocate new schema parser context for %s\n", URL);
+ return (NULL);
+ }
+ memset(ret, 0, sizeof(xmlSchemaParserCtxt)); /* all fields start out NULL/0 */
+ ret->URL = xmlStrdup((const xmlChar *)URL);
+ return (ret);
+}
+
+/**
+ * xmlSchemaFreeParserCtxt:
+ * @ctxt: the schema parser context, may be NULL
+ *
+ * Release the context's URL copy and then the context itself
+ */
+void
+xmlSchemaFreeParserCtxt(xmlSchemaParserCtxtPtr ctxt) {
+ if (ctxt != NULL) {
+ if (ctxt->URL != NULL)
+ xmlFree(ctxt->URL);
+ xmlFree(ctxt);
+ }
+}
+
+/************************************************************************
+ * *
+ * Building the content models *
+ * *
+ ************************************************************************/
+/**
+ * xmlSchemaBuildAContentModel:
+ * @type: the schema type definition
+ * @ctxt: the schema parser context (uses ctxt->am and ctxt->state)
+ * @name: the element name whose content is being built
+ *
+ * Generate the automata sequence needed for that type, from ctxt->state on
+ */
+static void
+xmlSchemaBuildAContentModel(xmlSchemaTypePtr type,
+ xmlSchemaParserCtxtPtr ctxt,
+ const xmlChar *name) {
+ if (type == NULL) {
+ xmlGenericError(xmlGenericErrorContext,
+ "Found unexpected type = NULL in %s content model\n",
+ name);
+ return;
+ }
+ switch (type->type) {
+ case XML_SCHEMA_TYPE_ANY:
+ /* TODO : handle the namespace too */
+ /* TODO : make that a specific transition type */
+ TODO
+ ctxt->state = xmlAutomataNewTransition(ctxt->am, ctxt->state,
+ NULL, BAD_CAST "*", NULL);
+ break;
+ case XML_SCHEMA_TYPE_ELEMENT: {
+ xmlSchemaElementPtr elem = (xmlSchemaElementPtr) type;
+ /* TODO : handle the namespace too */
+ xmlAutomataStatePtr oldstate = ctxt->state; /* state before this element */
+ if (elem->maxOccurs >= UNBOUNDED) {
+ if (elem->refDecl != NULL) {
+ xmlSchemaBuildAContentModel(
+ (xmlSchemaTypePtr) elem->refDecl,
+ ctxt, elem->refDecl->name);
+ } else {
+ ctxt->state = xmlAutomataNewTransition(ctxt->am,
+ ctxt->state, NULL, elem->name, type);
+ }
+ xmlAutomataNewEpsilon(ctxt->am, ctxt->state, oldstate); /* presumably the loop back for repetition */
+ if (elem->minOccurs == 0) {
+ /* basically an elem* */
+ xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state);
+ }
+ } else if (elem->maxOccurs > 1) {
+ if (elem->refDecl != NULL) {
+ TODO
+ xmlSchemaBuildAContentModel( /* NOTE(review): occurrence bounds are not applied on this path */
+ (xmlSchemaTypePtr) elem->refDecl,
+ ctxt, elem->refDecl->name);
+ } else {
+ ctxt->state = xmlAutomataNewCountTrans(ctxt->am,
+ ctxt->state, NULL, elem->name,
+ elem->minOccurs, elem->maxOccurs, type);
+ }
+ } else {
+ if (elem->refDecl != NULL) {
+ xmlSchemaBuildAContentModel(
+ (xmlSchemaTypePtr) elem->refDecl,
+ ctxt, elem->refDecl->name);
+ } else {
+ ctxt->state = xmlAutomataNewTransition(ctxt->am,
+ ctxt->state, NULL, elem->name, type);
+ }
+ if (elem->minOccurs == 0) {
+ /* basically an elem? */
+ xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state);
+ }
+ }
+ break;
+ }
+ case XML_SCHEMA_TYPE_SEQUENCE: {
+ xmlSchemaTypePtr subtypes;
+
+ /*
+ * Simply iterate over the subtypes, each one starts where the
+ * previous one left ctxt->state
+ */
+ subtypes = type->subtypes;
+ while (subtypes != NULL) {
+ xmlSchemaBuildAContentModel(subtypes, ctxt, name);
+ subtypes = subtypes->next;
+ }
+ break;
+ }
+ case XML_SCHEMA_TYPE_CHOICE: {
+ xmlSchemaTypePtr subtypes;
+ xmlAutomataStatePtr start, end;
+
+ start = ctxt->state;
+ end = xmlAutomataNewState(ctxt->am);
+
+ /*
+ * iterate over the subtypes and remerge the end with an
+ * epsilon transition
+ */
+ subtypes = type->subtypes;
+ while (subtypes != NULL) {
+ ctxt->state = start; /* every alternative restarts from the same state */
+ xmlSchemaBuildAContentModel(subtypes, ctxt, name);
+ xmlAutomataNewEpsilon(ctxt->am, ctxt->state, end);
+ subtypes = subtypes->next;
+ }
+ ctxt->state = end;
+ break;
+ }
+ case XML_SCHEMA_TYPE_ALL: {
+ TODO
+ break;
+ }
+ case XML_SCHEMA_TYPE_RESTRICTION:
+ case XML_SCHEMA_TYPE_EXTENSION:
+ case XML_SCHEMA_TYPE_GROUP:
+ case XML_SCHEMA_TYPE_COMPLEX:
+ case XML_SCHEMA_TYPE_COMPLEX_CONTENT:
+ if (type->subtypes != NULL) /* wrappers: only the first subtype is followed */
+ xmlSchemaBuildAContentModel(type->subtypes, ctxt, name);
+ break;
+ default:
+ xmlGenericError(xmlGenericErrorContext,
+ "Found unexpected type %d in %s content model\n",
+ type->type, name);
+ return;
+ }
+}
+/**
+ * xmlSchemaBuildContentModel:
+ * @elem: the schema element declaration
+ * @ctxt: the schema parser context
+ * @name: the element name, used in diagnostics
+ *
+ * Builds the content model of the element if it has a complex type.
+ */
+static void
+xmlSchemaBuildContentModel(xmlSchemaElementPtr elem,
+ xmlSchemaParserCtxtPtr ctxt,
+ const xmlChar *name) {
+#ifdef DEBUG_CONTENT
+ xmlGenericError(xmlGenericErrorContext,
+ "Building content model for %s\n", name);
+#endif
+
+ if (elem->contModel != NULL)
+ return;
+ if (elem->subtypes == NULL)
+ return;
+ if (elem->subtypes->type != XML_SCHEMA_TYPE_COMPLEX)
+ return;
+ /* Build a temporary automata, compile it, then release it */
+ ctxt->am = xmlNewAutomata();
+ if (ctxt->am == NULL) {
+ xmlGenericError(xmlGenericErrorContext,
+ "Cannot create automata for elem %s\n", name);
+ return;
+ }
+ ctxt->state = xmlAutomataGetInitState(ctxt->am);
+ xmlSchemaBuildAContentModel(elem->subtypes, ctxt, name);
+ xmlAutomataSetFinalState(ctxt->am, ctxt->state);
+ elem->contModel = xmlAutomataCompile(ctxt->am);
+#ifdef DEBUG_CONTENT
+ printf("Content model of %s:\n", name);
+ xmlRegexpPrint(stdout, elem->contModel);
+#endif
+ ctxt->state = NULL;
+ xmlFreeAutomata(ctxt->am);
+ ctxt->am = NULL;
+}
+
+/**
+ * xmlSchemaRefFixupCallback:
+ * @elem: the schema element declaration to fix up
+ * @ctxt: the schema parser context
+ *
+ * Resolve the element's ref or named type against ctxt->schema
+ */
+static void
+xmlSchemaRefFixupCallback(xmlSchemaElementPtr elem,
+ xmlSchemaParserCtxtPtr ctxt,
+ const xmlChar *name, /* only used in error messages */
+ ATTRIBUTE_UNUSED const xmlChar *context,
+ ATTRIBUTE_UNUSED const xmlChar *namespace)
+{
+ if ((ctxt == NULL) || (elem == NULL))
+ return;
+ if (elem->ref != NULL) {
+ xmlSchemaElementPtr elemDecl;
+
+ if (elem->subtypes != NULL) { /* a ref cannot carry its own type */
+ xmlSchemaErrorContext(ctxt, NULL, elem->node, NULL);
+ if ((ctxt != NULL) && (ctxt->error != NULL))
+ ctxt->error(ctxt->userData,
+ "Schemas: element %s have both ref and subtype\n",
+ name);
+ return;
+ }
+ elemDecl = xmlHashLookup2(ctxt->schema->elemDecl,
+ elem->ref, elem->refNs);
+
+ if (elemDecl == NULL) {
+ if ((ctxt != NULL) && (ctxt->error != NULL))
+ ctxt->error(ctxt->userData,
+ "Schemas: element %s ref to %s not found\n",
+ name, elem->ref);
+ return;
+ }
+ elem->refDecl = elemDecl;
+ } else if (elem->namedType != NULL) {
+ xmlSchemaTypePtr typeDecl;
+
+ if (elem->subtypes != NULL) { /* a named type excludes an inline type */
+ xmlSchemaErrorContext(ctxt, NULL, elem->node, NULL);
+ if ((ctxt != NULL) && (ctxt->error != NULL))
+ ctxt->error(ctxt->userData,
+ "Schemas: element %s have both type and subtype\n",
+ name);
+ return;
+ }
+ typeDecl = xmlSchemaGetType(ctxt->schema, elem->namedType,
+ elem->namedTypeNs);
+
+ if (typeDecl == NULL) {
+ if ((ctxt != NULL) && (ctxt->error != NULL))
+ ctxt->error(ctxt->userData,
+ "Schemas: element %s type %s not found\n",
+ name, elem->namedType);
+ return;
+ }
+ elem->subtypes = typeDecl; /* the resolved type becomes the element's subtype */
+ }
+}
+
+/**
+ * xmlSchemaTypeFixup:
+ * @typeDecl: the schema type definition
+ * @ctxt: the schema parser context
+ *
+ * Computes the content type of the type if it is still unknown.
+ */
+static void
+xmlSchemaTypeFixup(xmlSchemaTypePtr typeDecl,
+ xmlSchemaParserCtxtPtr ctxt,
+ const xmlChar *name) /* for messages, defaults to typeDecl->name */
+{
+ if (name == NULL)
+ name = typeDecl->name;
+ if (typeDecl->contentType == XML_SCHEMA_CONTENT_UNKNOWN) {
+ switch (typeDecl->type) {
+ case XML_SCHEMA_TYPE_SIMPLE_CONTENT:
+ if (typeDecl->subtypes == NULL) break; /* nothing to inherit from */
+ xmlSchemaTypeFixup(typeDecl->subtypes, ctxt, NULL);
+ typeDecl->contentType = typeDecl->subtypes->contentType;
+ break;
+ case XML_SCHEMA_TYPE_RESTRICTION: {
+ if (typeDecl->subtypes != NULL)
+ xmlSchemaTypeFixup(typeDecl->subtypes, ctxt, NULL);
+
+ if (typeDecl->base != NULL) {
+ xmlSchemaTypePtr baseType;
+
+ baseType = xmlSchemaGetType(ctxt->schema, typeDecl->base,
+ typeDecl->baseNs);
+ if (baseType == NULL) {
+ if ((ctxt != NULL) && (ctxt->error != NULL))
+ ctxt->error(ctxt->userData,
+ "Schemas: type %s base type %s not found\n",
+ name, typeDecl->base);
+ }
+ typeDecl->baseType = baseType;
+ }
+ if (typeDecl->subtypes == NULL)
+ /* 1.1.1 */
+ typeDecl->contentType = XML_SCHEMA_CONTENT_EMPTY;
+ else if ((typeDecl->subtypes->subtypes == NULL) &&
+ ((typeDecl->subtypes->type == XML_SCHEMA_TYPE_ALL) ||
+ (typeDecl->subtypes->type == XML_SCHEMA_TYPE_SEQUENCE)))
+ /* 1.1.2 */
+ typeDecl->contentType = XML_SCHEMA_CONTENT_EMPTY;
+ else if ((typeDecl->subtypes->type == XML_SCHEMA_TYPE_CHOICE) &&
+ (typeDecl->subtypes->subtypes == NULL))
+ /* 1.1.3 */
+ typeDecl->contentType = XML_SCHEMA_CONTENT_EMPTY;
+ else {
+ /* 1.2 and 2.X are applied at the other layer */
+ typeDecl->contentType = XML_SCHEMA_CONTENT_ELEMENTS;
+ }
+ break;
+ }
+ case XML_SCHEMA_TYPE_EXTENSION: {
+ xmlSchemaContentType explicitContentType = XML_SCHEMA_CONTENT_ELEMENTS; /* non-empty unless 1.1.x applies */
+ xmlSchemaTypePtr base;
+
+ if (typeDecl->base != NULL) {
+ xmlSchemaTypePtr baseType;
+
+ baseType = xmlSchemaGetType(ctxt->schema, typeDecl->base,
+ typeDecl->baseNs);
+ if (baseType == NULL) {
+ if ((ctxt != NULL) && (ctxt->error != NULL))
+ ctxt->error(ctxt->userData,
+ "Schemas: type %s base type %s not found\n",
+ name, typeDecl->base);
+ }
+ typeDecl->baseType = baseType;
+ }
+ if (typeDecl->subtypes != NULL)
+ xmlSchemaTypeFixup(typeDecl->subtypes, ctxt, NULL);
+
+ if (typeDecl->subtypes == NULL)
+ /* 1.1.1 */
+ explicitContentType = XML_SCHEMA_CONTENT_EMPTY;
+ else if ((typeDecl->subtypes->subtypes == NULL) &&
+ ((typeDecl->subtypes->type == XML_SCHEMA_TYPE_ALL) ||
+ (typeDecl->subtypes->type == XML_SCHEMA_TYPE_SEQUENCE)))
+ /* 1.1.2 */
+ explicitContentType = XML_SCHEMA_CONTENT_EMPTY;
+ else if ((typeDecl->subtypes->type == XML_SCHEMA_TYPE_CHOICE) &&
+ (typeDecl->subtypes->subtypes == NULL))
+ /* 1.1.3 */
+ explicitContentType = XML_SCHEMA_CONTENT_EMPTY;
+
+ base = xmlSchemaGetType(ctxt->schema, typeDecl->base,
+ typeDecl->baseNs);
+ if (base == NULL) {
+ xmlSchemaErrorContext(ctxt, NULL, typeDecl->node, NULL);
+ if ((ctxt != NULL) && (ctxt->error != NULL))
+ ctxt->error(ctxt->userData,
+ "Schemas: base type %s of type %s not found\n",
+ typeDecl->base, name);
+ return;
+ }
+ xmlSchemaTypeFixup(base, ctxt, NULL); /* the base content type must be known first */
+ if (explicitContentType == XML_SCHEMA_CONTENT_EMPTY) {
+ /* 2.1 */
+ typeDecl->contentType = base->contentType;
+ } else if (base->contentType == XML_SCHEMA_CONTENT_EMPTY) {
+ /* 2.2 imbitable ! */
+ typeDecl->contentType = XML_SCHEMA_CONTENT_ELEMENTS;
+ } else {
+ /* 2.3 imbitable pareil ! */
+ typeDecl->contentType = XML_SCHEMA_CONTENT_ELEMENTS;
+ }
+ break;
+ }
+ case XML_SCHEMA_TYPE_COMPLEX: {
+ if (typeDecl->subtypes == NULL) {
+ typeDecl->contentType = XML_SCHEMA_CONTENT_EMPTY;
+ } else {
+ if (typeDecl->flags & XML_SCHEMAS_TYPE_MIXED)
+ typeDecl->contentType = XML_SCHEMA_CONTENT_MIXED;
+ else {
+ xmlSchemaTypeFixup(typeDecl->subtypes, ctxt, NULL);
+ typeDecl->contentType = typeDecl->subtypes->contentType;
+ }
+ }
+ break;
+ }
+ case XML_SCHEMA_TYPE_COMPLEX_CONTENT: {
+ if (typeDecl->subtypes == NULL) {
+ typeDecl->contentType = XML_SCHEMA_CONTENT_EMPTY;
+ } else {
+ if (typeDecl->flags & XML_SCHEMAS_TYPE_MIXED)
+ typeDecl->contentType = XML_SCHEMA_CONTENT_MIXED;
+ else {
+ xmlSchemaTypeFixup(typeDecl->subtypes, ctxt, NULL);
+ typeDecl->contentType = typeDecl->subtypes->contentType;
+ }
+ }
+ break;
+ }
+ case XML_SCHEMA_TYPE_SEQUENCE:
+ case XML_SCHEMA_TYPE_GROUP:
+ case XML_SCHEMA_TYPE_ALL:
+ case XML_SCHEMA_TYPE_CHOICE:
+ typeDecl->contentType = XML_SCHEMA_CONTENT_ELEMENTS;
+ break;
+ case XML_SCHEMA_TYPE_BASIC:
+ case XML_SCHEMA_TYPE_ANY:
+ case XML_SCHEMA_TYPE_FACET:
+ case XML_SCHEMA_TYPE_SIMPLE:
+ case XML_SCHEMA_TYPE_UR:
+ case XML_SCHEMA_TYPE_ELEMENT:
+ case XML_SCHEMA_TYPE_ATTRIBUTE:
+ case XML_SCHEMA_TYPE_NOTATION:
+ case XML_SCHEMA_TYPE_LIST:
+ case XML_SCHEMA_TYPE_UNION:
+ case XML_SCHEMA_FACET_MININCLUSIVE:
+ case XML_SCHEMA_FACET_MINEXCLUSIVE:
+ case XML_SCHEMA_FACET_MAXINCLUSIVE:
+ case XML_SCHEMA_FACET_MAXEXCLUSIVE:
+ case XML_SCHEMA_FACET_TOTALDIGITS:
+ case XML_SCHEMA_FACET_FRACTIONDIGITS:
+ case XML_SCHEMA_FACET_PATTERN:
+ case XML_SCHEMA_FACET_ENUMERATION:
+ case XML_SCHEMA_FACET_WHITESPACE:
+ case XML_SCHEMA_FACET_LENGTH:
+ case XML_SCHEMA_FACET_MAXLENGTH:
+ case XML_SCHEMA_FACET_MINLENGTH:
+ typeDecl->contentType = XML_SCHEMA_CONTENT_SIMPLE;
+ break;
+ }
+ }
+}
+
+/**
+ * xmlSchemaCheckDefaults:
+ * @typeDecl: the schema type definition
+ * @ctxt: the schema parser context
+ *
+ * Checks the facets of a restriction type and precomputes their values
+ */
+static void
+xmlSchemaCheckDefaults(xmlSchemaTypePtr typeDecl,
+ xmlSchemaParserCtxtPtr ctxt,
+ const xmlChar *name) /* for messages, defaults to typeDecl->name */
+{
+ static xmlSchemaTypePtr nonNegativeIntegerType = NULL;
+ if (name == NULL)
+ name = typeDecl->name;
+ if (nonNegativeIntegerType == NULL) {
+ nonNegativeIntegerType = xmlSchemaGetPredefinedType(
+ BAD_CAST "nonNegativeInteger", xmlSchemaNs);
+ }
+ if (typeDecl->type == XML_SCHEMA_TYPE_RESTRICTION) {
+ if (typeDecl->facets != NULL) {
+ xmlSchemaFacetPtr facet = typeDecl->facets;
+ while (facet != NULL) {
+ switch (facet->type) {
+ case XML_SCHEMA_FACET_MININCLUSIVE:
+ case XML_SCHEMA_FACET_MINEXCLUSIVE:
+ case XML_SCHEMA_FACET_MAXINCLUSIVE:
+ case XML_SCHEMA_FACET_MAXEXCLUSIVE: {
+ /*
+ * Okay we need to validate the value at that point.
+ */
+ xmlSchemaValidCtxtPtr vctxt;
+
+ vctxt = xmlSchemaNewValidCtxt(NULL);
+ if (vctxt == NULL)
+ break;
+ xmlSchemaValidateSimpleValue(vctxt, typeDecl,
+ facet->value);
+ facet->val = vctxt->value; /* the facet takes over the computed value */
+ vctxt->value = NULL;
+ if (facet->val == NULL) {
+ xmlSchemaErrorContext(ctxt, NULL,
+ facet->node, NULL);
+ if ((ctxt != NULL) && (ctxt->error != NULL))
+ ctxt->error(ctxt->userData,
+ "Schemas: type %s facet value %s invalid\n",
+ name, facet->value);
+ }
+ xmlSchemaFreeValidCtxt(vctxt);
+ break;
+ }
+ case XML_SCHEMA_FACET_ENUMERATION: {
+ /*
+ * Okay we need to validate the value at that point.
+ */
+ xmlSchemaValidCtxtPtr vctxt;
+ int ret;
+
+ vctxt = xmlSchemaNewValidCtxt(NULL);
+ if (vctxt == NULL)
+ break;
+ ret = xmlSchemaValidateSimpleValue(vctxt, typeDecl,
+ facet->value);
+ if (ret != 0) {
+ xmlSchemaErrorContext(ctxt, NULL,
+ facet->node, NULL);
+ if ((ctxt != NULL) && (ctxt->error != NULL))
+ ctxt->error(ctxt->userData,
+ "Schemas: type %s enumeration value %s invalid\n",
+ name, facet->value);
+ }
+ xmlSchemaFreeValidCtxt(vctxt);
+ break;
+ }
+ case XML_SCHEMA_FACET_PATTERN:
+ facet->regexp = xmlRegexpCompile(facet->value);
+ if (facet->regexp == NULL) {
+ if ((ctxt != NULL) && (ctxt->error != NULL))
+ ctxt->error(ctxt->userData,
+ "Schemas: type %s facet regexp %s invalid\n",
+ name, facet->value);
+ }
+ break;
+ case XML_SCHEMA_FACET_TOTALDIGITS:
+ case XML_SCHEMA_FACET_FRACTIONDIGITS:
+ case XML_SCHEMA_FACET_LENGTH:
+ case XML_SCHEMA_FACET_MAXLENGTH:
+ case XML_SCHEMA_FACET_MINLENGTH: {
+ int ret;
+
+ ret = xmlSchemaValidatePredefinedType(
+ nonNegativeIntegerType, facet->value,
+ &facet->val);
+ if (ret != 0) {
+ xmlSchemaErrorContext(ctxt, NULL,
+ facet->node, NULL);
+ if ((ctxt != NULL) && (ctxt->error != NULL))
+ ctxt->error(ctxt->userData,
+ "Schemas: type %s facet value %s invalid\n",
+ name, facet->value);
+ }
+ break;
+ }
+ case XML_SCHEMA_FACET_WHITESPACE: {
+ if (xmlStrEqual(facet->value, BAD_CAST"preserve")) {
+ facet->whitespace = XML_SCHEMAS_FACET_PRESERVE;
+ } else if (xmlStrEqual(facet->value,
+ BAD_CAST"replace")) {
+ facet->whitespace = XML_SCHEMAS_FACET_REPLACE;
+ } else if (xmlStrEqual(facet->value,
+ BAD_CAST"collapse")) {
+ facet->whitespace = XML_SCHEMAS_FACET_COLLAPSE;
+ } else {
+ xmlSchemaErrorContext(ctxt, NULL,
+ facet->node, NULL);
+ if ((ctxt != NULL) && (ctxt->error != NULL))
+ ctxt->error(ctxt->userData,
+ "Schemas: type %s whiteSpace value %s invalid\n",
+ name, facet->value);
+ }
+ } /* falls through to the break below */
+ default:
+ break;
+ }
+ facet = facet->next;
+ }
+ }
+ }
+}
+
+/**
+ * xmlSchemaAttrFixup:
+ * @attrDecl: the schema attribute definition
+ * @ctxt: the schema parser context
+ * @name: the attribute name, defaults to attrDecl->name when NULL
+ *
+ * Resolve the type of the attribute from its type name or its reference
+ */
+static void
+xmlSchemaAttrFixup(xmlSchemaAttributePtr attrDecl,
+ xmlSchemaParserCtxtPtr ctxt,
+ const xmlChar *name)
+{
+ if (name == NULL)
+ name = attrDecl->name;
+ if (attrDecl->subtypes != NULL) /* already resolved */
+ return;
+ if (attrDecl->typeName != NULL) {
+ xmlSchemaTypePtr type;
+
+ type = xmlSchemaGetType(ctxt->schema, attrDecl->typeName,
+ attrDecl->typeNs);
+ if (type == NULL) {
+ if ((ctxt != NULL) && (ctxt->error != NULL))
+ ctxt->error(ctxt->userData,
+ "Schemas: attribute %s type %s not found\n",
+ name, attrDecl->typeName);
+ }
+ attrDecl->subtypes = type; /* stays NULL when the lookup failed */
+ } else if (attrDecl->ref != NULL) {
+ xmlSchemaAttributePtr ref;
+
+ ref = xmlHashLookup2(ctxt->schema->attrDecl, attrDecl->ref,
+ attrDecl->refNs);
+ if (ref == NULL) {
+ if ((ctxt != NULL) && (ctxt->error != NULL))
+ ctxt->error(ctxt->userData,
+ "Schemas: attribute %s reference %s not found\n",
+ name, attrDecl->ref);
+ return;
+ }
+ xmlSchemaAttrFixup(ref, ctxt, NULL); /* fix the target first, then share its type */
+ attrDecl->subtypes = ref->subtypes;
+ } else {
+ if ((ctxt != NULL) && (ctxt->error != NULL))
+ ctxt->error(ctxt->userData,
+ "Schemas: attribute %s has no type nor reference\n",
+ name);
+ }
+}
+
+/**
+ * xmlSchemaParse:
+ * @ctxt: a schema parser context; ctxt->URL locates the schema document
+ *
+ * Load and XML parse the schema definition resource named by ctxt->URL and
+ * build an internal XML Schema structure which can be used to validate
+ * instances. On success the parsed document is attached to the result.
+ * *WARNING* this interface is highly subject to change
+ *
+ * Returns the internal XML Schema structure built from the resource or
+ * NULL in case of error
+ */
+xmlSchemaPtr
+xmlSchemaParse(xmlSchemaParserCtxtPtr ctxt)
+{
+ xmlSchemaPtr ret = NULL;
+ xmlDocPtr doc;
+ xmlNodePtr root, cur, delete;
+
+ xmlSchemaInitTypes();
+
+ if ((ctxt == NULL) || (ctxt->URL == NULL))
+ return (NULL);
+
+ ctxt->counter = 0;
+ ctxt->container = NULL;
+
+ /*
+ * First step is to parse the input document into an DOM/Infoset
+ */
+ doc = xmlParseFile((const char *) ctxt->URL);
+ if (doc == NULL) {
+ if (ctxt->error != NULL)
+ ctxt->error(ctxt->userData,
+ "xmlSchemaParse: could not load %s\n", ctxt->URL);
+ return (NULL);
+ }
+
+ /* Then extract the root and Schema parse it */
+ root = xmlDocGetRootElement(doc);
+ if (root == NULL) {
+ if (ctxt->error != NULL)
+ ctxt->error(ctxt->userData, "xmlSchemaParse: %s is empty\n",
+ ctxt->URL);
+ xmlFreeDoc(doc);
+ return (NULL);
+ }
+
+ /*
+ * Remove all the blank text nodes
+ */
+ delete = NULL;
+ cur = root;
+ while (cur != NULL) {
+ if (delete != NULL) { /* freed only once the walk has moved past it */
+ xmlUnlinkNode(delete);
+ xmlFreeNode(delete);
+ delete = NULL;
+ }
+ if (cur->type == XML_TEXT_NODE) {
+ if (IS_BLANK_NODE(cur)) {
+ if (xmlNodeGetSpacePreserve(cur) != 1) {
+ delete = cur;
+ }
+ }
+ } else if ((cur->type != XML_ELEMENT_NODE) &&
+ (cur->type != XML_CDATA_SECTION_NODE)) {
+ delete = cur;
+ goto skip_children;
+ }
+
+ /*
+ * Skip to next node
+ */
+ if (cur->children != NULL) {
+ if ((cur->children->type != XML_ENTITY_DECL) &&
+ (cur->children->type != XML_ENTITY_REF_NODE) &&
+ (cur->children->type != XML_ENTITY_NODE)) {
+ cur = cur->children;
+ continue;
+ }
+ }
+skip_children:
+ if (cur->next != NULL) {
+ cur = cur->next;
+ continue;
+ }
+
+ do {
+ cur = cur->parent;
+ if (cur == NULL)
+ break;
+ if (cur == root) {
+ cur = NULL;
+ break;
+ }
+ if (cur->next != NULL) {
+ cur = cur->next;
+ break;
+ }
+ } while (cur != NULL);
+ }
+ if (delete != NULL) { /* last node scheduled for removal */
+ xmlUnlinkNode(delete);
+ xmlFreeNode(delete);
+ }
+
+ /* Then do the parsing for good, nothing else owns doc on failure */
+ ret = xmlSchemaParseSchema(ctxt, root);
+ if (ret == NULL) {
+ xmlFreeDoc(doc);
+ return (NULL);
+ }
+ ret->doc = doc;
+
+ /*
+ * Then fix all the references.
+ */
+ ctxt->schema = ret;
+ xmlHashScanFull(ret->elemDecl,
+ (xmlHashScannerFull) xmlSchemaRefFixupCallback, ctxt);
+
+ /*
+ * Then fixup all types properties
+ */
+ xmlHashScan(ret->typeDecl, (xmlHashScanner) xmlSchemaTypeFixup, ctxt);
+
+ /*
+ * Then build the content model for all elements
+ */
+ xmlHashScan(ret->elemDecl,
+ (xmlHashScanner) xmlSchemaBuildContentModel, ctxt);
+
+ /*
+ * Then check the defaults part of the type like facets values
+ */
+ xmlHashScan(ret->typeDecl, (xmlHashScanner) xmlSchemaCheckDefaults, ctxt);
+
+ /*
+ * Then fixup all attributes declarations
+ */
+ xmlHashScan(ret->attrDecl, (xmlHashScanner) xmlSchemaAttrFixup, ctxt);
+
+ return (ret);
+}
+
+/**
+ * xmlSchemaSetParserErrors:
+ * @ctxt: a schema parser context
+ * @err: the error callback
+ * @warn: the warning callback
+ * @ctx: the user data stored as ctxt->userData
+ *
+ * Set the callback functions used to report errors and warnings
+ * while parsing a schema; @ctx is handed back to the error callback
+ * as its first argument.
+ * Nothing is done if @ctxt is NULL.
+ */
+void
+xmlSchemaSetParserErrors(xmlSchemaParserCtxtPtr ctxt,
+ xmlSchemaValidityErrorFunc err,
+ xmlSchemaValidityWarningFunc warn, void *ctx) {
+ if (ctxt != NULL) {
+ ctxt->userData = ctx;
+ ctxt->warning = warn;
+ ctxt->error = err;
+ }
+}
+
+/************************************************************************
+ * *
+ * Simple type validation *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlSchemaValidateSimpleValue:
+ * @ctxt: a schema validation context
+ * @type: the type declaration
+ * @value: the value to validate, list items are cut in place temporarily
+ *
+ * Validate a value against a simple type, following base types recursively
+ *
+ * Returns 0 if the value is valid, a positive error code
+ * number otherwise and -1 in case of internal or API error.
+ */
+static int
+xmlSchemaValidateSimpleValue(xmlSchemaValidCtxtPtr ctxt,
+ xmlSchemaTypePtr type,
+ xmlChar *value) {
+ int ret = 0;
+ /*
+ * First normalize the value accordingly to Schema Datatype
+ * 4.3.6 whiteSpace definition of the whiteSpace facet of type
+ */
+ /*
+ * Then check the normalized value against the lexical space of the
+ * type.
+ */
+ if (type->type == XML_SCHEMA_TYPE_BASIC) {
+ if (ctxt->value != NULL) { /* drop the value of a previous validation */
+ xmlSchemaFreeValue(ctxt->value);
+ ctxt->value = NULL;
+ }
+ ret = xmlSchemaValidatePredefinedType(type, value, &(ctxt->value));
+ } else if (type->type == XML_SCHEMA_TYPE_RESTRICTION) {
+ xmlSchemaTypePtr base;
+ xmlSchemaFacetPtr facet;
+ int tmp;
+
+ base = type->baseType;
+ if (base != NULL) {
+ ret = xmlSchemaValidateSimpleValue(ctxt, base, value);
+ } else if (type->subtypes != NULL) {
+
+ }
+ /*
+ * Do not validate facets when working on building the Schemas
+ */
+ if (ctxt->schema != NULL) {
+ if (ret == 0) {
+ facet = type->facets;
+ while (facet != NULL) {
+ tmp = xmlSchemaValidateFacet(base, facet, value,
+ ctxt->value);
+ if (tmp != 0)
+ ret = tmp; /* the last failing facet decides the code */
+ facet = facet->next;
+ }
+ }
+ }
+ } else if (type->type == XML_SCHEMA_TYPE_SIMPLE) {
+ xmlSchemaTypePtr base;
+
+ base = type->subtypes;
+ if (base != NULL) {
+ ret = xmlSchemaValidateSimpleValue(ctxt, base, value);
+ } else {
+ TODO
+ }
+ } else if (type->type == XML_SCHEMA_TYPE_LIST) {
+ xmlSchemaTypePtr base;
+ xmlChar *cur, *end, tmp;
+ int ret2;
+
+ base = type->subtypes;
+ if (base == NULL) {
+ ctxt->err = XML_SCHEMAS_ERR_INTERNAL;
+ if (ctxt->error != NULL) {
+ xmlSchemaErrorContext(NULL, ctxt->schema, type->node, NULL);
+ ctxt->error(ctxt->userData,
+ "Internal: List type %s has no base type\n",
+ type->name);
+ }
+ return(-1);
+ }
+ cur = value;
+ do {
+ while (IS_BLANK(*cur)) cur++;
+ end = cur;
+ while ((*end != 0) && (!(IS_BLANK(*end)))) end++;
+ if (end == cur) /* only blanks were left */
+ break;
+ tmp = *end;
+ *end = 0; /* terminate the item in place, restored below */
+ ret2 = xmlSchemaValidateSimpleValue(ctxt, base, cur);
+ if (ret2 != 0)
+ ret = 1;
+ *end = tmp;
+ cur = end;
+ } while (*cur != 0);
+ } else {
+ TODO
+ }
+ return(ret);
+}
+
+/************************************************************************
+ * *
+ * DOM Validation code *
+ * *
+ ************************************************************************/
+
+static int xmlSchemaValidateContent(xmlSchemaValidCtxtPtr ctxt,
+ xmlNodePtr node);
+static int xmlSchemaValidateAttributes(xmlSchemaValidCtxtPtr ctxt,
+ xmlNodePtr elem, xmlSchemaAttributePtr attributes);
+static int xmlSchemaValidateType(xmlSchemaValidCtxtPtr ctxt,
+ xmlNodePtr elem, xmlSchemaElementPtr elemDecl, xmlSchemaTypePtr type);
+
+/**
+ * xmlSchemaRegisterAttributes:
+ * @ctxt: a schema validation context
+ * @attrs: a list of attributes
+ *
+ * Append each attribute of the list, in the unknown state, to ctxt->attr
+ *
+ * Returns -1 if the array cannot be grown, 0 otherwise
+ */
+static int
+xmlSchemaRegisterAttributes(xmlSchemaValidCtxtPtr ctxt,
+ xmlAttrPtr attrs) {
+ for (; attrs != NULL; attrs = attrs->next) {
+ /* double the array when it is full */
+ if (ctxt->attrNr >= ctxt->attrMax) {
+ xmlSchemaAttrStatePtr grown;
+
+ ctxt->attrMax *= 2;
+ grown = (xmlSchemaAttrStatePtr)
+ xmlRealloc(ctxt->attr, ctxt->attrMax *
+ sizeof(xmlSchemaAttrState));
+ if (grown == NULL) {
+ ctxt->attrMax /= 2;
+ return(-1);
+ }
+ ctxt->attr = grown;
+ }
+ ctxt->attr[ctxt->attrNr].state = XML_SCHEMAS_ATTR_UNKNOWN;
+ ctxt->attr[ctxt->attrNr].attr = attrs;
+ ctxt->attrNr++;
+ }
+ return(0);
+}
+
+/**
+ * xmlSchemaCheckAttributes:
+ * @ctxt: a schema validation context
+ * @node: the node carrying it.
+ *
+ * Report every registered attribute of the current node, from
+ * ctxt->attrBase on, which is still in the unknown state.
+ *
+ * Returns 0 if validity constraints are met, 1 otherwise.
+ */
+static int
+xmlSchemaCheckAttributes(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) {
+ int invalid = 0;
+ int idx;
+
+ for (idx = ctxt->attrBase;
+ (idx < ctxt->attrNr) && (ctxt->attr[idx].attr != NULL); idx++) {
+ xmlSchemaAttrStatePtr cur = &(ctxt->attr[idx]);
+
+ if (cur->state != XML_SCHEMAS_ATTR_UNKNOWN)
+ continue;
+ invalid = 1;
+ ctxt->err = XML_SCHEMAS_ERR_ATTRUNKNOWN;
+ if (ctxt->error != NULL)
+ ctxt->error(ctxt->userData,
+ "Attribute %s on %s is unknown\n",
+ cur->attr->name, node->name);
+ }
+ return(invalid);
+}
+
+/**
+ * xmlSchemaValidateSimpleContent:
+ * @ctxt: a schema validation context, provides ctxt->node and ctxt->type
+ * @node: unused
+ *
+ * Validate the content of ctxt->node against the simple type ctxt->type
+ * (only restriction types are handled so far)
+ *
+ * Returns 0 if the element is schemas valid, a positive error code
+ * number otherwise and -1 in case of internal or API error.
+ */
+static int
+xmlSchemaValidateSimpleContent(xmlSchemaValidCtxtPtr ctxt,
+ ATTRIBUTE_UNUSED xmlNodePtr node) {
+ xmlNodePtr child;
+ xmlSchemaTypePtr type, base;
+ xmlChar *value;
+ int ret = 0, tmp;
+
+ child = ctxt->node;
+ type = ctxt->type;
+
+ /*
+ * Validation Rule: Element Locally Valid (Type): 3.1.3
+ */
+ value = xmlNodeGetContent(child); /* freed before returning */
+ /* xmlSchemaValidateSimpleValue(ctxt, type, value); */
+ switch (type->type) {
+ case XML_SCHEMA_TYPE_RESTRICTION: {
+ xmlSchemaFacetPtr facet;
+
+ base = type->baseType;
+ if (base != NULL) {
+ ret = xmlSchemaValidateSimpleValue(ctxt, base, value);
+ } else {
+ TODO
+ }
+ if (ret == 0) { /* facets are only checked on a valid base value */
+ facet = type->facets;
+ while (facet != NULL) {
+ tmp = xmlSchemaValidateFacet(base, facet, value,
+ ctxt->value);
+ if (tmp != 0)
+ ret = tmp;
+ facet = facet->next;
+ }
+ }
+ break;
+ }
+ default:
+ TODO
+ }
+ if (value != NULL)
+ xmlFree(value);
+
+ return(ret);
+}
+
+/**
+ * xmlSchemaValidateCheckNodeList
+ * @nodelist: the list of nodes
+ *
+ * Check the node list is only made of text and CDATA nodes, comments and
+ * processing instructions being ignored (entity references: TODO)
+ *
+ * Returns 1 if true, 0 if false
+ */
+static int
+xmlSchemaValidateCheckNodeList(xmlNodePtr nodelist) {
+ while (nodelist != NULL) {
+ if (nodelist->type == XML_ENTITY_REF_NODE) {
+ TODO /* implement recursion in the entity content */
+ }
+ if ((nodelist->type != XML_TEXT_NODE) &&
+ (nodelist->type != XML_COMMENT_NODE) &&
+ (nodelist->type != XML_PI_NODE) &&
+ (nodelist->type != XML_CDATA_SECTION_NODE)) {
+ return(0);
+ }
+ nodelist = nodelist->next;
+ }
+ return(1);
+}
+
+/**
+ * xmlSchemaSkipIgnored:
+ * @ctxt: a schema validation context (unused)
+ * @type: the current type context
+ * @node: the first node to consider
+ *
+ * Skip ignorable nodes in that context: comments, any text for mixed
+ * content and blank text for element-only content
+ *
+ * Returns the first sibling which is not ignorable, or NULL if none is left
+ */
+static xmlNodePtr
+xmlSchemaSkipIgnored(ATTRIBUTE_UNUSED xmlSchemaValidCtxtPtr ctxt,
+ xmlSchemaTypePtr type,
+ xmlNodePtr node) {
+ int textAllowed, elementsOnly;
+
+ /* TODO complete and handle entities */
+ textAllowed = ((type->contentType == XML_SCHEMA_CONTENT_MIXED) ||
+ (type->contentType == XML_SCHEMA_CONTENT_MIXED_OR_ELEMENTS));
+ elementsOnly = (type->contentType == XML_SCHEMA_CONTENT_ELEMENTS);
+ for (; node != NULL; node = node->next) {
+ if (node->type == XML_COMMENT_NODE)
+ continue;
+ if (node->type != XML_TEXT_NODE)
+ break;
+ if ((!textAllowed) && !(elementsOnly && IS_BLANK_NODE(node)))
+ break;
+ }
+ return(node);
+}
+
+/**
+ * xmlSchemaValidateCallback:
+ * @ctxt:  a schema validation context
+ * @name:  the name of the element detected (might be NULL)
+ * @type:  the type
+ * @node:  the node which triggered the transition
+ *
+ * A transition has been made in the automata associated to an element
+ * content model: validate @node against @type, then put the context
+ * type and node back to what they were on entry.
+ */
+static void
+xmlSchemaValidateCallback(xmlSchemaValidCtxtPtr ctxt,
+                          ATTRIBUTE_UNUSED const xmlChar *name,
+                          xmlSchemaTypePtr type,
+                          xmlNodePtr node) {
+    xmlSchemaTypePtr savedType;
+    xmlNodePtr savedNode;
+
+    savedType = ctxt->type;
+    savedNode = ctxt->node;
+#ifdef DEBUG_CONTENT
+    printf("xmlSchemaValidateCallback: %s, %s, %s\n",
+           name, type->name, node->name);
+#endif
+    ctxt->type = type;
+    ctxt->node = node;
+    xmlSchemaValidateContent(ctxt, node);
+    /* the validation moves both fields forward, undo that */
+    ctxt->node = savedNode;
+    ctxt->type = savedType;
+}
+
+
+#if 0
+/**
+ * xmlSchemaValidateSimpleRestrictionType:
+ * @ctxt: a schema validation context
+ * @node: the top node.
+ *
+ * Validate the content of a restriction type.
+ * This function is compiled out by the surrounding "#if 0".
+ *
+ * NOTE(review): @ctxt is dereferenced before it is compared to NULL, and
+ * on the success path the value returned is the positive result of the
+ * node list check, not 0 - both need fixing before re-enabling this.
+ *
+ * Returns 0 if the element is schemas valid, a positive error code
+ * number otherwise and -1 in case of internal or API error.
+ */
+static int
+xmlSchemaValidateSimpleRestrictionType(xmlSchemaValidCtxtPtr ctxt,
+ xmlNodePtr node)
+{
+ xmlNodePtr child;
+ xmlSchemaTypePtr type;
+ int ret;
+
+ child = ctxt->node;
+ type = ctxt->type;
+
+ if ((ctxt == NULL) || (type == NULL)) {
+ ctxt->err = XML_SCHEMAS_ERR_INTERNAL;
+ if (ctxt->error != NULL)
+ ctxt->error(ctxt->userData,
+ "Internal error: xmlSchemaValidateSimpleRestrictionType %s\n",
+ node->name);
+ return (-1);
+ }
+ /*
+ * Only text and text based entities references shall be found there
+ */
+ ret = xmlSchemaValidateCheckNodeList(child);
+ if (ret < 0) {
+ ctxt->err = XML_SCHEMAS_ERR_INTERNAL;
+ if (ctxt->error != NULL)
+ ctxt->error(ctxt->userData,
+ "Internal error: xmlSchemaValidateSimpleType %s content\n",
+ node->name);
+ return (-1);
+ } else if (ret == 0) {
+ ctxt->err = XML_SCHEMAS_ERR_NOTSIMPLE;
+ if (ctxt->error != NULL)
+ ctxt->error(ctxt->userData,
+ "Element %s content is not a simple type\n",
+ node->name);
+ return (-1);
+ }
+ /* validate against the subtypes, then restore the context type */
+ ctxt->type = type->subtypes;
+ xmlSchemaValidateContent(ctxt, node);
+ ctxt->type = type;
+ return (ret);
+}
+#endif
+
+/**
+ * xmlSchemaValidateSimpleType:
+ * @ctxt:  a schema validation context
+ * @node:  the top node.
+ *
+ * Validate the content of an simple type: the node list held in
+ * @ctxt->node must be made of character data only, @node may only carry
+ * the "type", "nil", "schemaLocation" and "noNamespaceSchemaLocation"
+ * attributes of the xmlSchemaInstanceNs namespace, and the content is
+ * then checked against the subtypes of @ctxt->type.
+ *
+ * Returns 0 if the element is schemas valid, a positive error code
+ *         number otherwise and -1 in case of internal or API error.
+ */
+static int
+xmlSchemaValidateSimpleType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) {
+    xmlNodePtr child;
+    xmlSchemaTypePtr type;
+    xmlAttrPtr attr;
+    int ret;
+
+    /* the context has to be checked before anything is read from it */
+    if (ctxt == NULL)
+        return(-1);
+
+    child = ctxt->node;
+    type = ctxt->type;
+
+    if (type == NULL) {
+        ctxt->err = XML_SCHEMAS_ERR_INTERNAL;
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                    "Internal error: xmlSchemaValidateSimpleType %s\n",
+                        node->name);
+        return(-1);
+    }
+    /*
+     * Only text and text based entities references shall be found there
+     */
+    ret = xmlSchemaValidateCheckNodeList(child);
+    if (ret < 0) {
+        ctxt->err = XML_SCHEMAS_ERR_INTERNAL;
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                "Internal error: xmlSchemaValidateSimpleType %s content\n",
+                        node->name);
+        return(-1);
+    } else if (ret == 0) {
+        ctxt->err = XML_SCHEMAS_ERR_NOTSIMPLE;
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                        "Element %s content is not a simple type\n",
+                        node->name);
+        return(-1);
+    }
+    /*
+     * Validation Rule: Element Locally Valid (Type): 3.1.1
+     */
+    attr = node->properties;
+    while (attr != NULL) {
+        if ((attr->ns == NULL) ||
+            (!xmlStrEqual(attr->ns->href, xmlSchemaInstanceNs)) ||
+            ((!xmlStrEqual(attr->name, BAD_CAST"type")) &&
+             (!xmlStrEqual(attr->name, BAD_CAST"nil")) &&
+             (!xmlStrEqual(attr->name, BAD_CAST"schemaLocation")) &&
+             (!xmlStrEqual(attr->name, BAD_CAST"noNamespaceSchemaLocation")))) {
+            ctxt->err = XML_SCHEMAS_ERR_INVALIDATTR;
+            /*
+             * The attribute belongs to @node; the child list may be
+             * empty so its first node cannot be used to name the element.
+             */
+            if (ctxt->error != NULL)
+                ctxt->error(ctxt->userData,
+                        "Element %s: attribute %s should not be present\n",
+                            node->name, attr->name);
+            return(ctxt->err);
+        }
+        /* allowed attribute, move on (the loop used to spin here) */
+        attr = attr->next;
+    }
+
+    /* validate the text against the subtypes, then restore the type */
+    ctxt->type = type->subtypes;
+    ret = xmlSchemaValidateSimpleContent(ctxt, node);
+    ctxt->type = type;
+    return(ret);
+}
+
+/**
+ * xmlSchemaValidateElementType:
+ * @ctxt:  a schema validation context
+ * @node:  the top node, named in the diagnostics
+ *
+ * Validate the content of an element type: the node held in @ctxt->node
+ * is matched by name against the declaration held in @ctxt->type, then
+ * its attributes and its content are checked.
+ * Validation Rule: Element Locally Valid (Complex Type)
+ *
+ * Returns 0 if the element is schemas valid, a positive error code
+ *         number otherwise and -1 in case of internal or API error.
+ */
+static int
+xmlSchemaValidateElementType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) {
+    xmlNodePtr child;
+    xmlSchemaTypePtr type;
+    xmlRegExecCtxtPtr oldregexp; /* cont model of the parent */
+    xmlSchemaElementPtr decl;
+    int ret, attrBase;
+
+    /* the context has to be checked before anything is read from it */
+    if (ctxt == NULL)
+        return(-1);
+
+    child = ctxt->node;
+    type = ctxt->type;
+
+    if (type == NULL) {
+        ctxt->err = XML_SCHEMAS_ERR_INTERNAL;
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                    "Internal error: xmlSchemaValidateElementType\n");
+        return(-1);
+    }
+    if (child == NULL) {
+        if (type->minOccurs > 0) {
+            ctxt->err = XML_SCHEMAS_ERR_MISSING;
+            if (ctxt->error != NULL)
+                ctxt->error(ctxt->userData,
+                            "Element %s: missing child %s\n",
+                            node->name, type->name);
+        }
+        return(ctxt->err);
+    }
+
+    /*
+     * Verify the element matches
+     */
+    if (!xmlStrEqual(child->name, type->name)) {
+        ctxt->err = XML_SCHEMAS_ERR_WRONGELEM;
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                        "Element %s: missing child %s found %s\n",
+                        node->name, type->name, child->name);
+        return(ctxt->err);
+    }
+    /*
+     * Verify the attributes
+     */
+    attrBase = ctxt->attrBase;
+    ctxt->attrBase = ctxt->attrNr;
+    xmlSchemaRegisterAttributes(ctxt, child->properties);
+    xmlSchemaValidateAttributes(ctxt, child, type->attributes);
+    /*
+     * Verify the element content recursively
+     */
+    decl = (xmlSchemaElementPtr) type;
+    oldregexp = ctxt->regexp;
+    if (decl->contModel != NULL) {
+        ctxt->regexp = xmlRegNewExecCtxt(decl->contModel,
+                             (xmlRegExecCallbacks) xmlSchemaValidateCallback,
+                             ctxt);
+#ifdef DEBUG_AUTOMATA
+        xmlGenericError(xmlGenericErrorContext,
+                        "====> %s\n", node->name);
+#endif
+    }
+    xmlSchemaValidateType(ctxt, child, (xmlSchemaElementPtr)type,
+                          type->subtypes);
+
+    if (decl->contModel != NULL) {
+        /* a NULL token asks the automata whether a final state is reached */
+        ret = xmlRegExecPushString(ctxt->regexp, NULL, NULL);
+#ifdef DEBUG_AUTOMATA
+        xmlGenericError(xmlGenericErrorContext,
+                        "====> %s : %d\n", node->name, ret);
+#endif
+        /*
+         * NOTE(review): both failures below are reported but ctxt->err
+         * is left untouched - confirm whether an error code is expected.
+         */
+        if (ret == 0) {
+            if (ctxt->error != NULL)
+                ctxt->error(ctxt->userData, "Element %s content check failed\n",
+                            node->name);
+        } else if (ret < 0) {
+            if (ctxt->error != NULL)
+                ctxt->error(ctxt->userData, "Element %s content check failure\n",
+                            node->name);
+#ifdef DEBUG_CONTENT
+        } else {
+            xmlGenericError(xmlGenericErrorContext,
+                            "Element %s content check succeeded\n", node->name);
+
+#endif
+        }
+        xmlRegFreeExecCtxt(ctxt->regexp);
+    }
+    /*
+     * Verify that all attributes were Schemas-validated
+     */
+    xmlSchemaCheckAttributes(ctxt, node);
+    ctxt->attrNr = ctxt->attrBase;
+    ctxt->attrBase = attrBase;
+
+    /* back to the automata of the parent */
+    ctxt->regexp = oldregexp;
+
+    ctxt->node = child;
+    ctxt->type = type;
+    return(ctxt->err);
+}
+
+/**
+ * xmlSchemaValidateBasicType:
+ * @ctxt:  a schema validation context
+ * @node:  the top node.
+ *
+ * Validate the content of an element expected to be a basic type type:
+ * the node list held in @ctxt->node must not contain elements, and the
+ * text content is checked against the predefined type in @ctxt->type.
+ * The computed value replaces the one cached in @ctxt->value.
+ *
+ * Returns 0 if the element is schemas valid, a positive error code
+ *         number otherwise and -1 in case of internal or API error.
+ */
+static int
+xmlSchemaValidateBasicType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) {
+    int ret;
+    xmlNodePtr child, cur;
+    xmlSchemaTypePtr type;
+    xmlChar *value;     /* lexical representation */
+
+    /* the context has to be checked before anything is read from it */
+    if (ctxt == NULL)
+        return(-1);
+
+    child = ctxt->node;
+    type = ctxt->type;
+
+    if (type == NULL) {
+        ctxt->err = XML_SCHEMAS_ERR_INTERNAL;
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData,
+                    "Internal error: xmlSchemaValidateBasicType\n");
+        return(-1);
+    }
+    /*
+     * First check the content model of the node.
+     */
+    cur = child;
+    while (cur != NULL) {
+        switch (cur->type) {
+            case XML_TEXT_NODE:
+            case XML_CDATA_SECTION_NODE:
+            case XML_PI_NODE:
+            case XML_COMMENT_NODE:
+            case XML_XINCLUDE_START:
+            case XML_XINCLUDE_END:
+                break;
+            case XML_ENTITY_REF_NODE:
+            case XML_ENTITY_NODE:
+                TODO
+                break;
+            case XML_ELEMENT_NODE:
+                ctxt->err = XML_SCHEMAS_ERR_INVALIDELEM;
+                if (ctxt->error != NULL)
+                    ctxt->error(ctxt->userData,
+                            "Element %s: child %s should not be present\n",
+                                node->name, cur->name);
+                return(ctxt->err);
+            case XML_ATTRIBUTE_NODE:
+            case XML_DOCUMENT_NODE:
+            case XML_DOCUMENT_TYPE_NODE:
+            case XML_DOCUMENT_FRAG_NODE:
+            case XML_NOTATION_NODE:
+            case XML_HTML_DOCUMENT_NODE:
+            case XML_DTD_NODE:
+            case XML_ELEMENT_DECL:
+            case XML_ATTRIBUTE_DECL:
+            case XML_ENTITY_DECL:
+            case XML_NAMESPACE_DECL:
+#ifdef LIBXML_DOCB_ENABLED
+            case XML_DOCB_DOCUMENT_NODE:
+#endif
+                ctxt->err = XML_SCHEMAS_ERR_INVALIDELEM;
+                if (ctxt->error != NULL)
+                    ctxt->error(ctxt->userData,
+                            "Element %s: node type %d unexpected here\n",
+                                node->name, cur->type);
+                return(ctxt->err);
+        }
+        cur = cur->next;
+    }
+    /* the lexical value is the whole text content of the parent */
+    if (child == NULL)
+        value = NULL;
+    else
+        value = xmlNodeGetContent(child->parent);
+
+    /* drop the value computed by a previous validation */
+    if (ctxt->value != NULL) {
+        xmlSchemaFreeValue(ctxt->value);
+        ctxt->value = NULL;
+    }
+    ret = xmlSchemaValidatePredefinedType(type, value, &(ctxt->value));
+    if (value != NULL)
+        xmlFree(value);
+    /*
+     * The error callback is optional, it must not be called blindly.
+     * NOTE(review): ctxt->err is not updated on failure, only the
+     * returned value tells - confirm that the callers look at it.
+     */
+    if ((ret != 0) && (ctxt->error != NULL)) {
+        ctxt->error(ctxt->userData,
+                    "Element %s: failed to validate basic type %s\n",
+                    node->name, type->name);
+    }
+    return(ret);
+}
+
+/**
+ * xmlSchemaValidateComplexType:
+ * @ctxt:  a schema validation context
+ * @node:  the top node.
+ *
+ * Validate the content of an element expected to be a complex type type
+ * xmlschema-1.html#cvc-complex-type
+ * Validation Rule: Element Locally Valid (Complex Type)
+ *
+ * For element and mixed content every child element found in the list
+ * starting at @ctxt->node is pushed to the automata in @ctxt->regexp.
+ *
+ * Returns 0 if the element is schemas valid, a positive error code
+ *         number otherwise and -1 in case of internal or API error.
+ */
+static int
+xmlSchemaValidateComplexType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) {
+    xmlNodePtr child;
+    xmlSchemaTypePtr type;
+    int ret;
+
+    child = ctxt->node;
+    type = ctxt->type;
+
+    /* 3.4.4 1 was verified on the caller */
+
+    switch (type->contentType) {
+        case XML_SCHEMA_CONTENT_EMPTY:
+            /*
+             * NOTE(review): the problem is reported but ctxt->err is
+             * not set - confirm whether an error code is expected.
+             */
+            if (child != NULL) {
+                if (ctxt->error != NULL)
+                    ctxt->error(ctxt->userData,
+                                "Element %s is supposed to be empty\n",
+                                node->name);
+            }
+            break;
+        case XML_SCHEMA_CONTENT_ELEMENTS:
+        case XML_SCHEMA_CONTENT_MIXED:
+        case XML_SCHEMA_CONTENT_MIXED_OR_ELEMENTS:
+            /*
+             * Skip ignorable nodes in that context
+             */
+            child = xmlSchemaSkipIgnored(ctxt, type, child);
+            /*
+             * ctxt->type is left alone here: the automata callback
+             * installs the type of each child itself. The code used to
+             * store an uninitialized pointer in the context.
+             */
+            while (child != NULL) {
+                if (child->type == XML_ELEMENT_NODE) {
+                    ret = xmlRegExecPushString(ctxt->regexp,
+                                               child->name, child);
+#ifdef DEBUG_AUTOMATA
+                    if (ret < 0)
+                        xmlGenericError(xmlGenericErrorContext,
+                                        " --> %s Error\n", child->name);
+                    else
+                        xmlGenericError(xmlGenericErrorContext,
+                                        " --> %s\n", child->name);
+#endif
+                }
+                child = child->next;
+                /*
+                 * Skip ignorable nodes in that context
+                 */
+                child = xmlSchemaSkipIgnored(ctxt, type, child);
+            }
+            break;
+        default:
+            TODO
+            xmlGenericError(xmlGenericErrorContext,
+                            "unimplemented content type %d\n",
+                            type->contentType);
+    }
+    return(ctxt->err);
+}
+
+/**
+ * xmlSchemaValidateContent:
+ * @ctxt: a schema validation context
+ * @node: the node handed to the type specific validators
+ *
+ * Validate the content of an element against the type.
+ * The type is read from @ctxt->type and dispatched on its kind; most
+ * kinds are not implemented yet and only emit the TODO diagnostic.
+ * The values returned by the type specific validators are not used.
+ * Unless the context node is NULL, the context is moved on exit to the
+ * next sibling node and to the next type.
+ *
+ * Returns the error code stored in the context: 0 if the element is
+ * schemas valid, a positive error code number otherwise; -1 is returned
+ * when an element reference was not resolved.
+ */
+static int
+xmlSchemaValidateContent(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr node) {
+ xmlNodePtr child;
+ xmlSchemaTypePtr type;
+
+ /* NOTE(review): child is assigned but not read in this function */
+ child = ctxt->node;
+ type = ctxt->type;
+
+ switch (type->type) {
+ case XML_SCHEMA_TYPE_ANY:
+ /* Any type will do it, fine */
+ TODO /* handle recursivity */
+ break;
+ case XML_SCHEMA_TYPE_COMPLEX:
+ xmlSchemaValidateComplexType(ctxt, node);
+ break;
+ case XML_SCHEMA_TYPE_ELEMENT: {
+ xmlSchemaElementPtr decl = (xmlSchemaElementPtr) type;
+ /*
+ * Handle element reference here
+ */
+ if (decl->ref != NULL) {
+ if (decl->refDecl == NULL) {
+ ctxt->err = XML_SCHEMAS_ERR_INTERNAL;
+ if (ctxt->error != NULL)
+ ctxt->error(ctxt->userData,
+ "Internal error: element reference %s not resolved\n",
+ decl->ref);
+ return(-1);
+ }
+ /* validate against the declaration the reference points to */
+ ctxt->type = (xmlSchemaTypePtr) decl->refDecl;
+ decl = decl->refDecl;
+ }
+ xmlSchemaValidateElementType(ctxt, node);
+ ctxt->type = type;
+ break;
+ }
+ case XML_SCHEMA_TYPE_BASIC:
+ xmlSchemaValidateBasicType(ctxt, node);
+ break;
+ case XML_SCHEMA_TYPE_FACET:
+ TODO
+ break;
+ case XML_SCHEMA_TYPE_SIMPLE:
+ xmlSchemaValidateSimpleType(ctxt, node);
+ break;
+ case XML_SCHEMA_TYPE_SEQUENCE:
+ TODO
+ break;
+ case XML_SCHEMA_TYPE_CHOICE:
+ TODO
+ break;
+ case XML_SCHEMA_TYPE_ALL:
+ TODO
+ break;
+ case XML_SCHEMA_TYPE_SIMPLE_CONTENT:
+ TODO
+ break;
+ case XML_SCHEMA_TYPE_COMPLEX_CONTENT:
+ TODO
+ break;
+ case XML_SCHEMA_TYPE_UR:
+ TODO
+ break;
+ case XML_SCHEMA_TYPE_RESTRICTION:
+ /*xmlSchemaValidateRestrictionType(ctxt, node); */
+ TODO
+ break;
+ case XML_SCHEMA_TYPE_EXTENSION:
+ TODO
+ break;
+ case XML_SCHEMA_TYPE_ATTRIBUTE:
+ TODO
+ break;
+ case XML_SCHEMA_TYPE_GROUP:
+ TODO
+ break;
+ case XML_SCHEMA_TYPE_NOTATION:
+ TODO
+ break;
+ case XML_SCHEMA_TYPE_LIST:
+ TODO
+ break;
+ case XML_SCHEMA_TYPE_UNION:
+ TODO
+ break;
+ case XML_SCHEMA_FACET_MININCLUSIVE:
+ TODO
+ break;
+ case XML_SCHEMA_FACET_MINEXCLUSIVE:
+ TODO
+ break;
+ case XML_SCHEMA_FACET_MAXINCLUSIVE:
+ TODO
+ break;
+ case XML_SCHEMA_FACET_MAXEXCLUSIVE:
+ TODO
+ break;
+ case XML_SCHEMA_FACET_TOTALDIGITS:
+ TODO
+ break;
+ case XML_SCHEMA_FACET_FRACTIONDIGITS:
+ TODO
+ break;
+ case XML_SCHEMA_FACET_PATTERN:
+ TODO
+ break;
+ case XML_SCHEMA_FACET_ENUMERATION:
+ TODO
+ break;
+ case XML_SCHEMA_FACET_WHITESPACE:
+ TODO
+ break;
+ case XML_SCHEMA_FACET_LENGTH:
+ TODO
+ break;
+ case XML_SCHEMA_FACET_MAXLENGTH:
+ TODO
+ break;
+ case XML_SCHEMA_FACET_MINLENGTH:
+ TODO
+ break;
+ }
+ /* the attributes declared on the type are checked for every kind */
+ xmlSchemaValidateAttributes(ctxt, node, type->attributes);
+
+ if (ctxt->node == NULL)
+ return(ctxt->err);
+ /* move the context to the next sibling and the next type */
+ ctxt->node = ctxt->node->next;
+ ctxt->type = type->next;
+ return(ctxt->err);
+}
+
+/**
+ * xmlSchemaValidateType:
+ * @ctxt:  a schema validation context
+ * @elem:  an element
+ * @elemDecl:  the declaration of the element
+ * @type:  the list of type declarations
+ *
+ * Validate the content of an element against the types: the abstract
+ * and nillable constraints of the declaration are checked first, then
+ * the children and the attributes of @elem are validated.
+ *
+ * Returns 0 if the element is schemas valid, a positive error code
+ *         number otherwise and -1 in case of internal or API error.
+ */
+static int
+xmlSchemaValidateType(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr elem,
+                      xmlSchemaElementPtr elemDecl,
+                      xmlSchemaTypePtr type) {
+    xmlChar *nil;
+
+    /*
+     * NOTE(review): the first test looks at elem->content and not at
+     * elem itself, so nothing is validated when that field is NULL -
+     * confirm this is intended.
+     */
+    if ((elem->content == NULL) || (type == NULL) || (elemDecl == NULL))
+        return(0);
+    /*
+     * 3.3.4 : 2
+     */
+    if (elemDecl->flags & XML_SCHEMAS_ELEM_ABSTRACT) {
+        ctxt->err = XML_SCHEMAS_ERR_ISABSTRACT;
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData, "Element %s is abstract\n", elem->name);
+        return(ctxt->err);
+    }
+    /*
+     * 3.3.4: 3
+     * The attribute value is a copy which has to be freed on every path.
+     */
+    nil = xmlGetNsProp(elem, BAD_CAST "nil", xmlSchemaInstanceNs);
+    if (elemDecl->flags & XML_SCHEMAS_ELEM_NILLABLE) {
+        /* 3.3.4: 3.2 */
+        if (xmlStrEqual(nil, BAD_CAST "true")) {
+            if (elem->children != NULL) {
+                ctxt->err = XML_SCHEMAS_ERR_NOTEMPTY;
+                if (ctxt->error != NULL)
+                    ctxt->error(ctxt->userData, "Element %s is not empty\n",
+                                elem->name);
+                xmlFree(nil);
+                return(ctxt->err);
+            }
+            if ((elemDecl->flags & XML_SCHEMAS_ELEM_FIXED) &&
+                (elemDecl->value != NULL)) {
+                ctxt->err = XML_SCHEMAS_ERR_HAVEDEFAULT;
+                if (ctxt->error != NULL)
+                    ctxt->error(ctxt->userData,
+                            "Empty element %s cannot get a fixed value\n",
+                                elem->name);
+                xmlFree(nil);
+                return(ctxt->err);
+            }
+        }
+    } else {
+        /* 3.3.4: 3.1 */
+        if (nil != NULL) {
+            ctxt->err = XML_SCHEMAS_ERR_NOTNILLABLE;
+            if (ctxt->error != NULL)
+                ctxt->error(ctxt->userData,
+                            "Element %s with xs:nil but not nillable\n",
+                            elem->name);
+            xmlFree(nil);
+            return(ctxt->err);
+        }
+    }
+    /* the value is not needed any more */
+    if (nil != NULL)
+        xmlFree(nil);
+
+    /* TODO 3.3.4: 4 if the element carries xs:type*/
+
+    ctxt->type = elemDecl->subtypes;
+    ctxt->node = elem->children;
+    xmlSchemaValidateContent(ctxt, elem);
+    xmlSchemaValidateAttributes(ctxt, elem, elemDecl->attributes);
+
+    return(ctxt->err);
+}
+
+
+/**
+ * xmlSchemaValidateAttributes:
+ * @ctxt:  a schema validation context
+ * @elem:  an element
+ * @attributes:  the list of attribute declarations
+ *
+ * Validate the attributes of an element: each declaration is compared
+ * by name with the attributes registered in the context between
+ * attrBase and attrNr, and the value of every match is checked against
+ * the type of the declaration. Attributes which validate are flagged
+ * as checked.
+ *
+ * Returns 0 if the list of declarations is empty, the error code of the
+ *         context otherwise.
+ */
+static int
+xmlSchemaValidateAttributes(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr elem,
+                            xmlSchemaAttributePtr attributes) {
+    xmlSchemaAttributePtr decl;
+    xmlAttrPtr cur;
+    xmlChar *str;
+    int idx, res;
+
+    if (attributes == NULL)
+        return(0);
+    for (decl = attributes; decl != NULL; decl = decl->next) {
+        for (idx = ctxt->attrBase; idx < ctxt->attrNr; idx++) {
+            cur = ctxt->attr[idx].attr;
+            /* empty slot or another attribute */
+            if ((cur == NULL) || (!xmlStrEqual(cur->name, decl->name)))
+                continue;
+            /*
+             * TODO: handle the mess about namespaces here.
+             */
+            if ((cur->ns != NULL) /* || (attributes->ns != NULL) */) {
+                TODO
+            }
+            if (decl->subtypes == NULL) {
+                ctxt->err = XML_SCHEMAS_ERR_INTERNAL;
+                if (ctxt->error != NULL)
+                    ctxt->error(ctxt->userData,
+                        "Internal error: attribute %s type not resolved\n",
+                                cur->name);
+                continue;
+            }
+            str = xmlNodeListGetString(elem->doc, cur->children, 1);
+            res = xmlSchemaValidateSimpleValue(ctxt, decl->subtypes, str);
+            if (res == 0) {
+                ctxt->attr[idx].state = XML_SCHEMAS_ATTR_CHECKED;
+            } else {
+                ctxt->err = XML_SCHEMAS_ERR_ATTRINVALID;
+                if (ctxt->error != NULL)
+                    ctxt->error(ctxt->userData,
+                                "attribute %s on %s does not match type\n",
+                                cur->name, elem->name);
+            }
+            if (str != NULL)
+                xmlFree(str);
+        }
+    }
+    return(ctxt->err);
+}
+
+/**
+ * xmlSchemaValidateElement:
+ * @ctxt:  a schema validation context
+ * @elem:  an element
+ *
+ * Validate an element in a tree: its declaration is looked up by name
+ * and namespace in the schema, then the attributes and the content of
+ * the element are checked. The automata of the content model is only
+ * run when the declaration has one.
+ *
+ * Returns 0 if the element is schemas valid, a positive error code
+ *         number otherwise and -1 in case of internal or API error.
+ */
+static int
+xmlSchemaValidateElement(xmlSchemaValidCtxtPtr ctxt, xmlNodePtr elem) {
+    xmlSchemaElementPtr elemDecl;
+    xmlRegExecCtxtPtr oldregexp; /* automata in place on entry */
+    int ret, attrBase;
+
+    if (elem->ns != NULL)
+        elemDecl = xmlHashLookup3(ctxt->schema->elemDecl,
+                                  elem->name, elem->ns->href, NULL);
+    else
+        elemDecl = xmlHashLookup3(ctxt->schema->elemDecl,
+                                  elem->name, NULL, NULL);
+    /*
+     * 3.3.4 : 1
+     */
+    if (elemDecl == NULL) {
+        ctxt->err = XML_SCHEMAS_ERR_UNDECLAREDELEM;
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData, "Element %s not declared\n",
+                        elem->name);
+        return(ctxt->err);
+    }
+    if (elemDecl->subtypes == NULL) {
+        ctxt->err = XML_SCHEMAS_ERR_NOTYPE;
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData, "Element %s has no type\n",
+                        elem->name);
+        return(ctxt->err);
+    }
+    /*
+     * Verify the attributes
+     */
+    attrBase = ctxt->attrBase;
+    ctxt->attrBase = ctxt->attrNr;
+    xmlSchemaRegisterAttributes(ctxt, elem->properties);
+    xmlSchemaValidateAttributes(ctxt, elem, elemDecl->attributes);
+    /*
+     * Verify the element content recursively
+     */
+    oldregexp = ctxt->regexp;
+    if (elemDecl->contModel != NULL) {
+        ctxt->regexp = xmlRegNewExecCtxt(elemDecl->contModel,
+                             (xmlRegExecCallbacks) xmlSchemaValidateCallback,
+                             ctxt);
+#ifdef DEBUG_AUTOMATA
+        xmlGenericError(xmlGenericErrorContext,
+                        "====> %s\n", elem->name);
+#endif
+    }
+    xmlSchemaValidateType(ctxt, elem, elemDecl, elemDecl->subtypes);
+    /*
+     * The final state can only be asked for when an automata was
+     * created above; without a content model ctxt->regexp is whatever
+     * was left in the context.
+     */
+    if (elemDecl->contModel != NULL) {
+        ret = xmlRegExecPushString(ctxt->regexp, NULL, NULL);
+#ifdef DEBUG_AUTOMATA
+        xmlGenericError(xmlGenericErrorContext,
+                        "====> %s : %d\n", elem->name, ret);
+#endif
+        /*
+         * NOTE(review): both failures below are reported but ctxt->err
+         * is left untouched - confirm whether an error code is expected.
+         */
+        if (ret == 0) {
+            if (ctxt->error != NULL)
+                ctxt->error(ctxt->userData, "Element %s content check failed\n",
+                            elem->name);
+        } else if (ret < 0) {
+            if (ctxt->error != NULL)
+                ctxt->error(ctxt->userData, "Element %s content check failed\n",
+                            elem->name);
+#ifdef DEBUG_CONTENT
+        } else {
+            xmlGenericError(xmlGenericErrorContext,
+                            "Element %s content check succeeded\n", elem->name);
+
+#endif
+        }
+        xmlRegFreeExecCtxt(ctxt->regexp);
+    }
+    /* never leave a freed automata behind in the context */
+    ctxt->regexp = oldregexp;
+    /*
+     * Verify that all attributes were Schemas-validated
+     */
+    xmlSchemaCheckAttributes(ctxt, elem);
+    ctxt->attrNr = ctxt->attrBase;
+    ctxt->attrBase = attrBase;
+
+    return(ctxt->err);
+}
+
+/**
+ * xmlSchemaValidateDocument:
+ * @ctxt:  a schema validation context
+ * @doc:  a parsed document tree
+ *
+ * Validate a document tree in memory: the root element must be declared
+ * in the schema, it is expected to be a toplevel declaration, and it is
+ * then validated recursively.
+ *
+ * Returns 0 if the document is schemas valid, a positive error code
+ *         number otherwise and -1 in case of internal or API error.
+ */
+static int
+xmlSchemaValidateDocument(xmlSchemaValidCtxtPtr ctxt, xmlDocPtr doc) {
+    xmlNodePtr root;
+    xmlSchemaElementPtr elemDecl;
+
+    root = xmlDocGetRootElement(doc);
+    if (root == NULL) {
+        ctxt->err = XML_SCHEMAS_ERR_NOROOT;
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData, "document has no root\n");
+        return(ctxt->err);
+    }
+    if (root->ns != NULL)
+        elemDecl = xmlHashLookup3(ctxt->schema->elemDecl,
+                                  root->name, root->ns->href, NULL);
+    else
+        elemDecl = xmlHashLookup3(ctxt->schema->elemDecl,
+                                  root->name, NULL, NULL);
+    if (elemDecl == NULL) {
+        ctxt->err = XML_SCHEMAS_ERR_UNDECLAREDELEM;
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData, "Element %s not declared\n",
+                        root->name);
+        /* nothing to validate against, and elemDecl is read below */
+        return(ctxt->err);
+    }
+    /* a root which is not toplevel is reported, validation goes on */
+    if ((elemDecl->flags & XML_SCHEMAS_ELEM_TOPLEVEL) == 0) {
+        ctxt->err = XML_SCHEMAS_ERR_NOTTOPLEVEL;
+        if (ctxt->error != NULL)
+            ctxt->error(ctxt->userData, "Root element %s not toplevel\n",
+                        root->name);
+    }
+    /*
+     * Okay, start the recursive validation
+     */
+    xmlSchemaValidateElement(ctxt, root);
+
+    return(ctxt->err);
+}
+
+/************************************************************************
+ * *
+ * SAX Validation code *
+ * *
+ ************************************************************************/
+
+/************************************************************************
+ * *
+ * Validation interfaces *
+ * *
+ ************************************************************************/
+
+/**
+ * xmlSchemaNewValidCtxt:
+ * @schema:  a precompiled XML Schemas
+ *
+ * Create an XML Schemas validation context based on the given schema.
+ * The context is zeroed and gets an attribute state array with room for
+ * 10 entries; no error or warning callback is installed.
+ *
+ * Returns the validation context or NULL in case of error
+ */
+xmlSchemaValidCtxtPtr
+xmlSchemaNewValidCtxt(xmlSchemaPtr schema) {
+    xmlSchemaValidCtxtPtr ret;
+
+    ret = (xmlSchemaValidCtxtPtr) xmlMalloc(sizeof(xmlSchemaValidCtxt));
+    if (ret == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+                "Failed to allocate new schama validation context\n");
+        return (NULL);
+    }
+    memset(ret, 0, sizeof(xmlSchemaValidCtxt));
+    ret->schema = schema;
+    ret->attrNr = 0;
+    ret->attrMax = 10;
+    ret->attr = (xmlSchemaAttrStatePtr) xmlMalloc(ret->attrMax *
+                                                  sizeof(xmlSchemaAttrState));
+    if (ret->attr == NULL) {
+        /* allocated with xmlMalloc, so released with xmlFree */
+        xmlFree(ret);
+        return(NULL);
+    }
+    memset(ret->attr, 0, ret->attrMax * sizeof(xmlSchemaAttrState));
+    return (ret);
+}
+
+/**
+ * xmlSchemaFreeValidCtxt:
+ * @ctxt:  the schema validation context, NULL is accepted
+ *
+ * Free the resources associated to the schema validation context: the
+ * attribute state array, the value cached by the last basic type
+ * validation, and the context itself. The schema is not freed.
+ */
+void
+xmlSchemaFreeValidCtxt(xmlSchemaValidCtxtPtr ctxt) {
+    if (ctxt == NULL)
+        return;
+    if (ctxt->attr != NULL)
+        xmlFree(ctxt->attr);
+    /* the cached value is owned by the context */
+    if (ctxt->value != NULL)
+        xmlSchemaFreeValue(ctxt->value);
+    xmlFree(ctxt);
+}
+
+/**
+ * xmlSchemaSetValidErrors:
+ * @ctxt:  a schema validation context
+ * @err:  the error function
+ * @warn:  the warning function
+ * @ctx:  the functions context, handed back to both callbacks
+ *
+ * Set the error and warning callback informations.
+ * Nothing is done if @ctxt is NULL.
+ */
+void
+xmlSchemaSetValidErrors(xmlSchemaValidCtxtPtr ctxt,
+                        xmlSchemaValidityErrorFunc err,
+                        xmlSchemaValidityWarningFunc warn, void *ctx) {
+    if (ctxt != NULL) {
+        ctxt->userData = ctx;
+        ctxt->warning = warn;
+        ctxt->error = err;
+    }
+}
+
+/**
+ * xmlSchemaValidateDoc:
+ * @ctxt:  a schema validation context
+ * @doc:  a parsed document tree
+ *
+ * Validate a document tree in memory. The document is recorded in the
+ * context before the validation starts.
+ *
+ * Returns 0 if the document is schemas valid, a positive error code
+ *         number otherwise and -1 in case of internal or API error.
+ */
+int
+xmlSchemaValidateDoc(xmlSchemaValidCtxtPtr ctxt, xmlDocPtr doc) {
+    /* both arguments are mandatory */
+    if ((doc == NULL) || (ctxt == NULL))
+        return(-1);
+
+    ctxt->doc = doc;
+    return(xmlSchemaValidateDocument(ctxt, doc));
+}
+
+/**
+ * xmlSchemaValidateStream:
+ * @ctxt:  a schema validation context
+ * @input:  the input to use for reading the data
+ * @enc:  an optional encoding information
+ * @sax:  a SAX handler for the resulting events
+ * @user_data:  the context to provide to the SAX handler.
+ *
+ * Validate a document read from an input buffer. Not implemented yet:
+ * the arguments are only recorded in the context and the
+ * "unimplemented" diagnostic is emitted.
+ *
+ * Returns -1 if @ctxt or @input is NULL, 0 otherwise as no validation
+ *         takes place for now.
+ */
+int
+xmlSchemaValidateStream(xmlSchemaValidCtxtPtr ctxt,
+                        xmlParserInputBufferPtr input, xmlCharEncoding enc,
+                        xmlSAXHandlerPtr sax, void *user_data) {
+    if ((input == NULL) || (ctxt == NULL))
+        return(-1);
+    /* keep the parameters around for the future implementation */
+    ctxt->user_data = user_data;
+    ctxt->sax = sax;
+    ctxt->enc = enc;
+    ctxt->input = input;
+    TODO
+    return(0);
+}
+
+#endif /* LIBXML_SCHEMAS_ENABLED */
diff --git a/xmlschemastypes.c b/xmlschemastypes.c
new file mode 100644
index 0000000..4c8d420
--- /dev/null
+++ b/xmlschemastypes.c
@@ -0,0 +1,490 @@
+/*
+ * xmlschemastypes.c : implementation of the XML Schema Datatypes
+ * definition and validity checking
+ *
+ * See Copyright for the status of this software.
+ *
+ * Daniel Veillard <veillard@redhat.com>
+ */
+
+#define IN_LIBXML
+#include "libxml.h"
+
+#ifdef LIBXML_SCHEMAS_ENABLED
+
+#include <string.h>
+#include <libxml/xmlmemory.h>
+#include <libxml/parser.h>
+#include <libxml/parserInternals.h>
+#include <libxml/hash.h>
+#include <libxml/valid.h>
+
+#include <libxml/xmlschemas.h>
+#include <libxml/schemasInternals.h>
+#include <libxml/xmlschemastypes.h>
+
+#define DEBUG /* NOTE(review): nothing in this file tests DEBUG */
+/* Report an unimplemented code path; expands to a whole statement, ';' included. */
+#define TODO \
+ xmlGenericError(xmlGenericErrorContext, \
+ "Unimplemented block at %s:%d\n", \
+ __FILE__, __LINE__);
+/* Namespace name under which xmlSchemaInitBasicType() registers the built-in types. */
+#define XML_SCHEMAS_NAMESPACE_NAME \
+ (const xmlChar *)"http://www.w3.org/2001/XMLSchema"
+
+typedef enum { /* kind of computed value held by an xmlSchemaVal */
+ XML_SCHEMAS_UNKNOWN = 0,
+ XML_SCHEMAS_STRING,
+ XML_SCHEMAS_NMTOKEN,
+ XML_SCHEMAS_DECIMAL, /* the only kind this file allocates values for */
+ XML_SCHEMAS_, /* NOTE(review): looks like a placeholder; unused in this file */
+ XML_SCHEMAS_XXX /* NOTE(review): looks like a placeholder; unused in this file */
+} xmlSchemaValType;
+
+unsigned long powten[10] = { /* powten[i] == 10^i, for i in 0..9 */
+ 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000L,
+ 100000000L, 1000000000L
+}; /* NOTE(review): external linkage and writable; confirm no other file needs it before making it static const */
+
+typedef struct _xmlSchemaValDecimal xmlSchemaValDecimal;
+typedef xmlSchemaValDecimal *xmlSchemaValDecimalPtr;
+struct _xmlSchemaValDecimal {
+    /* would use long long but not portable */
+    unsigned long base;      /* all the digits read, decimal point dropped */
+    unsigned int extra;      /* not used by the code in this file */
+    /*
+     * Explicitly unsigned: whether a plain "int" bit-field is signed is
+     * implementation-defined, and a signed 1-bit field cannot hold 1.
+     */
+    unsigned int sign:1;     /* 1 if the value is negative */
+    unsigned int frac:7;     /* number of digits after the decimal point */
+    unsigned int total:8;    /* total number of digits */
+};
+
+struct _xmlSchemaVal { /* a computed simple type value */
+ xmlSchemaValType type; /* tells which member of the union below is meaningful */
+ union {
+ xmlSchemaValDecimal decimal; /* read when type is XML_SCHEMAS_DECIMAL */
+ } value;
+};
+
+static int xmlSchemaTypesInitialized = 0; /* set to 1 by xmlSchemaInitTypes(), back to 0 by xmlSchemaCleanupTypes() */
+static xmlHashTablePtr xmlSchemaTypesBank = NULL; /* built-in types, keyed by name and namespace */
+/* Built-in type definitions; xmlSchemaValidatePredefinedType() dispatches on these pointers. */
+static xmlSchemaTypePtr xmlSchemaTypeStringDef = NULL;
+static xmlSchemaTypePtr xmlSchemaTypeAnyTypeDef = NULL;
+static xmlSchemaTypePtr xmlSchemaTypeAnySimpleTypeDef = NULL;
+static xmlSchemaTypePtr xmlSchemaTypeDecimalDef = NULL;
+static xmlSchemaTypePtr xmlSchemaTypeDateDef = NULL;
+static xmlSchemaTypePtr xmlSchemaTypePositiveIntegerDef = NULL;
+static xmlSchemaTypePtr xmlSchemaTypeNonNegativeIntegerDef = NULL;
+static xmlSchemaTypePtr xmlSchemaTypeNmtoken = NULL;
+
+/*
+ * xmlSchemaInitBasicType:
+ * @name:  the type name
+ *
+ * Create one default type and register it in xmlSchemaTypesBank under
+ * the XML Schemas namespace.
+ *
+ * Returns the new type, or NULL if it could not be allocated or registered
+ */
+static xmlSchemaTypePtr
+xmlSchemaInitBasicType(const char *name) {
+    xmlSchemaTypePtr ret;
+
+    ret = (xmlSchemaTypePtr) xmlMalloc(sizeof(xmlSchemaType));
+    if (ret == NULL) {
+        xmlGenericError(xmlGenericErrorContext,
+                "Could not initilize type %s: out of memory\n", name);
+        return(NULL);
+    }
+    memset(ret, 0, sizeof(xmlSchemaType));
+    ret->name = xmlStrdup((const xmlChar *)name);
+    ret->type = XML_SCHEMA_TYPE_BASIC;
+    ret->contentType = XML_SCHEMA_CONTENT_BASIC;
+    if ((ret->name == NULL) ||
+        (xmlHashAddEntry2(xmlSchemaTypesBank, ret->name,
+                          XML_SCHEMAS_NAMESPACE_NAME, ret) != 0)) {
+        /*
+         * Only types held by the bank are released by
+         * xmlSchemaCleanupTypes(): free this one here or it would leak.
+         */
+        xmlGenericError(xmlGenericErrorContext,
+                "Could not register type %s\n", name);
+        if (ret->name != NULL)
+            xmlFree((xmlChar *) ret->name);
+        xmlFree(ret);
+        return(NULL);
+    }
+    return(ret);
+}
+
+/*
+ * xmlSchemaInitTypes:
+ *
+ * Initialize the default XML Schemas type library: create the types bank
+ * and register the built-in types. Does nothing if already initialized.
+ * If the bank cannot be created the library is left uninitialized, so
+ * that a later call can try again.
+ */
+void
+xmlSchemaInitTypes(void) {
+    if (xmlSchemaTypesInitialized != 0)
+        return;
+    xmlSchemaTypesBank = xmlHashCreate(40);
+    if (xmlSchemaTypesBank == NULL) {
+        /* without a bank no type could be registered or freed later */
+        xmlGenericError(xmlGenericErrorContext,
+                "Could not create the schemas types bank\n");
+        return;
+    }
+
+    xmlSchemaTypeStringDef = xmlSchemaInitBasicType("string");
+    xmlSchemaTypeAnyTypeDef = xmlSchemaInitBasicType("anyType");
+    xmlSchemaTypeAnySimpleTypeDef = xmlSchemaInitBasicType("anySimpleType");
+    xmlSchemaTypeDecimalDef = xmlSchemaInitBasicType("decimal");
+    xmlSchemaTypeDateDef = xmlSchemaInitBasicType("date");
+    xmlSchemaTypePositiveIntegerDef = xmlSchemaInitBasicType("positiveInteger");
+    xmlSchemaTypeNonNegativeIntegerDef =
+        xmlSchemaInitBasicType("nonNegativeInteger");
+    xmlSchemaTypeNmtoken = xmlSchemaInitBasicType("NMTOKEN");
+
+    xmlSchemaTypesInitialized = 1;
+}
+
+/**
+ * xmlSchemaCleanupTypes:
+ *
+ * Cleanup the default XML Schemas type library: free the types bank with
+ * the types it holds and forget every pointer into it. Does nothing if
+ * the library is not initialized.
+ */
+void
+xmlSchemaCleanupTypes(void) {
+    if (xmlSchemaTypesInitialized == 0)
+        return;
+    xmlHashFree(xmlSchemaTypesBank, (xmlHashDeallocator) xmlSchemaFreeType);
+    /* the bank and the types registered in it are gone: drop stale pointers */
+    xmlSchemaTypesBank = NULL;
+    xmlSchemaTypeStringDef = NULL;
+    xmlSchemaTypeAnyTypeDef = NULL;
+    xmlSchemaTypeAnySimpleTypeDef = NULL;
+    xmlSchemaTypeDecimalDef = NULL;
+    xmlSchemaTypeDateDef = NULL;
+    xmlSchemaTypePositiveIntegerDef = NULL;
+    xmlSchemaTypeNonNegativeIntegerDef = NULL;
+    xmlSchemaTypeNmtoken = NULL;
+    xmlSchemaTypesInitialized = 0;
+}
+
+/**
+ * xmlSchemaNewValue:
+ * @type:  the value type
+ *
+ * Allocate a zero-filled simple type value tagged with @type.
+ *
+ * Returns a pointer to the new value or NULL in case of error
+ */
+static xmlSchemaValPtr
+xmlSchemaNewValue(xmlSchemaValType type) {
+    xmlSchemaValPtr ret;
+
+    ret = (xmlSchemaValPtr) xmlMalloc(sizeof(xmlSchemaVal));
+    /* an allocation failure is simply handed back as NULL */
+    if (ret != NULL) {
+        memset(ret, 0, sizeof(xmlSchemaVal));
+        ret->type = type;
+    }
+    return(ret);
+}
+
+/**
+ * xmlSchemaFreeValue:
+ * @value:  the value to free
+ *
+ * Free a simple type value; a NULL @value is ignored.
+ */
+void
+xmlSchemaFreeValue(xmlSchemaValPtr value) {
+    /* the structure holds no pointer of its own, one free is enough */
+    if (value != NULL)
+        xmlFree(value);
+}
+
+/**
+ * xmlSchemaGetPredefinedType:
+ * @name:  the type name
+ * @ns:  the URI of the namespace usually "http://www.w3.org/2001/XMLSchema"
+ *
+ * Lookup a type in the default XML Schemas type library, initializing
+ * the library first if needed (even when @name is NULL).
+ *
+ * Returns the type if found, NULL otherwise
+ */
+xmlSchemaTypePtr
+xmlSchemaGetPredefinedType(const xmlChar *name, const xmlChar *ns) {
+    if (!xmlSchemaTypesInitialized)
+        xmlSchemaInitTypes();
+    if (name != NULL)
+        return((xmlSchemaTypePtr) xmlHashLookup2(xmlSchemaTypesBank, name, ns));
+    return(NULL);
+}
+/*
+ * xmlSchemaScanDigits:
+ * @str:  in/out cursor, advanced past the digits consumed
+ * @base:  in/out accumulator for the digits read (wraps on overflow)
+ * @nonzero:  set to 1 if a digit other than '0' is seen, may be NULL
+ *
+ * Consume a possibly empty run of ASCII digits.
+ *
+ * Returns the number of digits consumed
+ */
+static int
+xmlSchemaScanDigits(const xmlChar **str, unsigned long *base, int *nonzero) {
+    const xmlChar *cur = *str;
+    int count = 0;
+
+    while ((*cur >= '0') && (*cur <= '9')) {
+        if ((*cur != '0') && (nonzero != NULL))
+            *nonzero = 1;
+        *base = *base * 10 + (*cur - '0');
+        count++;
+        cur++;
+    }
+    *str = cur;
+    return(count);
+}
+
+/*
+ * xmlSchemaReturnDecimal:
+ * @val:  where to store the new value, may be NULL
+ * @base:  the digits of the number
+ * @neg:  1 if the number is negative
+ * @frac:  number of digits after the decimal point
+ * @total:  total number of digits
+ *
+ * Allocate a decimal value and hand it back through @val. Nothing is
+ * stored if @val is NULL or if the allocation fails.
+ */
+static void
+xmlSchemaReturnDecimal(xmlSchemaValPtr *val, unsigned long base, int neg,
+                       int frac, int total) {
+    xmlSchemaValPtr v;
+
+    if (val == NULL)
+        return;
+    v = xmlSchemaNewValue(XML_SCHEMAS_DECIMAL);
+    if (v != NULL) {
+        v->value.decimal.base = base;
+        v->value.decimal.sign = neg;
+        v->value.decimal.frac = frac;
+        v->value.decimal.total = total;
+        *val = v;
+    }
+}
+
+/**
+ * xmlSchemaValidatePredefinedType:
+ * @type:  the predefined type
+ * @value:  the value to check
+ * @val:  the return computed value
+ *
+ * Check that a value conforms to the lexical space of the predefined type.
+ * If it does and @val is not NULL, a computed value is returned in @val
+ * for decimal, positiveInteger and nonNegativeInteger; for the other
+ * types, or if the value cannot be allocated, *@val is left NULL.
+ * Dates are checked as [-]YYYY-MM-DD with at least four year digits; the
+ * day is not checked against the month length and time zones are not
+ * accepted.
+ *
+ * Returns 0 if this validates, a positive error code number otherwise
+ *         and -1 in case of internal or API error.
+ */
+int
+xmlSchemaValidatePredefinedType(xmlSchemaTypePtr type, const xmlChar *value,
+                                xmlSchemaValPtr *val) {
+    if (xmlSchemaTypesInitialized == 0)
+        return(-1);
+    if (type == NULL)
+        return(-1);
+    if (val != NULL)
+        *val = NULL;
+    if (type == xmlSchemaTypeStringDef) {
+        return(0);
+    } else if (type == xmlSchemaTypeAnyTypeDef) {
+        return(0);
+    } else if (type == xmlSchemaTypeAnySimpleTypeDef) {
+        return(0);
+    } else if (type == xmlSchemaTypeNmtoken) {
+        if (xmlValidateNmtokenValue(value))
+            return(0);
+        return(1);
+    } else if (type == xmlSchemaTypeDecimalDef) {
+        const xmlChar *cur = value;
+        int frac = 0, integ, neg = 0;
+        unsigned long base = 0;
+
+        if (cur == NULL)
+            return(1);
+        if (*cur == '+')
+            cur++;
+        else if (*cur == '-') {
+            neg = 1;
+            cur++;
+        }
+        integ = xmlSchemaScanDigits(&cur, &base, NULL);
+        if (*cur == '.') {
+            cur++;
+            frac = xmlSchemaScanDigits(&cur, &base, NULL);
+        }
+        /* reject trailing garbage and inputs without any digit ("", "+", ".") */
+        if ((*cur != 0) || (integ + frac == 0))
+            return(1);
+        xmlSchemaReturnDecimal(val, base, neg, frac, frac + integ);
+        return(0);
+    } else if (type == xmlSchemaTypeDateDef) {
+        const xmlChar *cur = value;
+        int digits = 0;
+
+        if (cur == NULL)
+            return(1);
+        if (*cur == '-')
+            cur++;
+        /* year: four digits or more */
+        while ((*cur >= '0') && (*cur <= '9')) {
+            digits++;
+            cur++;
+        }
+        if (digits < 4)
+            return(1);
+        if (*cur != '-')
+            return(1);
+        cur++;
+        /*
+         * month: 01..12. cur[1] is only read once cur[0] is known not to
+         * be the terminator, and both are checked before skipping them.
+         */
+        if (*cur == '0') {
+            if ((cur[1] < '1') || (cur[1] > '9'))
+                return(1);
+        } else if (*cur == '1') {
+            if ((cur[1] < '0') || (cur[1] > '2'))
+                return(1);
+        } else {
+            return(1);
+        }
+        cur += 2;
+        if (*cur != '-')
+            return(1);
+        cur++;
+        /* day: 01..31 */
+        if ((*cur < '0') || (*cur > '3'))
+            return(1);
+        if ((cur[1] < '0') || (cur[1] > '9'))
+            return(1);
+        if ((*cur == '0') && (cur[1] == '0'))
+            return(1);
+        if ((*cur == '3') && (cur[1] > '1'))
+            return(1);
+        cur += 2;
+        if (*cur != 0)
+            return(1);
+        return(0);
+    } else if (type == xmlSchemaTypePositiveIntegerDef) {
+        const xmlChar *cur = value;
+        unsigned long base = 0;
+        int total, nonzero = 0;
+
+        if (cur == NULL)
+            return(1);
+        if (*cur == '+')
+            cur++;
+        total = xmlSchemaScanDigits(&cur, &base, &nonzero);
+        if (*cur != 0)
+            return(1);
+        /*
+         * At least one non-zero digit is needed: this rejects "", "+" and
+         * zero. The digits are tested rather than @base, which may wrap.
+         */
+        if (!nonzero)
+            return(1);
+        xmlSchemaReturnDecimal(val, base, 0, 0, total);
+        return(0);
+    } else if (type == xmlSchemaTypeNonNegativeIntegerDef) {
+        const xmlChar *cur = value;
+        unsigned long base = 0;
+        int total, sign = 0, nonzero = 0;
+
+        if (cur == NULL)
+            return(1);
+        if (*cur == '-') {
+            sign = 1;
+            cur++;
+        } else if (*cur == '+')
+            cur++;
+        total = xmlSchemaScanDigits(&cur, &base, &nonzero);
+        /* reject trailing garbage and inputs without any digit */
+        if ((*cur != 0) || (total == 0))
+            return(1);
+        /* a minus sign is only acceptable in front of zero */
+        if ((sign == 1) && (nonzero))
+            return(1);
+        xmlSchemaReturnDecimal(val, base, 0, 0, total);
+        return(0);
+    } else {
+        /* NOTE(review): types not handled above are reported then accepted */
+        TODO
+        return(0);
+    }
+}
+
+/**
+ * xmlSchemaCompareDecimals:
+ * @x:  a first decimal value
+ * @y:  a second decimal value
+ *
+ * Compare 2 decimals. Both arguments must be non-NULL decimal values.
+ * Values are compared exactly: the operand with more fractional digits is
+ * scaled down digit by digit, so no power-of-ten table lookup or
+ * multiplication (which could overflow) is involved.
+ *
+ * Returns -1 if x < y, 0 if x == y, 1 if x > y
+ */
+static int
+xmlSchemaCompareDecimals(xmlSchemaValPtr x, xmlSchemaValPtr y)
+{
+    xmlSchemaValPtr swp;
+    int order = 1;
+    int diff, truncated = 0;
+    unsigned long tmp;
+
+    if ((x->value.decimal.sign) && (y->value.decimal.sign)) {
+        /* both negative: the larger magnitude is the smaller value */
+        order = -1;
+    } else if ((x->value.decimal.sign) || (y->value.decimal.sign)) {
+        /* opposite signs decide alone, unless both are zero ("-0" and "0") */
+        if ((x->value.decimal.base == 0) && (y->value.decimal.base == 0))
+            return (0);
+        return ((x->value.decimal.sign) ? -1 : 1);
+    }
+    /* make x the operand with the most fractional digits */
+    if (y->value.decimal.frac > x->value.decimal.frac) {
+        swp = y;
+        y = x;
+        x = swp;
+        order = -order;
+    }
+    /* bring x down to y's scale, remembering any non-zero digit dropped */
+    diff = x->value.decimal.frac - y->value.decimal.frac;
+    tmp = x->value.decimal.base;
+    while ((diff > 0) && (tmp != 0)) {
+        if ((tmp % 10) != 0)
+            truncated = 1;
+        tmp /= 10;
+        diff--;
+    }
+    if (tmp > y->value.decimal.base)
+        return (order);
+    if (tmp < y->value.decimal.base)
+        return (-order);
+    /* equal at y's scale: a dropped non-zero digit makes x the larger one */
+    if (truncated)
+        return (order);
+    return (0);
+}
+
+/**
+ * xmlSchemaCompareValues:
+ * @x:  a first value
+ * @y:  a second value
+ *
+ * Compare 2 values. Only decimal values can be compared for now; any
+ * other combination of types is reported as an error.
+ *
+ * Returns -1 if x < y, 0 if x == y, 1 if x > y and -2 in case of error
+ */
+int
+xmlSchemaCompareValues(xmlSchemaValPtr x, xmlSchemaValPtr y) {
+    if ((x == NULL) || (y == NULL))
+        return(-2);
+
+    switch (x->type) {
+        case XML_SCHEMAS_DECIMAL:
+            if (y->type == XML_SCHEMAS_DECIMAL)
+                return(xmlSchemaCompareDecimals(x, y));
+            return(-2);
+        case XML_SCHEMAS_STRING:
+        default:
+            /* not implemented: must not fall into the decimal comparison */
+            TODO
+            break;
+    }
+    /* every path of this non-void function has to return a value */
+    return(-2);
+}
+
+/**
+ * xmlSchemaValidateFacet:
+ * @base:  the base type declaration (not used yet)
+ * @facet:  the facet to check
+ * @value:  the lexical repr of the value to validate
+ * @val:  the precomputed value
+ *
+ * Check a value against a facet condition. Only the pattern and
+ * maxExclusive facets are handled; any other facet is reported as
+ * unimplemented and accepted.
+ *
+ * Returns 0 if the element is schemas valid, a positive error code
+ *         number otherwise and -1 in case of internal or API error.
+ */
+int
+xmlSchemaValidateFacet(xmlSchemaTypePtr base, xmlSchemaFacetPtr facet,
+                       const xmlChar *value, xmlSchemaValPtr val)
+{
+    int ret;
+
+    if (facet == NULL)
+        return(-1);
+    switch (facet->type) {
+        case XML_SCHEMA_FACET_PATTERN:
+            ret = xmlRegexpExec(facet->regexp, value);
+            if (ret == 1)
+                return(0);
+            if (ret == 0) {
+                TODO /* error code */
+                return(1);
+            }
+            /* any other result is an error of the regexp engine */
+            return(-1);
+        case XML_SCHEMA_FACET_MAXEXCLUSIVE:
+            ret = xmlSchemaCompareValues(val, facet->val);
+            if (ret == -2) {
+                TODO /* error code */
+                return(-1);
+            }
+            /* valid only if the value is strictly below the bound */
+            if (ret == -1)
+                return(0);
+            TODO /* error code */
+            return(1);
+        default:
+            TODO
+    }
+    return(0);
+}
+
+#endif /* LIBXML_SCHEMAS_ENABLED */
diff --git a/xmlunicode.c b/xmlunicode.c
new file mode 100644
index 0000000..8baf8d1
--- /dev/null
+++ b/xmlunicode.c
@@ -0,0 +1,4290 @@
+/*
+ * xmlunicode.c: this module implements the Unicode character APIs
+ *
+ * This file is automatically generated from the
+ * UCS description files of the Unicode Character Database
+ * http://www.unicode.org/Public/3.1-Update/UnicodeCharacterDatabase-3.1.0.html
+ * using the genUnicode.py Python script.
+ *
+ * Generation date: Tue Apr 16 17:28:05 2002
+ * Sources: Blocks-4.txt UnicodeData-3.1.0.txt
+ * Daniel Veillard <veillard@redhat.com>
+ */
+
+#define IN_LIBXML
+#include "libxml.h"
+
+#ifdef LIBXML_UNICODE_ENABLED
+
+#include <string.h>
+#include <libxml/xmlversion.h>
+#include <libxml/xmlunicode.h>
+
+/**
+ * xmlUCSIsAlphabeticPresentationForms:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of AlphabeticPresentationForms UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsAlphabeticPresentationForms(int code) {
+    return((0xFB00 <= code) && (code <= 0xFB4F));
+}
+
+/**
+ * xmlUCSIsArabic:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Arabic UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsArabic(int code) {
+    return((0x0600 <= code) && (code <= 0x06FF));
+}
+
+/**
+ * xmlUCSIsArabicPresentationFormsA:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of ArabicPresentationForms-A UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsArabicPresentationFormsA(int code) {
+    return((0xFB50 <= code) && (code <= 0xFDFF));
+}
+
+/**
+ * xmlUCSIsArabicPresentationFormsB:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of ArabicPresentationForms-B UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsArabicPresentationFormsB(int code) {
+    return((0xFE70 <= code) && (code <= 0xFEFE));
+}
+
+/**
+ * xmlUCSIsArmenian:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Armenian UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsArmenian(int code) {
+    return((0x0530 <= code) && (code <= 0x058F));
+}
+
+/**
+ * xmlUCSIsArrows:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Arrows UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsArrows(int code) {
+    return((0x2190 <= code) && (code <= 0x21FF));
+}
+
+/**
+ * xmlUCSIsBasicLatin:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of BasicLatin UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsBasicLatin(int code) {
+    return((0x0000 <= code) && (code <= 0x007F));
+}
+
+/**
+ * xmlUCSIsBengali:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Bengali UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsBengali(int code) {
+    return((0x0980 <= code) && (code <= 0x09FF));
+}
+
+/**
+ * xmlUCSIsBlockElements:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of BlockElements UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsBlockElements(int code) {
+    return((0x2580 <= code) && (code <= 0x259F));
+}
+
+/**
+ * xmlUCSIsBopomofo:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Bopomofo UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsBopomofo(int code) {
+    return((0x3100 <= code) && (code <= 0x312F));
+}
+
+/**
+ * xmlUCSIsBopomofoExtended:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of BopomofoExtended UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsBopomofoExtended(int code) {
+    return((0x31A0 <= code) && (code <= 0x31BF));
+}
+
+/**
+ * xmlUCSIsBoxDrawing:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of BoxDrawing UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsBoxDrawing(int code) {
+    return((0x2500 <= code) && (code <= 0x257F));
+}
+
+/**
+ * xmlUCSIsBraillePatterns:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of BraillePatterns UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsBraillePatterns(int code) {
+    return((0x2800 <= code) && (code <= 0x28FF));
+}
+
+/**
+ * xmlUCSIsByzantineMusicalSymbols:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of ByzantineMusicalSymbols UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsByzantineMusicalSymbols(int code) {
+    return((0x1D000 <= code) && (code <= 0x1D0FF));
+}
+
+/**
+ * xmlUCSIsCJKCompatibility:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CJKCompatibility UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKCompatibility(int code) {
+    return((0x3300 <= code) && (code <= 0x33FF));
+}
+
+/**
+ * xmlUCSIsCJKCompatibilityForms:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CJKCompatibilityForms UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKCompatibilityForms(int code) {
+    return((0xFE30 <= code) && (code <= 0xFE4F));
+}
+
+/**
+ * xmlUCSIsCJKCompatibilityIdeographs:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CJKCompatibilityIdeographs UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKCompatibilityIdeographs(int code) {
+    return((0xF900 <= code) && (code <= 0xFAFF));
+}
+
+/**
+ * xmlUCSIsCJKCompatibilityIdeographsSupplement:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CJKCompatibilityIdeographsSupplement UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKCompatibilityIdeographsSupplement(int code) {
+    return((0x2F800 <= code) && (code <= 0x2FA1F));
+}
+
+/**
+ * xmlUCSIsCJKRadicalsSupplement:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CJKRadicalsSupplement UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKRadicalsSupplement(int code) {
+    return((0x2E80 <= code) && (code <= 0x2EFF));
+}
+
+/**
+ * xmlUCSIsCJKSymbolsandPunctuation:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CJKSymbolsandPunctuation UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKSymbolsandPunctuation(int code) {
+    return((0x3000 <= code) && (code <= 0x303F));
+}
+
+/**
+ * xmlUCSIsCJKUnifiedIdeographs:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CJKUnifiedIdeographs UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKUnifiedIdeographs(int code) {
+    return((0x4E00 <= code) && (code <= 0x9FFF));
+}
+
+/**
+ * xmlUCSIsCJKUnifiedIdeographsExtensionA:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CJKUnifiedIdeographsExtensionA UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKUnifiedIdeographsExtensionA(int code) {
+    return((0x3400 <= code) && (code <= 0x4DB5));
+}
+
+/**
+ * xmlUCSIsCJKUnifiedIdeographsExtensionB:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CJKUnifiedIdeographsExtensionB UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCJKUnifiedIdeographsExtensionB(int code) {
+    return((0x20000 <= code) && (code <= 0x2A6D6));
+}
+
+/**
+ * xmlUCSIsCherokee:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Cherokee UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCherokee(int code) {
+    return((0x13A0 <= code) && (code <= 0x13FF));
+}
+
+/**
+ * xmlUCSIsCombiningDiacriticalMarks:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CombiningDiacriticalMarks UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCombiningDiacriticalMarks(int code) {
+    return((0x0300 <= code) && (code <= 0x036F));
+}
+
+/**
+ * xmlUCSIsCombiningHalfMarks:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CombiningHalfMarks UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCombiningHalfMarks(int code) {
+    return((0xFE20 <= code) && (code <= 0xFE2F));
+}
+
+/**
+ * xmlUCSIsCombiningMarksforSymbols:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CombiningMarksforSymbols UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCombiningMarksforSymbols(int code) {
+    return((0x20D0 <= code) && (code <= 0x20FF));
+}
+
+/**
+ * xmlUCSIsControlPictures:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of ControlPictures UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsControlPictures(int code) {
+    return((0x2400 <= code) && (code <= 0x243F));
+}
+
+/**
+ * xmlUCSIsCurrencySymbols:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of CurrencySymbols UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCurrencySymbols(int code) {
+    return((0x20A0 <= code) && (code <= 0x20CF));
+}
+
+/**
+ * xmlUCSIsCyrillic:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Cyrillic UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCyrillic(int code) {
+    return((0x0400 <= code) && (code <= 0x04FF));
+}
+
+/**
+ * xmlUCSIsDeseret:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Deseret UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsDeseret(int code) {
+    return((0x10400 <= code) && (code <= 0x1044F));
+}
+
+/**
+ * xmlUCSIsDevanagari:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Devanagari UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsDevanagari(int code) {
+    return((0x0900 <= code) && (code <= 0x097F));
+}
+
+/**
+ * xmlUCSIsDingbats:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Dingbats UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsDingbats(int code) {
+    return((0x2700 <= code) && (code <= 0x27BF));
+}
+
+/**
+ * xmlUCSIsEnclosedAlphanumerics:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of EnclosedAlphanumerics UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsEnclosedAlphanumerics(int code) {
+    return((0x2460 <= code) && (code <= 0x24FF));
+}
+
+/**
+ * xmlUCSIsEnclosedCJKLettersandMonths:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of EnclosedCJKLettersandMonths UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsEnclosedCJKLettersandMonths(int code) {
+    return((0x3200 <= code) && (code <= 0x32FF));
+}
+
+/**
+ * xmlUCSIsEthiopic:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Ethiopic UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsEthiopic(int code) {
+    return((0x1200 <= code) && (code <= 0x137F));
+}
+
+/**
+ * xmlUCSIsGeneralPunctuation:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of GeneralPunctuation UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsGeneralPunctuation(int code) {
+    return((0x2000 <= code) && (code <= 0x206F));
+}
+
+/**
+ * xmlUCSIsGeometricShapes:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of GeometricShapes UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsGeometricShapes(int code) {
+    return((0x25A0 <= code) && (code <= 0x25FF));
+}
+
+/**
+ * xmlUCSIsGeorgian:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Georgian UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsGeorgian(int code) {
+    return((0x10A0 <= code) && (code <= 0x10FF));
+}
+
+/**
+ * xmlUCSIsGothic:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Gothic UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsGothic(int code) {
+    return((0x10330 <= code) && (code <= 0x1034F));
+}
+
+/**
+ * xmlUCSIsGreek:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Greek UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsGreek(int code) {
+    return((0x0370 <= code) && (code <= 0x03FF));
+}
+
+/**
+ * xmlUCSIsGreekExtended:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of GreekExtended UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsGreekExtended(int code) {
+    return((0x1F00 <= code) && (code <= 0x1FFF));
+}
+
+/**
+ * xmlUCSIsGujarati:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Gujarati UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsGujarati(int code) {
+    return((0x0A80 <= code) && (code <= 0x0AFF));
+}
+
+/**
+ * xmlUCSIsGurmukhi:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Gurmukhi UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsGurmukhi(int code) {
+    return((0x0A00 <= code) && (code <= 0x0A7F));
+}
+
+/**
+ * xmlUCSIsHalfwidthandFullwidthForms:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of HalfwidthandFullwidthForms UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsHalfwidthandFullwidthForms(int code) {
+    return((0xFF00 <= code) && (code <= 0xFFEF));
+}
+
+/**
+ * xmlUCSIsHangulCompatibilityJamo:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of HangulCompatibilityJamo UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsHangulCompatibilityJamo(int code) {
+    return((0x3130 <= code) && (code <= 0x318F));
+}
+
+/**
+ * xmlUCSIsHangulJamo:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of HangulJamo UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsHangulJamo(int code) {
+    return((0x1100 <= code) && (code <= 0x11FF));
+}
+
+/**
+ * xmlUCSIsHangulSyllables:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of HangulSyllables UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsHangulSyllables(int code) {
+    return((0xAC00 <= code) && (code <= 0xD7A3));
+}
+
+/**
+ * xmlUCSIsHebrew:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Hebrew UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsHebrew(int code) {
+    return((0x0590 <= code) && (code <= 0x05FF));
+}
+
+/**
+ * xmlUCSIsHighPrivateUseSurrogates:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of HighPrivateUseSurrogates UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsHighPrivateUseSurrogates(int code) {
+    return((0xDB80 <= code) && (code <= 0xDBFF));
+}
+
+/**
+ * xmlUCSIsHighSurrogates:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of HighSurrogates UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsHighSurrogates(int code) {
+    return((0xD800 <= code) && (code <= 0xDB7F));
+}
+
+/**
+ * xmlUCSIsHiragana:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Hiragana UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsHiragana(int code) {
+    return((0x3040 <= code) && (code <= 0x309F));
+}
+
+/**
+ * xmlUCSIsIPAExtensions:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of IPAExtensions UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsIPAExtensions(int code) {
+    return((0x0250 <= code) && (code <= 0x02AF));
+}
+
+/**
+ * xmlUCSIsIdeographicDescriptionCharacters:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of IdeographicDescriptionCharacters UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsIdeographicDescriptionCharacters(int code) {
+    return((0x2FF0 <= code) && (code <= 0x2FFF));
+}
+
+/**
+ * xmlUCSIsKanbun:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Kanbun UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsKanbun(int code) {
+    return((0x3190 <= code) && (code <= 0x319F));
+}
+
+/**
+ * xmlUCSIsKangxiRadicals:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of KangxiRadicals UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsKangxiRadicals(int code) {
+    return((0x2F00 <= code) && (code <= 0x2FDF));
+}
+
+/**
+ * xmlUCSIsKannada:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Kannada UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsKannada(int code) {
+    return((0x0C80 <= code) && (code <= 0x0CFF));
+}
+
+/**
+ * xmlUCSIsKatakana:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Katakana UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsKatakana(int code) {
+    return((0x30A0 <= code) && (code <= 0x30FF));
+}
+
+/**
+ * xmlUCSIsKhmer:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Khmer UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsKhmer(int code) {
+    return((0x1780 <= code) && (code <= 0x17FF));
+}
+
+/**
+ * xmlUCSIsLao:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Lao UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsLao(int code) {
+    return((0x0E80 <= code) && (code <= 0x0EFF));
+}
+
+/**
+ * xmlUCSIsLatin1Supplement:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Latin-1Supplement UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsLatin1Supplement(int code) {
+    return((0x0080 <= code) && (code <= 0x00FF));
+}
+
+/**
+ * xmlUCSIsLatinExtendedA:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of LatinExtended-A UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsLatinExtendedA(int code) {
+    return((0x0100 <= code) && (code <= 0x017F));
+}
+
+/**
+ * xmlUCSIsLatinExtendedB:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of LatinExtended-B UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsLatinExtendedB(int code) {
+    return((0x0180 <= code) && (code <= 0x024F));
+}
+
+/**
+ * xmlUCSIsLatinExtendedAdditional:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of LatinExtendedAdditional UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsLatinExtendedAdditional(int code) {
+    return((0x1E00 <= code) && (code <= 0x1EFF));
+}
+
+/**
+ * xmlUCSIsLetterlikeSymbols:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of LetterlikeSymbols UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsLetterlikeSymbols(int code) {
+    return((0x2100 <= code) && (code <= 0x214F));
+}
+
+/**
+ * xmlUCSIsLowSurrogates:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of LowSurrogates UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsLowSurrogates(int code) {
+    return((0xDC00 <= code) && (code <= 0xDFFF));
+}
+
+/**
+ * xmlUCSIsMalayalam:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Malayalam UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsMalayalam(int code) {
+    return((0x0D00 <= code) && (code <= 0x0D7F));
+}
+
+/**
+ * xmlUCSIsMathematicalAlphanumericSymbols:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of MathematicalAlphanumericSymbols UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsMathematicalAlphanumericSymbols(int code) {
+    return((0x1D400 <= code) && (code <= 0x1D7FF));
+}
+
+/**
+ * xmlUCSIsMathematicalOperators:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of MathematicalOperators UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsMathematicalOperators(int code) {
+    return((0x2200 <= code) && (code <= 0x22FF));
+}
+
+/**
+ * xmlUCSIsMiscellaneousSymbols:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of MiscellaneousSymbols UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsMiscellaneousSymbols(int code) {
+    return((0x2600 <= code) && (code <= 0x26FF));
+}
+
+/**
+ * xmlUCSIsMiscellaneousTechnical:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of MiscellaneousTechnical UCS Block
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsMiscellaneousTechnical(int code) {
+    return((0x2300 <= code) && (code <= 0x23FF));
+}
+
+/**
+ * xmlUCSIsMongolian:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the Mongolian UCS Block (0x1800 - 0x18AF)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsMongolian(int code) {
+ return(((code < 0x1800) || (code > 0x18AF)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsMusicalSymbols:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the MusicalSymbols UCS Block (0x1D100 - 0x1D1FF)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsMusicalSymbols(int code) {
+ return(((code < 0x1D100) || (code > 0x1D1FF)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsMyanmar:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the Myanmar UCS Block (0x1000 - 0x109F)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsMyanmar(int code) {
+ return(((code < 0x1000) || (code > 0x109F)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsNumberForms:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the NumberForms UCS Block (0x2150 - 0x218F)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsNumberForms(int code) {
+ return(((code < 0x2150) || (code > 0x218F)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsOgham:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the Ogham UCS Block (0x1680 - 0x169F)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsOgham(int code) {
+ return(((code < 0x1680) || (code > 0x169F)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsOldItalic:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the OldItalic UCS Block (0x10300 - 0x1032F)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsOldItalic(int code) {
+ return(((code < 0x10300) || (code > 0x1032F)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsOpticalCharacterRecognition:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the OpticalCharacterRecognition UCS Block (0x2440 - 0x245F)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsOpticalCharacterRecognition(int code) {
+ return(((code < 0x2440) || (code > 0x245F)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsOriya:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the Oriya UCS Block (0x0B00 - 0x0B7F)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsOriya(int code) {
+ return(((code < 0x0B00) || (code > 0x0B7F)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsPrivateUse:
+ * @code: UCS code point
+ * Check whether the character is part of one of the PrivateUse UCS Blocks
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsPrivateUse(int code) {
+ return(((code >= 0xE000) && (code <= 0xF8FF)) || /* BMP area */
+ ((code >= 0xF0000) && (code <= 0xFFFFD)) || /* plane 15 */
+ ((code >= 0x100000) && (code <= 0x10FFFD))); /* plane 16 */
+}
+
+/**
+ * xmlUCSIsRunic:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the Runic UCS Block (0x16A0 - 0x16FF)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsRunic(int code) {
+ return(((code < 0x16A0) || (code > 0x16FF)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsSinhala:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the Sinhala UCS Block (0x0D80 - 0x0DFF)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsSinhala(int code) {
+ return(((code < 0x0D80) || (code > 0x0DFF)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsSmallFormVariants:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the SmallFormVariants UCS Block (0xFE50 - 0xFE6F)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsSmallFormVariants(int code) {
+ return(((code < 0xFE50) || (code > 0xFE6F)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsSpacingModifierLetters:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the SpacingModifierLetters UCS Block (0x02B0 - 0x02FF)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsSpacingModifierLetters(int code) {
+ return(((code < 0x02B0) || (code > 0x02FF)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsSpecials:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of one of the Specials UCS Blocks
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsSpecials(int code) {
+ return((code == 0xFEFF) || /* one code point block */
+ ((code >= 0xFFF0) && (code <= 0xFFFD)));
+}
+
+/**
+ * xmlUCSIsSuperscriptsandSubscripts:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the SuperscriptsandSubscripts UCS Block (0x2070 - 0x209F)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsSuperscriptsandSubscripts(int code) {
+ return(((code < 0x2070) || (code > 0x209F)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsSyriac:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the Syriac UCS Block (0x0700 - 0x074F)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsSyriac(int code) {
+ return(((code < 0x0700) || (code > 0x074F)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsTags:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the Tags UCS Block (0xE0000 - 0xE007F)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsTags(int code) {
+ return(((code < 0xE0000) || (code > 0xE007F)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsTamil:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the Tamil UCS Block (0x0B80 - 0x0BFF)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsTamil(int code) {
+ return(((code < 0x0B80) || (code > 0x0BFF)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsTelugu:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the Telugu UCS Block (0x0C00 - 0x0C7F)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsTelugu(int code) {
+ return(((code < 0x0C00) || (code > 0x0C7F)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsThaana:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the Thaana UCS Block (0x0780 - 0x07BF)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsThaana(int code) {
+ return(((code < 0x0780) || (code > 0x07BF)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsThai:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the Thai UCS Block (0x0E00 - 0x0E7F)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsThai(int code) {
+ return(((code < 0x0E00) || (code > 0x0E7F)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsTibetan:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the Tibetan UCS Block (0x0F00 - 0x0FFF)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsTibetan(int code) {
+ return(((code < 0x0F00) || (code > 0x0FFF)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsUnifiedCanadianAboriginalSyllabics:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the UnifiedCanadianAboriginalSyllabics UCS Block (0x1400 - 0x167F)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsUnifiedCanadianAboriginalSyllabics(int code) {
+ return(((code < 0x1400) || (code > 0x167F)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsYiRadicals:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the YiRadicals UCS Block (0xA490 - 0xA4CF)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsYiRadicals(int code) {
+ return(((code < 0xA490) || (code > 0xA4CF)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsYiSyllables:
+ * @code: UCS code point
+ *
+ * Test whether @code falls inside the YiSyllables UCS Block (0xA000 - 0xA48F)
+ *
+ * Returns 1 when @code belongs to the block, 0 otherwise
+ */
+int
+xmlUCSIsYiSyllables(int code) {
+ return(((code < 0xA000) || (code > 0xA48F)) ? 0 : 1);
+}
+
+/**
+ * xmlUCSIsBlock:
+ * @code: UCS code point
+ * @block: UCS block name (exact, case-sensitive match), may be NULL
+ *
+ * Check whether the character is part of the named UCS Block
+ * Returns 1 if true, 0 if false and -1 on unknown (or NULL) block
+ */
+int
+xmlUCSIsBlock(int code, const char *block) {
+ if (block == NULL) return(-1); /* strcmp() must never be handed NULL */
+ if (!strcmp(block, "AlphabeticPresentationForms"))
+ return(xmlUCSIsAlphabeticPresentationForms(code));
+ if (!strcmp(block, "Arabic"))
+ return(xmlUCSIsArabic(code));
+ if (!strcmp(block, "ArabicPresentationForms-A"))
+ return(xmlUCSIsArabicPresentationFormsA(code));
+ if (!strcmp(block, "ArabicPresentationForms-B"))
+ return(xmlUCSIsArabicPresentationFormsB(code));
+ if (!strcmp(block, "Armenian"))
+ return(xmlUCSIsArmenian(code));
+ if (!strcmp(block, "Arrows"))
+ return(xmlUCSIsArrows(code));
+ if (!strcmp(block, "BasicLatin"))
+ return(xmlUCSIsBasicLatin(code));
+ if (!strcmp(block, "Bengali"))
+ return(xmlUCSIsBengali(code));
+ if (!strcmp(block, "BlockElements"))
+ return(xmlUCSIsBlockElements(code));
+ if (!strcmp(block, "Bopomofo"))
+ return(xmlUCSIsBopomofo(code));
+ if (!strcmp(block, "BopomofoExtended"))
+ return(xmlUCSIsBopomofoExtended(code));
+ if (!strcmp(block, "BoxDrawing"))
+ return(xmlUCSIsBoxDrawing(code));
+ if (!strcmp(block, "BraillePatterns"))
+ return(xmlUCSIsBraillePatterns(code));
+ if (!strcmp(block, "ByzantineMusicalSymbols"))
+ return(xmlUCSIsByzantineMusicalSymbols(code));
+ if (!strcmp(block, "CJKCompatibility"))
+ return(xmlUCSIsCJKCompatibility(code));
+ if (!strcmp(block, "CJKCompatibilityForms"))
+ return(xmlUCSIsCJKCompatibilityForms(code));
+ if (!strcmp(block, "CJKCompatibilityIdeographs"))
+ return(xmlUCSIsCJKCompatibilityIdeographs(code));
+ if (!strcmp(block, "CJKCompatibilityIdeographsSupplement"))
+ return(xmlUCSIsCJKCompatibilityIdeographsSupplement(code));
+ if (!strcmp(block, "CJKRadicalsSupplement"))
+ return(xmlUCSIsCJKRadicalsSupplement(code));
+ if (!strcmp(block, "CJKSymbolsandPunctuation"))
+ return(xmlUCSIsCJKSymbolsandPunctuation(code));
+ if (!strcmp(block, "CJKUnifiedIdeographs"))
+ return(xmlUCSIsCJKUnifiedIdeographs(code));
+ if (!strcmp(block, "CJKUnifiedIdeographsExtensionA"))
+ return(xmlUCSIsCJKUnifiedIdeographsExtensionA(code));
+ if (!strcmp(block, "CJKUnifiedIdeographsExtensionB"))
+ return(xmlUCSIsCJKUnifiedIdeographsExtensionB(code));
+ if (!strcmp(block, "Cherokee"))
+ return(xmlUCSIsCherokee(code));
+ if (!strcmp(block, "CombiningDiacriticalMarks"))
+ return(xmlUCSIsCombiningDiacriticalMarks(code));
+ if (!strcmp(block, "CombiningHalfMarks"))
+ return(xmlUCSIsCombiningHalfMarks(code));
+ if (!strcmp(block, "CombiningMarksforSymbols"))
+ return(xmlUCSIsCombiningMarksforSymbols(code));
+ if (!strcmp(block, "ControlPictures"))
+ return(xmlUCSIsControlPictures(code));
+ if (!strcmp(block, "CurrencySymbols"))
+ return(xmlUCSIsCurrencySymbols(code));
+ if (!strcmp(block, "Cyrillic"))
+ return(xmlUCSIsCyrillic(code));
+ if (!strcmp(block, "Deseret"))
+ return(xmlUCSIsDeseret(code));
+ if (!strcmp(block, "Devanagari"))
+ return(xmlUCSIsDevanagari(code));
+ if (!strcmp(block, "Dingbats"))
+ return(xmlUCSIsDingbats(code));
+ if (!strcmp(block, "EnclosedAlphanumerics"))
+ return(xmlUCSIsEnclosedAlphanumerics(code));
+ if (!strcmp(block, "EnclosedCJKLettersandMonths"))
+ return(xmlUCSIsEnclosedCJKLettersandMonths(code));
+ if (!strcmp(block, "Ethiopic"))
+ return(xmlUCSIsEthiopic(code));
+ if (!strcmp(block, "GeneralPunctuation"))
+ return(xmlUCSIsGeneralPunctuation(code));
+ if (!strcmp(block, "GeometricShapes"))
+ return(xmlUCSIsGeometricShapes(code));
+ if (!strcmp(block, "Georgian"))
+ return(xmlUCSIsGeorgian(code));
+ if (!strcmp(block, "Gothic"))
+ return(xmlUCSIsGothic(code));
+ if (!strcmp(block, "Greek"))
+ return(xmlUCSIsGreek(code));
+ if (!strcmp(block, "GreekExtended"))
+ return(xmlUCSIsGreekExtended(code));
+ if (!strcmp(block, "Gujarati"))
+ return(xmlUCSIsGujarati(code));
+ if (!strcmp(block, "Gurmukhi"))
+ return(xmlUCSIsGurmukhi(code));
+ if (!strcmp(block, "HalfwidthandFullwidthForms"))
+ return(xmlUCSIsHalfwidthandFullwidthForms(code));
+ if (!strcmp(block, "HangulCompatibilityJamo"))
+ return(xmlUCSIsHangulCompatibilityJamo(code));
+ if (!strcmp(block, "HangulJamo"))
+ return(xmlUCSIsHangulJamo(code));
+ if (!strcmp(block, "HangulSyllables"))
+ return(xmlUCSIsHangulSyllables(code));
+ if (!strcmp(block, "Hebrew"))
+ return(xmlUCSIsHebrew(code));
+ if (!strcmp(block, "HighPrivateUseSurrogates"))
+ return(xmlUCSIsHighPrivateUseSurrogates(code));
+ if (!strcmp(block, "HighSurrogates"))
+ return(xmlUCSIsHighSurrogates(code));
+ if (!strcmp(block, "Hiragana"))
+ return(xmlUCSIsHiragana(code));
+ if (!strcmp(block, "IPAExtensions"))
+ return(xmlUCSIsIPAExtensions(code));
+ if (!strcmp(block, "IdeographicDescriptionCharacters"))
+ return(xmlUCSIsIdeographicDescriptionCharacters(code));
+ if (!strcmp(block, "Kanbun"))
+ return(xmlUCSIsKanbun(code));
+ if (!strcmp(block, "KangxiRadicals"))
+ return(xmlUCSIsKangxiRadicals(code));
+ if (!strcmp(block, "Kannada"))
+ return(xmlUCSIsKannada(code));
+ if (!strcmp(block, "Katakana"))
+ return(xmlUCSIsKatakana(code));
+ if (!strcmp(block, "Khmer"))
+ return(xmlUCSIsKhmer(code));
+ if (!strcmp(block, "Lao"))
+ return(xmlUCSIsLao(code));
+ if (!strcmp(block, "Latin-1Supplement"))
+ return(xmlUCSIsLatin1Supplement(code));
+ if (!strcmp(block, "LatinExtended-A"))
+ return(xmlUCSIsLatinExtendedA(code));
+ if (!strcmp(block, "LatinExtended-B"))
+ return(xmlUCSIsLatinExtendedB(code));
+ if (!strcmp(block, "LatinExtendedAdditional"))
+ return(xmlUCSIsLatinExtendedAdditional(code));
+ if (!strcmp(block, "LetterlikeSymbols"))
+ return(xmlUCSIsLetterlikeSymbols(code));
+ if (!strcmp(block, "LowSurrogates"))
+ return(xmlUCSIsLowSurrogates(code));
+ if (!strcmp(block, "Malayalam"))
+ return(xmlUCSIsMalayalam(code));
+ if (!strcmp(block, "MathematicalAlphanumericSymbols"))
+ return(xmlUCSIsMathematicalAlphanumericSymbols(code));
+ if (!strcmp(block, "MathematicalOperators"))
+ return(xmlUCSIsMathematicalOperators(code));
+ if (!strcmp(block, "MiscellaneousSymbols"))
+ return(xmlUCSIsMiscellaneousSymbols(code));
+ if (!strcmp(block, "MiscellaneousTechnical"))
+ return(xmlUCSIsMiscellaneousTechnical(code));
+ if (!strcmp(block, "Mongolian"))
+ return(xmlUCSIsMongolian(code));
+ if (!strcmp(block, "MusicalSymbols"))
+ return(xmlUCSIsMusicalSymbols(code));
+ if (!strcmp(block, "Myanmar"))
+ return(xmlUCSIsMyanmar(code));
+ if (!strcmp(block, "NumberForms"))
+ return(xmlUCSIsNumberForms(code));
+ if (!strcmp(block, "Ogham"))
+ return(xmlUCSIsOgham(code));
+ if (!strcmp(block, "OldItalic"))
+ return(xmlUCSIsOldItalic(code));
+ if (!strcmp(block, "OpticalCharacterRecognition"))
+ return(xmlUCSIsOpticalCharacterRecognition(code));
+ if (!strcmp(block, "Oriya"))
+ return(xmlUCSIsOriya(code));
+ if (!strcmp(block, "PrivateUse"))
+ return(xmlUCSIsPrivateUse(code));
+ if (!strcmp(block, "Runic"))
+ return(xmlUCSIsRunic(code));
+ if (!strcmp(block, "Sinhala"))
+ return(xmlUCSIsSinhala(code));
+ if (!strcmp(block, "SmallFormVariants"))
+ return(xmlUCSIsSmallFormVariants(code));
+ if (!strcmp(block, "SpacingModifierLetters"))
+ return(xmlUCSIsSpacingModifierLetters(code));
+ if (!strcmp(block, "Specials"))
+ return(xmlUCSIsSpecials(code));
+ if (!strcmp(block, "SuperscriptsandSubscripts"))
+ return(xmlUCSIsSuperscriptsandSubscripts(code));
+ if (!strcmp(block, "Syriac"))
+ return(xmlUCSIsSyriac(code));
+ if (!strcmp(block, "Tags"))
+ return(xmlUCSIsTags(code));
+ if (!strcmp(block, "Tamil"))
+ return(xmlUCSIsTamil(code));
+ if (!strcmp(block, "Telugu"))
+ return(xmlUCSIsTelugu(code));
+ if (!strcmp(block, "Thaana"))
+ return(xmlUCSIsThaana(code));
+ if (!strcmp(block, "Thai"))
+ return(xmlUCSIsThai(code));
+ if (!strcmp(block, "Tibetan"))
+ return(xmlUCSIsTibetan(code));
+ if (!strcmp(block, "UnifiedCanadianAboriginalSyllabics"))
+ return(xmlUCSIsUnifiedCanadianAboriginalSyllabics(code));
+ if (!strcmp(block, "YiRadicals"))
+ return(xmlUCSIsYiRadicals(code));
+ if (!strcmp(block, "YiSyllables"))
+ return(xmlUCSIsYiSyllables(code));
+ return(-1);
+}
+
+/**
+ * xmlUCSIsCatC:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of C UCS Category, i.e. the union
+ * of the ranges tested for the Cc, Cf, Co and Cs categories.
+ *
+ * The surrogate (Cs) and private use (Co) areas are described in
+ * UnicodeData.txt only by their "First"/"Last" entries; they are matched
+ * here as complete ranges and not as isolated end points.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatC(int code) {
+ return(((code >= 0x0) && (code <= 0x1f)) ||
+ ((code >= 0x7f) && (code <= 0x9f)) ||
+ (code == 0x70f) ||
+ ((code >= 0x180b) && (code <= 0x180e)) ||
+ ((code >= 0x200c) && (code <= 0x200f)) ||
+ ((code >= 0x202a) && (code <= 0x202e)) ||
+ ((code >= 0x206a) && (code <= 0x206f)) ||
+ ((code >= 0xd800) && (code <= 0xdfff)) || /* Cs: surrogates */
+ ((code >= 0xe000) && (code <= 0xf8ff)) || /* Co: BMP private use */
+ (code == 0xfeff) ||
+ ((code >= 0xfff9) && (code <= 0xfffb)) ||
+ ((code >= 0x1d173) && (code <= 0x1d17a)) ||
+ (code == 0xe0001) ||
+ ((code >= 0xe0020) && (code <= 0xe007f)) ||
+ ((code >= 0xf0000) && (code <= 0xffffd)) || /* Co: plane 15 */
+ ((code >= 0x100000) && (code <= 0x10fffd))); /* Co: plane 16 */
+}
+
+/**
+ * xmlUCSIsCatCc:
+ * @code: UCS code point
+ *
+ * Test whether @code is in the Cc UCS Category (0x0 - 0x1f or 0x7f - 0x9f)
+ *
+ * Returns 1 when @code belongs to the category, 0 otherwise
+ */
+int
+xmlUCSIsCatCc(int code) {
+ return(!(((code < 0x0) || (code > 0x1f)) &&
+ ((code < 0x7f) || (code > 0x9f))));
+}
+
+/**
+ * xmlUCSIsCatCf:
+ * @code: UCS code point
+ *
+ * Test whether @code is in the Cf UCS Category
+ *
+ * Returns 1 when @code belongs to the category, 0 otherwise
+ */
+int
+xmlUCSIsCatCf(int code) {
+ /* isolated code points first, then the contiguous runs */
+ if ((code == 0x70f) || (code == 0xfeff) || (code == 0xe0001))
+ return(1);
+ return(((code >= 0x180b) && (code <= 0x180e)) ||
+ ((code >= 0x200c) && (code <= 0x200f)) ||
+ ((code >= 0x202a) && (code <= 0x202e)) ||
+ ((code >= 0x206a) && (code <= 0x206f)) ||
+ ((code >= 0xfff9) && (code <= 0xfffb)) ||
+ ((code >= 0x1d173) && (code <= 0x1d17a)) ||
+ ((code >= 0xe0020) && (code <= 0xe007f)));
+}
+
+/**
+ * xmlUCSIsCatCo:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Co UCS Category
+ * The three private use areas are described in UnicodeData.txt only by
+ * their "First"/"Last" entries; they are matched here as complete ranges
+ * and not as isolated end points.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatCo(int code) {
+ return(((code >= 0xe000) && (code <= 0xf8ff)) ||
+ ((code >= 0xf0000) && (code <= 0xffffd)) ||
+ ((code >= 0x100000) && (code <= 0x10fffd)));
+}
+
+/**
+ * xmlUCSIsCatCs:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Cs UCS Category
+ * The surrogate areas are described in UnicodeData.txt only by their
+ * "First"/"Last" entries; the whole 0xd800-0xdfff range is matched here
+ * and not only the isolated end points.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatCs(int code) {
+ return((code >= 0xd800) && (code <= 0xdfff));
+}
+
+/**
+ * xmlUCSIsCatL:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of L UCS Category
+ * The CJK ideograph (0x3400-0x4db5, 0x4e00-0x9fa5, 0x20000-0x2a6d6) and
+ * Hangul syllable (0xac00-0xd7a3) areas appear in UnicodeData.txt only as
+ * "First"/"Last" entries; they are matched here as complete ranges, not
+ * as isolated end points.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatL(int code) {
+ return(((code >= 0x41) && (code <= 0x5a)) ||
+ ((code >= 0x61) && (code <= 0x7a)) ||
+ (code == 0xaa) ||
+ (code == 0xb5) ||
+ (code == 0xba) ||
+ ((code >= 0xc0) && (code <= 0xd6)) ||
+ ((code >= 0xd8) && (code <= 0xf6)) ||
+ ((code >= 0xf8) && (code <= 0x21f)) ||
+ ((code >= 0x222) && (code <= 0x233)) ||
+ ((code >= 0x250) && (code <= 0x2ad)) ||
+ ((code >= 0x2b0) && (code <= 0x2b8)) ||
+ ((code >= 0x2bb) && (code <= 0x2c1)) ||
+ ((code >= 0x2d0) && (code <= 0x2d1)) ||
+ ((code >= 0x2e0) && (code <= 0x2e4)) ||
+ (code == 0x2ee) ||
+ (code == 0x37a) ||
+ (code == 0x386) ||
+ ((code >= 0x388) && (code <= 0x38a)) ||
+ (code == 0x38c) ||
+ ((code >= 0x38e) && (code <= 0x3a1)) ||
+ ((code >= 0x3a3) && (code <= 0x3ce)) ||
+ ((code >= 0x3d0) && (code <= 0x3d7)) ||
+ ((code >= 0x3da) && (code <= 0x3f5)) ||
+ ((code >= 0x400) && (code <= 0x481)) ||
+ ((code >= 0x48c) && (code <= 0x4c4)) ||
+ ((code >= 0x4c7) && (code <= 0x4c8)) ||
+ ((code >= 0x4cb) && (code <= 0x4cc)) ||
+ ((code >= 0x4d0) && (code <= 0x4f5)) ||
+ ((code >= 0x4f8) && (code <= 0x4f9)) ||
+ ((code >= 0x531) && (code <= 0x556)) ||
+ (code == 0x559) ||
+ ((code >= 0x561) && (code <= 0x587)) ||
+ ((code >= 0x5d0) && (code <= 0x5ea)) ||
+ ((code >= 0x5f0) && (code <= 0x5f2)) ||
+ ((code >= 0x621) && (code <= 0x63a)) ||
+ ((code >= 0x640) && (code <= 0x64a)) ||
+ ((code >= 0x671) && (code <= 0x6d3)) ||
+ (code == 0x6d5) ||
+ ((code >= 0x6e5) && (code <= 0x6e6)) ||
+ ((code >= 0x6fa) && (code <= 0x6fc)) ||
+ (code == 0x710) ||
+ ((code >= 0x712) && (code <= 0x72c)) ||
+ ((code >= 0x780) && (code <= 0x7a5)) ||
+ ((code >= 0x905) && (code <= 0x939)) ||
+ (code == 0x93d) ||
+ (code == 0x950) ||
+ ((code >= 0x958) && (code <= 0x961)) ||
+ ((code >= 0x985) && (code <= 0x98c)) ||
+ ((code >= 0x98f) && (code <= 0x990)) ||
+ ((code >= 0x993) && (code <= 0x9a8)) ||
+ ((code >= 0x9aa) && (code <= 0x9b0)) ||
+ (code == 0x9b2) ||
+ ((code >= 0x9b6) && (code <= 0x9b9)) ||
+ ((code >= 0x9dc) && (code <= 0x9dd)) ||
+ ((code >= 0x9df) && (code <= 0x9e1)) ||
+ ((code >= 0x9f0) && (code <= 0x9f1)) ||
+ ((code >= 0xa05) && (code <= 0xa0a)) ||
+ ((code >= 0xa0f) && (code <= 0xa10)) ||
+ ((code >= 0xa13) && (code <= 0xa28)) ||
+ ((code >= 0xa2a) && (code <= 0xa30)) ||
+ ((code >= 0xa32) && (code <= 0xa33)) ||
+ ((code >= 0xa35) && (code <= 0xa36)) ||
+ ((code >= 0xa38) && (code <= 0xa39)) ||
+ ((code >= 0xa59) && (code <= 0xa5c)) ||
+ (code == 0xa5e) ||
+ ((code >= 0xa72) && (code <= 0xa74)) ||
+ ((code >= 0xa85) && (code <= 0xa8b)) ||
+ (code == 0xa8d) ||
+ ((code >= 0xa8f) && (code <= 0xa91)) ||
+ ((code >= 0xa93) && (code <= 0xaa8)) ||
+ ((code >= 0xaaa) && (code <= 0xab0)) ||
+ ((code >= 0xab2) && (code <= 0xab3)) ||
+ ((code >= 0xab5) && (code <= 0xab9)) ||
+ (code == 0xabd) ||
+ (code == 0xad0) ||
+ (code == 0xae0) ||
+ ((code >= 0xb05) && (code <= 0xb0c)) ||
+ ((code >= 0xb0f) && (code <= 0xb10)) ||
+ ((code >= 0xb13) && (code <= 0xb28)) ||
+ ((code >= 0xb2a) && (code <= 0xb30)) ||
+ ((code >= 0xb32) && (code <= 0xb33)) ||
+ ((code >= 0xb36) && (code <= 0xb39)) ||
+ (code == 0xb3d) ||
+ ((code >= 0xb5c) && (code <= 0xb5d)) ||
+ ((code >= 0xb5f) && (code <= 0xb61)) ||
+ ((code >= 0xb85) && (code <= 0xb8a)) ||
+ ((code >= 0xb8e) && (code <= 0xb90)) ||
+ ((code >= 0xb92) && (code <= 0xb95)) ||
+ ((code >= 0xb99) && (code <= 0xb9a)) ||
+ (code == 0xb9c) ||
+ ((code >= 0xb9e) && (code <= 0xb9f)) ||
+ ((code >= 0xba3) && (code <= 0xba4)) ||
+ ((code >= 0xba8) && (code <= 0xbaa)) ||
+ ((code >= 0xbae) && (code <= 0xbb5)) ||
+ ((code >= 0xbb7) && (code <= 0xbb9)) ||
+ ((code >= 0xc05) && (code <= 0xc0c)) ||
+ ((code >= 0xc0e) && (code <= 0xc10)) ||
+ ((code >= 0xc12) && (code <= 0xc28)) ||
+ ((code >= 0xc2a) && (code <= 0xc33)) ||
+ ((code >= 0xc35) && (code <= 0xc39)) ||
+ ((code >= 0xc60) && (code <= 0xc61)) ||
+ ((code >= 0xc85) && (code <= 0xc8c)) ||
+ ((code >= 0xc8e) && (code <= 0xc90)) ||
+ ((code >= 0xc92) && (code <= 0xca8)) ||
+ ((code >= 0xcaa) && (code <= 0xcb3)) ||
+ ((code >= 0xcb5) && (code <= 0xcb9)) ||
+ (code == 0xcde) ||
+ ((code >= 0xce0) && (code <= 0xce1)) ||
+ ((code >= 0xd05) && (code <= 0xd0c)) ||
+ ((code >= 0xd0e) && (code <= 0xd10)) ||
+ ((code >= 0xd12) && (code <= 0xd28)) ||
+ ((code >= 0xd2a) && (code <= 0xd39)) ||
+ ((code >= 0xd60) && (code <= 0xd61)) ||
+ ((code >= 0xd85) && (code <= 0xd96)) ||
+ ((code >= 0xd9a) && (code <= 0xdb1)) ||
+ ((code >= 0xdb3) && (code <= 0xdbb)) ||
+ (code == 0xdbd) ||
+ ((code >= 0xdc0) && (code <= 0xdc6)) ||
+ ((code >= 0xe01) && (code <= 0xe30)) ||
+ ((code >= 0xe32) && (code <= 0xe33)) ||
+ ((code >= 0xe40) && (code <= 0xe46)) ||
+ ((code >= 0xe81) && (code <= 0xe82)) ||
+ (code == 0xe84) ||
+ ((code >= 0xe87) && (code <= 0xe88)) ||
+ (code == 0xe8a) ||
+ (code == 0xe8d) ||
+ ((code >= 0xe94) && (code <= 0xe97)) ||
+ ((code >= 0xe99) && (code <= 0xe9f)) ||
+ ((code >= 0xea1) && (code <= 0xea3)) ||
+ (code == 0xea5) ||
+ (code == 0xea7) ||
+ ((code >= 0xeaa) && (code <= 0xeab)) ||
+ ((code >= 0xead) && (code <= 0xeb0)) ||
+ ((code >= 0xeb2) && (code <= 0xeb3)) ||
+ (code == 0xebd) ||
+ ((code >= 0xec0) && (code <= 0xec4)) ||
+ (code == 0xec6) ||
+ ((code >= 0xedc) && (code <= 0xedd)) ||
+ (code == 0xf00) ||
+ ((code >= 0xf40) && (code <= 0xf47)) ||
+ ((code >= 0xf49) && (code <= 0xf6a)) ||
+ ((code >= 0xf88) && (code <= 0xf8b)) ||
+ ((code >= 0x1000) && (code <= 0x1021)) ||
+ ((code >= 0x1023) && (code <= 0x1027)) ||
+ ((code >= 0x1029) && (code <= 0x102a)) ||
+ ((code >= 0x1050) && (code <= 0x1055)) ||
+ ((code >= 0x10a0) && (code <= 0x10c5)) ||
+ ((code >= 0x10d0) && (code <= 0x10f6)) ||
+ ((code >= 0x1100) && (code <= 0x1159)) ||
+ ((code >= 0x115f) && (code <= 0x11a2)) ||
+ ((code >= 0x11a8) && (code <= 0x11f9)) ||
+ ((code >= 0x1200) && (code <= 0x1206)) ||
+ ((code >= 0x1208) && (code <= 0x1246)) ||
+ (code == 0x1248) ||
+ ((code >= 0x124a) && (code <= 0x124d)) ||
+ ((code >= 0x1250) && (code <= 0x1256)) ||
+ (code == 0x1258) ||
+ ((code >= 0x125a) && (code <= 0x125d)) ||
+ ((code >= 0x1260) && (code <= 0x1286)) ||
+ (code == 0x1288) ||
+ ((code >= 0x128a) && (code <= 0x128d)) ||
+ ((code >= 0x1290) && (code <= 0x12ae)) ||
+ (code == 0x12b0) ||
+ ((code >= 0x12b2) && (code <= 0x12b5)) ||
+ ((code >= 0x12b8) && (code <= 0x12be)) ||
+ (code == 0x12c0) ||
+ ((code >= 0x12c2) && (code <= 0x12c5)) ||
+ ((code >= 0x12c8) && (code <= 0x12ce)) ||
+ ((code >= 0x12d0) && (code <= 0x12d6)) ||
+ ((code >= 0x12d8) && (code <= 0x12ee)) ||
+ ((code >= 0x12f0) && (code <= 0x130e)) ||
+ (code == 0x1310) ||
+ ((code >= 0x1312) && (code <= 0x1315)) ||
+ ((code >= 0x1318) && (code <= 0x131e)) ||
+ ((code >= 0x1320) && (code <= 0x1346)) ||
+ ((code >= 0x1348) && (code <= 0x135a)) ||
+ ((code >= 0x13a0) && (code <= 0x13f4)) ||
+ ((code >= 0x1401) && (code <= 0x166c)) ||
+ ((code >= 0x166f) && (code <= 0x1676)) ||
+ ((code >= 0x1681) && (code <= 0x169a)) ||
+ ((code >= 0x16a0) && (code <= 0x16ea)) ||
+ ((code >= 0x1780) && (code <= 0x17b3)) ||
+ ((code >= 0x1820) && (code <= 0x1877)) ||
+ ((code >= 0x1880) && (code <= 0x18a8)) ||
+ ((code >= 0x1e00) && (code <= 0x1e9b)) ||
+ ((code >= 0x1ea0) && (code <= 0x1ef9)) ||
+ ((code >= 0x1f00) && (code <= 0x1f15)) ||
+ ((code >= 0x1f18) && (code <= 0x1f1d)) ||
+ ((code >= 0x1f20) && (code <= 0x1f45)) ||
+ ((code >= 0x1f48) && (code <= 0x1f4d)) ||
+ ((code >= 0x1f50) && (code <= 0x1f57)) ||
+ (code == 0x1f59) ||
+ (code == 0x1f5b) ||
+ (code == 0x1f5d) ||
+ ((code >= 0x1f5f) && (code <= 0x1f7d)) ||
+ ((code >= 0x1f80) && (code <= 0x1fb4)) ||
+ ((code >= 0x1fb6) && (code <= 0x1fbc)) ||
+ (code == 0x1fbe) ||
+ ((code >= 0x1fc2) && (code <= 0x1fc4)) ||
+ ((code >= 0x1fc6) && (code <= 0x1fcc)) ||
+ ((code >= 0x1fd0) && (code <= 0x1fd3)) ||
+ ((code >= 0x1fd6) && (code <= 0x1fdb)) ||
+ ((code >= 0x1fe0) && (code <= 0x1fec)) ||
+ ((code >= 0x1ff2) && (code <= 0x1ff4)) ||
+ ((code >= 0x1ff6) && (code <= 0x1ffc)) ||
+ (code == 0x207f) ||
+ (code == 0x2102) ||
+ (code == 0x2107) ||
+ ((code >= 0x210a) && (code <= 0x2113)) ||
+ (code == 0x2115) ||
+ ((code >= 0x2119) && (code <= 0x211d)) ||
+ (code == 0x2124) ||
+ (code == 0x2126) ||
+ (code == 0x2128) ||
+ ((code >= 0x212a) && (code <= 0x212d)) ||
+ ((code >= 0x212f) && (code <= 0x2131)) ||
+ ((code >= 0x2133) && (code <= 0x2139)) ||
+ ((code >= 0x3005) && (code <= 0x3006)) ||
+ ((code >= 0x3031) && (code <= 0x3035)) ||
+ ((code >= 0x3041) && (code <= 0x3094)) ||
+ ((code >= 0x309d) && (code <= 0x309e)) ||
+ ((code >= 0x30a1) && (code <= 0x30fa)) ||
+ ((code >= 0x30fc) && (code <= 0x30fe)) ||
+ ((code >= 0x3105) && (code <= 0x312c)) ||
+ ((code >= 0x3131) && (code <= 0x318e)) ||
+ ((code >= 0x31a0) && (code <= 0x31b7)) ||
+ ((code >= 0x3400) && (code <= 0x4db5)) ||
+ ((code >= 0x4e00) && (code <= 0x9fa5)) ||
+ ((code >= 0xa000) && (code <= 0xa48c)) ||
+ ((code >= 0xac00) && (code <= 0xd7a3)) ||
+ ((code >= 0xf900) && (code <= 0xfa2d)) ||
+ ((code >= 0xfb00) && (code <= 0xfb06)) ||
+ ((code >= 0xfb13) && (code <= 0xfb17)) ||
+ (code == 0xfb1d) ||
+ ((code >= 0xfb1f) && (code <= 0xfb28)) ||
+ ((code >= 0xfb2a) && (code <= 0xfb36)) ||
+ ((code >= 0xfb38) && (code <= 0xfb3c)) ||
+ (code == 0xfb3e) ||
+ ((code >= 0xfb40) && (code <= 0xfb41)) ||
+ ((code >= 0xfb43) && (code <= 0xfb44)) ||
+ ((code >= 0xfb46) && (code <= 0xfbb1)) ||
+ ((code >= 0xfbd3) && (code <= 0xfd3d)) ||
+ ((code >= 0xfd50) && (code <= 0xfd8f)) ||
+ ((code >= 0xfd92) && (code <= 0xfdc7)) ||
+ ((code >= 0xfdf0) && (code <= 0xfdfb)) ||
+ ((code >= 0xfe70) && (code <= 0xfe72)) ||
+ (code == 0xfe74) ||
+ ((code >= 0xfe76) && (code <= 0xfefc)) ||
+ ((code >= 0xff21) && (code <= 0xff3a)) ||
+ ((code >= 0xff41) && (code <= 0xff5a)) ||
+ ((code >= 0xff66) && (code <= 0xffbe)) ||
+ ((code >= 0xffc2) && (code <= 0xffc7)) ||
+ ((code >= 0xffca) && (code <= 0xffcf)) ||
+ ((code >= 0xffd2) && (code <= 0xffd7)) ||
+ ((code >= 0xffda) && (code <= 0xffdc)) ||
+ ((code >= 0x10300) && (code <= 0x1031e)) ||
+ ((code >= 0x10330) && (code <= 0x10349)) ||
+ ((code >= 0x10400) && (code <= 0x10425)) ||
+ ((code >= 0x10428) && (code <= 0x1044d)) ||
+ ((code >= 0x1d400) && (code <= 0x1d454)) ||
+ ((code >= 0x1d456) && (code <= 0x1d49c)) ||
+ ((code >= 0x1d49e) && (code <= 0x1d49f)) ||
+ (code == 0x1d4a2) ||
+ ((code >= 0x1d4a5) && (code <= 0x1d4a6)) ||
+ ((code >= 0x1d4a9) && (code <= 0x1d4ac)) ||
+ ((code >= 0x1d4ae) && (code <= 0x1d4b9)) ||
+ (code == 0x1d4bb) ||
+ ((code >= 0x1d4bd) && (code <= 0x1d4c0)) ||
+ ((code >= 0x1d4c2) && (code <= 0x1d4c3)) ||
+ ((code >= 0x1d4c5) && (code <= 0x1d505)) ||
+ ((code >= 0x1d507) && (code <= 0x1d50a)) ||
+ ((code >= 0x1d50d) && (code <= 0x1d514)) ||
+ ((code >= 0x1d516) && (code <= 0x1d51c)) ||
+ ((code >= 0x1d51e) && (code <= 0x1d539)) ||
+ ((code >= 0x1d53b) && (code <= 0x1d53e)) ||
+ ((code >= 0x1d540) && (code <= 0x1d544)) ||
+ (code == 0x1d546) ||
+ ((code >= 0x1d54a) && (code <= 0x1d550)) ||
+ ((code >= 0x1d552) && (code <= 0x1d6a3)) ||
+ ((code >= 0x1d6a8) && (code <= 0x1d6c0)) ||
+ ((code >= 0x1d6c2) && (code <= 0x1d6da)) ||
+ ((code >= 0x1d6dc) && (code <= 0x1d6fa)) ||
+ ((code >= 0x1d6fc) && (code <= 0x1d714)) ||
+ ((code >= 0x1d716) && (code <= 0x1d734)) ||
+ ((code >= 0x1d736) && (code <= 0x1d74e)) ||
+ ((code >= 0x1d750) && (code <= 0x1d76e)) ||
+ ((code >= 0x1d770) && (code <= 0x1d788)) ||
+ ((code >= 0x1d78a) && (code <= 0x1d7a8)) ||
+ ((code >= 0x1d7aa) && (code <= 0x1d7c2)) ||
+ ((code >= 0x1d7c4) && (code <= 0x1d7c9)) ||
+ ((code >= 0x20000) && (code <= 0x2a6d6)) ||
+ ((code >= 0x2f800) && (code <= 0x2fa1d)));
+}
+
+/**
+ * xmlUCSIsCatLl:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Ll UCS Category
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatLl(int code) {
+ return(((code >= 0x61) && (code <= 0x7a)) ||
+ (code == 0xaa) ||
+ (code == 0xb5) ||
+ (code == 0xba) ||
+ ((code >= 0xdf) && (code <= 0xf6)) ||
+ ((code >= 0xf8) && (code <= 0xff)) ||
+ (code == 0x101) ||
+ (code == 0x103) ||
+ (code == 0x105) ||
+ (code == 0x107) ||
+ (code == 0x109) ||
+ (code == 0x10b) ||
+ (code == 0x10d) ||
+ (code == 0x10f) ||
+ (code == 0x111) ||
+ (code == 0x113) ||
+ (code == 0x115) ||
+ (code == 0x117) ||
+ (code == 0x119) ||
+ (code == 0x11b) ||
+ (code == 0x11d) ||
+ (code == 0x11f) ||
+ (code == 0x121) ||
+ (code == 0x123) ||
+ (code == 0x125) ||
+ (code == 0x127) ||
+ (code == 0x129) ||
+ (code == 0x12b) ||
+ (code == 0x12d) ||
+ (code == 0x12f) ||
+ (code == 0x131) ||
+ (code == 0x133) ||
+ (code == 0x135) ||
+ ((code >= 0x137) && (code <= 0x138)) ||
+ (code == 0x13a) ||
+ (code == 0x13c) ||
+ (code == 0x13e) ||
+ (code == 0x140) ||
+ (code == 0x142) ||
+ (code == 0x144) ||
+ (code == 0x146) ||
+ ((code >= 0x148) && (code <= 0x149)) ||
+ (code == 0x14b) ||
+ (code == 0x14d) ||
+ (code == 0x14f) ||
+ (code == 0x151) ||
+ (code == 0x153) ||
+ (code == 0x155) ||
+ (code == 0x157) ||
+ (code == 0x159) ||
+ (code == 0x15b) ||
+ (code == 0x15d) ||
+ (code == 0x15f) ||
+ (code == 0x161) ||
+ (code == 0x163) ||
+ (code == 0x165) ||
+ (code == 0x167) ||
+ (code == 0x169) ||
+ (code == 0x16b) ||
+ (code == 0x16d) ||
+ (code == 0x16f) ||
+ (code == 0x171) ||
+ (code == 0x173) ||
+ (code == 0x175) ||
+ (code == 0x177) ||
+ (code == 0x17a) ||
+ (code == 0x17c) ||
+ ((code >= 0x17e) && (code <= 0x180)) ||
+ (code == 0x183) ||
+ (code == 0x185) ||
+ (code == 0x188) ||
+ ((code >= 0x18c) && (code <= 0x18d)) ||
+ (code == 0x192) ||
+ (code == 0x195) ||
+ ((code >= 0x199) && (code <= 0x19b)) ||
+ (code == 0x19e) ||
+ (code == 0x1a1) ||
+ (code == 0x1a3) ||
+ (code == 0x1a5) ||
+ (code == 0x1a8) ||
+ ((code >= 0x1aa) && (code <= 0x1ab)) ||
+ (code == 0x1ad) ||
+ (code == 0x1b0) ||
+ (code == 0x1b4) ||
+ (code == 0x1b6) ||
+ ((code >= 0x1b9) && (code <= 0x1ba)) ||
+ ((code >= 0x1bd) && (code <= 0x1bf)) ||
+ (code == 0x1c6) ||
+ (code == 0x1c9) ||
+ (code == 0x1cc) ||
+ (code == 0x1ce) ||
+ (code == 0x1d0) ||
+ (code == 0x1d2) ||
+ (code == 0x1d4) ||
+ (code == 0x1d6) ||
+ (code == 0x1d8) ||
+ (code == 0x1da) ||
+ ((code >= 0x1dc) && (code <= 0x1dd)) ||
+ (code == 0x1df) ||
+ (code == 0x1e1) ||
+ (code == 0x1e3) ||
+ (code == 0x1e5) ||
+ (code == 0x1e7) ||
+ (code == 0x1e9) ||
+ (code == 0x1eb) ||
+ (code == 0x1ed) ||
+ ((code >= 0x1ef) && (code <= 0x1f0)) ||
+ (code == 0x1f3) ||
+ (code == 0x1f5) ||
+ (code == 0x1f9) ||
+ (code == 0x1fb) ||
+ (code == 0x1fd) ||
+ (code == 0x1ff) ||
+ (code == 0x201) ||
+ (code == 0x203) ||
+ (code == 0x205) ||
+ (code == 0x207) ||
+ (code == 0x209) ||
+ (code == 0x20b) ||
+ (code == 0x20d) ||
+ (code == 0x20f) ||
+ (code == 0x211) ||
+ (code == 0x213) ||
+ (code == 0x215) ||
+ (code == 0x217) ||
+ (code == 0x219) ||
+ (code == 0x21b) ||
+ (code == 0x21d) ||
+ (code == 0x21f) ||
+ (code == 0x223) ||
+ (code == 0x225) ||
+ (code == 0x227) ||
+ (code == 0x229) ||
+ (code == 0x22b) ||
+ (code == 0x22d) ||
+ (code == 0x22f) ||
+ (code == 0x231) ||
+ (code == 0x233) ||
+ ((code >= 0x250) && (code <= 0x2ad)) ||
+ (code == 0x390) ||
+ ((code >= 0x3ac) && (code <= 0x3ce)) ||
+ ((code >= 0x3d0) && (code <= 0x3d1)) ||
+ ((code >= 0x3d5) && (code <= 0x3d7)) ||
+ (code == 0x3db) ||
+ (code == 0x3dd) ||
+ (code == 0x3df) ||
+ (code == 0x3e1) ||
+ (code == 0x3e3) ||
+ (code == 0x3e5) ||
+ (code == 0x3e7) ||
+ (code == 0x3e9) ||
+ (code == 0x3eb) ||
+ (code == 0x3ed) ||
+ ((code >= 0x3ef) && (code <= 0x3f3)) ||
+ (code == 0x3f5) ||
+ ((code >= 0x430) && (code <= 0x45f)) ||
+ (code == 0x461) ||
+ (code == 0x463) ||
+ (code == 0x465) ||
+ (code == 0x467) ||
+ (code == 0x469) ||
+ (code == 0x46b) ||
+ (code == 0x46d) ||
+ (code == 0x46f) ||
+ (code == 0x471) ||
+ (code == 0x473) ||
+ (code == 0x475) ||
+ (code == 0x477) ||
+ (code == 0x479) ||
+ (code == 0x47b) ||
+ (code == 0x47d) ||
+ (code == 0x47f) ||
+ (code == 0x481) ||
+ (code == 0x48d) ||
+ (code == 0x48f) ||
+ (code == 0x491) ||
+ (code == 0x493) ||
+ (code == 0x495) ||
+ (code == 0x497) ||
+ (code == 0x499) ||
+ (code == 0x49b) ||
+ (code == 0x49d) ||
+ (code == 0x49f) ||
+ (code == 0x4a1) ||
+ (code == 0x4a3) ||
+ (code == 0x4a5) ||
+ (code == 0x4a7) ||
+ (code == 0x4a9) ||
+ (code == 0x4ab) ||
+ (code == 0x4ad) ||
+ (code == 0x4af) ||
+ (code == 0x4b1) ||
+ (code == 0x4b3) ||
+ (code == 0x4b5) ||
+ (code == 0x4b7) ||
+ (code == 0x4b9) ||
+ (code == 0x4bb) ||
+ (code == 0x4bd) ||
+ (code == 0x4bf) ||
+ (code == 0x4c2) ||
+ (code == 0x4c4) ||
+ (code == 0x4c8) ||
+ (code == 0x4cc) ||
+ (code == 0x4d1) ||
+ (code == 0x4d3) ||
+ (code == 0x4d5) ||
+ (code == 0x4d7) ||
+ (code == 0x4d9) ||
+ (code == 0x4db) ||
+ (code == 0x4dd) ||
+ (code == 0x4df) ||
+ (code == 0x4e1) ||
+ (code == 0x4e3) ||
+ (code == 0x4e5) ||
+ (code == 0x4e7) ||
+ (code == 0x4e9) ||
+ (code == 0x4eb) ||
+ (code == 0x4ed) ||
+ (code == 0x4ef) ||
+ (code == 0x4f1) ||
+ (code == 0x4f3) ||
+ (code == 0x4f5) ||
+ (code == 0x4f9) ||
+ ((code >= 0x561) && (code <= 0x587)) ||
+ (code == 0x1e01) ||
+ (code == 0x1e03) ||
+ (code == 0x1e05) ||
+ (code == 0x1e07) ||
+ (code == 0x1e09) ||
+ (code == 0x1e0b) ||
+ (code == 0x1e0d) ||
+ (code == 0x1e0f) ||
+ (code == 0x1e11) ||
+ (code == 0x1e13) ||
+ (code == 0x1e15) ||
+ (code == 0x1e17) ||
+ (code == 0x1e19) ||
+ (code == 0x1e1b) ||
+ (code == 0x1e1d) ||
+ (code == 0x1e1f) ||
+ (code == 0x1e21) ||
+ (code == 0x1e23) ||
+ (code == 0x1e25) ||
+ (code == 0x1e27) ||
+ (code == 0x1e29) ||
+ (code == 0x1e2b) ||
+ (code == 0x1e2d) ||
+ (code == 0x1e2f) ||
+ (code == 0x1e31) ||
+ (code == 0x1e33) ||
+ (code == 0x1e35) ||
+ (code == 0x1e37) ||
+ (code == 0x1e39) ||
+ (code == 0x1e3b) ||
+ (code == 0x1e3d) ||
+ (code == 0x1e3f) ||
+ (code == 0x1e41) ||
+ (code == 0x1e43) ||
+ (code == 0x1e45) ||
+ (code == 0x1e47) ||
+ (code == 0x1e49) ||
+ (code == 0x1e4b) ||
+ (code == 0x1e4d) ||
+ (code == 0x1e4f) ||
+ (code == 0x1e51) ||
+ (code == 0x1e53) ||
+ (code == 0x1e55) ||
+ (code == 0x1e57) ||
+ (code == 0x1e59) ||
+ (code == 0x1e5b) ||
+ (code == 0x1e5d) ||
+ (code == 0x1e5f) ||
+ (code == 0x1e61) ||
+ (code == 0x1e63) ||
+ (code == 0x1e65) ||
+ (code == 0x1e67) ||
+ (code == 0x1e69) ||
+ (code == 0x1e6b) ||
+ (code == 0x1e6d) ||
+ (code == 0x1e6f) ||
+ (code == 0x1e71) ||
+ (code == 0x1e73) ||
+ (code == 0x1e75) ||
+ (code == 0x1e77) ||
+ (code == 0x1e79) ||
+ (code == 0x1e7b) ||
+ (code == 0x1e7d) ||
+ (code == 0x1e7f) ||
+ (code == 0x1e81) ||
+ (code == 0x1e83) ||
+ (code == 0x1e85) ||
+ (code == 0x1e87) ||
+ (code == 0x1e89) ||
+ (code == 0x1e8b) ||
+ (code == 0x1e8d) ||
+ (code == 0x1e8f) ||
+ (code == 0x1e91) ||
+ (code == 0x1e93) ||
+ ((code >= 0x1e95) && (code <= 0x1e9b)) ||
+ (code == 0x1ea1) ||
+ (code == 0x1ea3) ||
+ (code == 0x1ea5) ||
+ (code == 0x1ea7) ||
+ (code == 0x1ea9) ||
+ (code == 0x1eab) ||
+ (code == 0x1ead) ||
+ (code == 0x1eaf) ||
+ (code == 0x1eb1) ||
+ (code == 0x1eb3) ||
+ (code == 0x1eb5) ||
+ (code == 0x1eb7) ||
+ (code == 0x1eb9) ||
+ (code == 0x1ebb) ||
+ (code == 0x1ebd) ||
+ (code == 0x1ebf) ||
+ (code == 0x1ec1) ||
+ (code == 0x1ec3) ||
+ (code == 0x1ec5) ||
+ (code == 0x1ec7) ||
+ (code == 0x1ec9) ||
+ (code == 0x1ecb) ||
+ (code == 0x1ecd) ||
+ (code == 0x1ecf) ||
+ (code == 0x1ed1) ||
+ (code == 0x1ed3) ||
+ (code == 0x1ed5) ||
+ (code == 0x1ed7) ||
+ (code == 0x1ed9) ||
+ (code == 0x1edb) ||
+ (code == 0x1edd) ||
+ (code == 0x1edf) ||
+ (code == 0x1ee1) ||
+ (code == 0x1ee3) ||
+ (code == 0x1ee5) ||
+ (code == 0x1ee7) ||
+ (code == 0x1ee9) ||
+ (code == 0x1eeb) ||
+ (code == 0x1eed) ||
+ (code == 0x1eef) ||
+ (code == 0x1ef1) ||
+ (code == 0x1ef3) ||
+ (code == 0x1ef5) ||
+ (code == 0x1ef7) ||
+ (code == 0x1ef9) ||
+ ((code >= 0x1f00) && (code <= 0x1f07)) ||
+ ((code >= 0x1f10) && (code <= 0x1f15)) ||
+ ((code >= 0x1f20) && (code <= 0x1f27)) ||
+ ((code >= 0x1f30) && (code <= 0x1f37)) ||
+ ((code >= 0x1f40) && (code <= 0x1f45)) ||
+ ((code >= 0x1f50) && (code <= 0x1f57)) ||
+ ((code >= 0x1f60) && (code <= 0x1f67)) ||
+ ((code >= 0x1f70) && (code <= 0x1f7d)) ||
+ ((code >= 0x1f80) && (code <= 0x1f87)) ||
+ ((code >= 0x1f90) && (code <= 0x1f97)) ||
+ ((code >= 0x1fa0) && (code <= 0x1fa7)) ||
+ ((code >= 0x1fb0) && (code <= 0x1fb4)) ||
+ ((code >= 0x1fb6) && (code <= 0x1fb7)) ||
+ (code == 0x1fbe) ||
+ ((code >= 0x1fc2) && (code <= 0x1fc4)) ||
+ ((code >= 0x1fc6) && (code <= 0x1fc7)) ||
+ ((code >= 0x1fd0) && (code <= 0x1fd3)) ||
+ ((code >= 0x1fd6) && (code <= 0x1fd7)) ||
+ ((code >= 0x1fe0) && (code <= 0x1fe7)) ||
+ ((code >= 0x1ff2) && (code <= 0x1ff4)) ||
+ ((code >= 0x1ff6) && (code <= 0x1ff7)) ||
+ (code == 0x207f) ||
+ (code == 0x210a) ||
+ ((code >= 0x210e) && (code <= 0x210f)) ||
+ (code == 0x2113) ||
+ (code == 0x212f) ||
+ (code == 0x2134) ||
+ (code == 0x2139) ||
+ ((code >= 0xfb00) && (code <= 0xfb06)) ||
+ ((code >= 0xfb13) && (code <= 0xfb17)) ||
+ ((code >= 0xff41) && (code <= 0xff5a)) ||
+ ((code >= 0x10428) && (code <= 0x1044d)) ||
+ ((code >= 0x1d41a) && (code <= 0x1d433)) ||
+ ((code >= 0x1d44e) && (code <= 0x1d454)) ||
+ ((code >= 0x1d456) && (code <= 0x1d467)) ||
+ ((code >= 0x1d482) && (code <= 0x1d49b)) ||
+ ((code >= 0x1d4b6) && (code <= 0x1d4b9)) ||
+ (code == 0x1d4bb) ||
+ ((code >= 0x1d4bd) && (code <= 0x1d4c0)) ||
+ ((code >= 0x1d4c2) && (code <= 0x1d4c3)) ||
+ ((code >= 0x1d4c5) && (code <= 0x1d4cf)) ||
+ ((code >= 0x1d4ea) && (code <= 0x1d503)) ||
+ ((code >= 0x1d51e) && (code <= 0x1d537)) ||
+ ((code >= 0x1d552) && (code <= 0x1d56b)) ||
+ ((code >= 0x1d586) && (code <= 0x1d59f)) ||
+ ((code >= 0x1d5ba) && (code <= 0x1d5d3)) ||
+ ((code >= 0x1d5ee) && (code <= 0x1d607)) ||
+ ((code >= 0x1d622) && (code <= 0x1d63b)) ||
+ ((code >= 0x1d656) && (code <= 0x1d66f)) ||
+ ((code >= 0x1d68a) && (code <= 0x1d6a3)) ||
+ ((code >= 0x1d6c2) && (code <= 0x1d6da)) ||
+ ((code >= 0x1d6dc) && (code <= 0x1d6e1)) ||
+ ((code >= 0x1d6fc) && (code <= 0x1d714)) ||
+ ((code >= 0x1d716) && (code <= 0x1d71b)) ||
+ ((code >= 0x1d736) && (code <= 0x1d74e)) ||
+ ((code >= 0x1d750) && (code <= 0x1d755)) ||
+ ((code >= 0x1d770) && (code <= 0x1d788)) ||
+ ((code >= 0x1d78a) && (code <= 0x1d78f)) ||
+ ((code >= 0x1d7aa) && (code <= 0x1d7c2)) ||
+ ((code >= 0x1d7c4) && (code <= 0x1d7c9)));
+}
+
+/**
+ * xmlUCSIsCatLm:
+ * @code: UCS code point
+ *
+ * Look @code up in the list of code point intervals that make up the
+ * Lm (modifier letter) UCS Category
+ *
+ * Returns 1 if @code belongs to the category, 0 otherwise
+ */
+int
+xmlUCSIsCatLm(int code) {
+    /* inclusive {first, last} intervals, kept in ascending order */
+    static const int intervals[][2] = {
+        {0x2b0, 0x2b8}, {0x2bb, 0x2c1}, {0x2d0, 0x2d1}, {0x2e0, 0x2e4},
+        {0x2ee, 0x2ee}, {0x37a, 0x37a}, {0x559, 0x559}, {0x640, 0x640},
+        {0x6e5, 0x6e6}, {0xe46, 0xe46}, {0xec6, 0xec6}, {0x1843, 0x1843},
+        {0x3005, 0x3005}, {0x3031, 0x3035}, {0x309d, 0x309e},
+        {0x30fc, 0x30fe}, {0xff70, 0xff70}, {0xff9e, 0xff9f}
+    };
+    unsigned int idx;
+
+    for (idx = 0; idx < sizeof(intervals) / sizeof(intervals[0]); idx++) {
+        /* the table is sorted, so no later interval can contain @code */
+        if (code < intervals[idx][0])
+            return(0);
+        if (code <= intervals[idx][1])
+            return(1);
+    }
+    return(0);
+}
+
+/**
+ * xmlUCSIsCatLo:
+ * @code: UCS code point
+ *
+ * Check whether the character is part of Lo (other letter) UCS Category
+ *
+ * The four large ideograph/syllable blocks (0x3400-0x4db5, 0x4e00-0x9fa5,
+ * 0xac00-0xd7a3 and 0x20000-0x2a6d6) are tested as inclusive ranges: the
+ * Unicode character database only lists their first and last code point,
+ * but every code point in between belongs to the category too.
+ * NOTE(review): genUnicode.py is part of the same commit, so this table is
+ * presumably generated; the generator needs the same range handling or
+ * the fix will be lost on regeneration - confirm.
+ *
+ * Returns 1 if true 0 otherwise
+ */
+int
+xmlUCSIsCatLo(int code) {
+    return((code == 0x1bb) ||
+           ((code >= 0x1c0) && (code <= 0x1c3)) ||
+           ((code >= 0x5d0) && (code <= 0x5ea)) ||
+           ((code >= 0x5f0) && (code <= 0x5f2)) ||
+           ((code >= 0x621) && (code <= 0x63a)) ||
+           ((code >= 0x641) && (code <= 0x64a)) ||
+           ((code >= 0x671) && (code <= 0x6d3)) ||
+           (code == 0x6d5) ||
+           ((code >= 0x6fa) && (code <= 0x6fc)) ||
+           (code == 0x710) ||
+           ((code >= 0x712) && (code <= 0x72c)) ||
+           ((code >= 0x780) && (code <= 0x7a5)) ||
+           ((code >= 0x905) && (code <= 0x939)) ||
+           (code == 0x93d) ||
+           (code == 0x950) ||
+           ((code >= 0x958) && (code <= 0x961)) ||
+           ((code >= 0x985) && (code <= 0x98c)) ||
+           ((code >= 0x98f) && (code <= 0x990)) ||
+           ((code >= 0x993) && (code <= 0x9a8)) ||
+           ((code >= 0x9aa) && (code <= 0x9b0)) ||
+           (code == 0x9b2) ||
+           ((code >= 0x9b6) && (code <= 0x9b9)) ||
+           ((code >= 0x9dc) && (code <= 0x9dd)) ||
+           ((code >= 0x9df) && (code <= 0x9e1)) ||
+           ((code >= 0x9f0) && (code <= 0x9f1)) ||
+           ((code >= 0xa05) && (code <= 0xa0a)) ||
+           ((code >= 0xa0f) && (code <= 0xa10)) ||
+           ((code >= 0xa13) && (code <= 0xa28)) ||
+           ((code >= 0xa2a) && (code <= 0xa30)) ||
+           ((code >= 0xa32) && (code <= 0xa33)) ||
+           ((code >= 0xa35) && (code <= 0xa36)) ||
+           ((code >= 0xa38) && (code <= 0xa39)) ||
+           ((code >= 0xa59) && (code <= 0xa5c)) ||
+           (code == 0xa5e) ||
+           ((code >= 0xa72) && (code <= 0xa74)) ||
+           ((code >= 0xa85) && (code <= 0xa8b)) ||
+           (code == 0xa8d) ||
+           ((code >= 0xa8f) && (code <= 0xa91)) ||
+           ((code >= 0xa93) && (code <= 0xaa8)) ||
+           ((code >= 0xaaa) && (code <= 0xab0)) ||
+           ((code >= 0xab2) && (code <= 0xab3)) ||
+           ((code >= 0xab5) && (code <= 0xab9)) ||
+           (code == 0xabd) ||
+           (code == 0xad0) ||
+           (code == 0xae0) ||
+           ((code >= 0xb05) && (code <= 0xb0c)) ||
+           ((code >= 0xb0f) && (code <= 0xb10)) ||
+           ((code >= 0xb13) && (code <= 0xb28)) ||
+           ((code >= 0xb2a) && (code <= 0xb30)) ||
+           ((code >= 0xb32) && (code <= 0xb33)) ||
+           ((code >= 0xb36) && (code <= 0xb39)) ||
+           (code == 0xb3d) ||
+           ((code >= 0xb5c) && (code <= 0xb5d)) ||
+           ((code >= 0xb5f) && (code <= 0xb61)) ||
+           ((code >= 0xb85) && (code <= 0xb8a)) ||
+           ((code >= 0xb8e) && (code <= 0xb90)) ||
+           ((code >= 0xb92) && (code <= 0xb95)) ||
+           ((code >= 0xb99) && (code <= 0xb9a)) ||
+           (code == 0xb9c) ||
+           ((code >= 0xb9e) && (code <= 0xb9f)) ||
+           ((code >= 0xba3) && (code <= 0xba4)) ||
+           ((code >= 0xba8) && (code <= 0xbaa)) ||
+           ((code >= 0xbae) && (code <= 0xbb5)) ||
+           ((code >= 0xbb7) && (code <= 0xbb9)) ||
+           ((code >= 0xc05) && (code <= 0xc0c)) ||
+           ((code >= 0xc0e) && (code <= 0xc10)) ||
+           ((code >= 0xc12) && (code <= 0xc28)) ||
+           ((code >= 0xc2a) && (code <= 0xc33)) ||
+           ((code >= 0xc35) && (code <= 0xc39)) ||
+           ((code >= 0xc60) && (code <= 0xc61)) ||
+           ((code >= 0xc85) && (code <= 0xc8c)) ||
+           ((code >= 0xc8e) && (code <= 0xc90)) ||
+           ((code >= 0xc92) && (code <= 0xca8)) ||
+           ((code >= 0xcaa) && (code <= 0xcb3)) ||
+           ((code >= 0xcb5) && (code <= 0xcb9)) ||
+           (code == 0xcde) ||
+           ((code >= 0xce0) && (code <= 0xce1)) ||
+           ((code >= 0xd05) && (code <= 0xd0c)) ||
+           ((code >= 0xd0e) && (code <= 0xd10)) ||
+           ((code >= 0xd12) && (code <= 0xd28)) ||
+           ((code >= 0xd2a) && (code <= 0xd39)) ||
+           ((code >= 0xd60) && (code <= 0xd61)) ||
+           ((code >= 0xd85) && (code <= 0xd96)) ||
+           ((code >= 0xd9a) && (code <= 0xdb1)) ||
+           ((code >= 0xdb3) && (code <= 0xdbb)) ||
+           (code == 0xdbd) ||
+           ((code >= 0xdc0) && (code <= 0xdc6)) ||
+           ((code >= 0xe01) && (code <= 0xe30)) ||
+           ((code >= 0xe32) && (code <= 0xe33)) ||
+           ((code >= 0xe40) && (code <= 0xe45)) ||
+           ((code >= 0xe81) && (code <= 0xe82)) ||
+           (code == 0xe84) ||
+           ((code >= 0xe87) && (code <= 0xe88)) ||
+           (code == 0xe8a) ||
+           (code == 0xe8d) ||
+           ((code >= 0xe94) && (code <= 0xe97)) ||
+           ((code >= 0xe99) && (code <= 0xe9f)) ||
+           ((code >= 0xea1) && (code <= 0xea3)) ||
+           (code == 0xea5) ||
+           (code == 0xea7) ||
+           ((code >= 0xeaa) && (code <= 0xeab)) ||
+           ((code >= 0xead) && (code <= 0xeb0)) ||
+           ((code >= 0xeb2) && (code <= 0xeb3)) ||
+           (code == 0xebd) ||
+           ((code >= 0xec0) && (code <= 0xec4)) ||
+           ((code >= 0xedc) && (code <= 0xedd)) ||
+           (code == 0xf00) ||
+           ((code >= 0xf40) && (code <= 0xf47)) ||
+           ((code >= 0xf49) && (code <= 0xf6a)) ||
+           ((code >= 0xf88) && (code <= 0xf8b)) ||
+           ((code >= 0x1000) && (code <= 0x1021)) ||
+           ((code >= 0x1023) && (code <= 0x1027)) ||
+           ((code >= 0x1029) && (code <= 0x102a)) ||
+           ((code >= 0x1050) && (code <= 0x1055)) ||
+           ((code >= 0x10d0) && (code <= 0x10f6)) ||
+           ((code >= 0x1100) && (code <= 0x1159)) ||
+           ((code >= 0x115f) && (code <= 0x11a2)) ||
+           ((code >= 0x11a8) && (code <= 0x11f9)) ||
+           ((code >= 0x1200) && (code <= 0x1206)) ||
+           ((code >= 0x1208) && (code <= 0x1246)) ||
+           (code == 0x1248) ||
+           ((code >= 0x124a) && (code <= 0x124d)) ||
+           ((code >= 0x1250) && (code <= 0x1256)) ||
+           (code == 0x1258) ||
+           ((code >= 0x125a) && (code <= 0x125d)) ||
+           ((code >= 0x1260) && (code <= 0x1286)) ||
+           (code == 0x1288) ||
+           ((code >= 0x128a) && (code <= 0x128d)) ||
+           ((code >= 0x1290) && (code <= 0x12ae)) ||
+           (code == 0x12b0) ||
+           ((code >= 0x12b2) && (code <= 0x12b5)) ||
+           ((code >= 0x12b8) && (code <= 0x12be)) ||
+           (code == 0x12c0) ||
+           ((code >= 0x12c2) && (code <= 0x12c5)) ||
+           ((code >= 0x12c8) && (code <= 0x12ce)) ||
+           ((code >= 0x12d0) && (code <= 0x12d6)) ||
+           ((code >= 0x12d8) && (code <= 0x12ee)) ||
+           ((code >= 0x12f0) && (code <= 0x130e)) ||
+           (code == 0x1310) ||
+           ((code >= 0x1312) && (code <= 0x1315)) ||
+           ((code >= 0x1318) && (code <= 0x131e)) ||
+           ((code >= 0x1320) && (code <= 0x1346)) ||
+           ((code >= 0x1348) && (code <= 0x135a)) ||
+           ((code >= 0x13a0) && (code <= 0x13f4)) ||
+           ((code >= 0x1401) && (code <= 0x166c)) ||
+           ((code >= 0x166f) && (code <= 0x1676)) ||
+           ((code >= 0x1681) && (code <= 0x169a)) ||
+           ((code >= 0x16a0) && (code <= 0x16ea)) ||
+           ((code >= 0x1780) && (code <= 0x17b3)) ||
+           ((code >= 0x1820) && (code <= 0x1842)) ||
+           ((code >= 0x1844) && (code <= 0x1877)) ||
+           ((code >= 0x1880) && (code <= 0x18a8)) ||
+           ((code >= 0x2135) && (code <= 0x2138)) ||
+           (code == 0x3006) ||
+           ((code >= 0x3041) && (code <= 0x3094)) ||
+           ((code >= 0x30a1) && (code <= 0x30fa)) ||
+           ((code >= 0x3105) && (code <= 0x312c)) ||
+           ((code >= 0x3131) && (code <= 0x318e)) ||
+           ((code >= 0x31a0) && (code <= 0x31b7)) ||
+           /* CJK ideograph extension A: whole range, not just its ends */
+           ((code >= 0x3400) && (code <= 0x4db5)) ||
+           /* CJK unified ideographs: whole range, not just its ends */
+           ((code >= 0x4e00) && (code <= 0x9fa5)) ||
+           ((code >= 0xa000) && (code <= 0xa48c)) ||
+           /* Hangul syllables: whole range, not just its ends */
+           ((code >= 0xac00) && (code <= 0xd7a3)) ||
+           ((code >= 0xf900) && (code <= 0xfa2d)) ||
+           (code == 0xfb1d) ||
+           ((code >= 0xfb1f) && (code <= 0xfb28)) ||
+           ((code >= 0xfb2a) && (code <= 0xfb36)) ||
+           ((code >= 0xfb38) && (code <= 0xfb3c)) ||
+           (code == 0xfb3e) ||
+           ((code >= 0xfb40) && (code <= 0xfb41)) ||
+           ((code >= 0xfb43) && (code <= 0xfb44)) ||
+           ((code >= 0xfb46) && (code <= 0xfbb1)) ||
+           ((code >= 0xfbd3) && (code <= 0xfd3d)) ||
+           ((code >= 0xfd50) && (code <= 0xfd8f)) ||
+           ((code >= 0xfd92) && (code <= 0xfdc7)) ||
+           ((code >= 0xfdf0) && (code <= 0xfdfb)) ||
+           ((code >= 0xfe70) && (code <= 0xfe72)) ||
+           (code == 0xfe74) ||
+           ((code >= 0xfe76) && (code <= 0xfefc)) ||
+           ((code >= 0xff66) && (code <= 0xff6f)) ||
+           ((code >= 0xff71) && (code <= 0xff9d)) ||
+           ((code >= 0xffa0) && (code <= 0xffbe)) ||
+           ((code >= 0xffc2) && (code <= 0xffc7)) ||
+           ((code >= 0xffca) && (code <= 0xffcf)) ||
+           ((code >= 0xffd2) && (code <= 0xffd7)) ||
+           ((code >= 0xffda) && (code <= 0xffdc)) ||
+           ((code >= 0x10300) && (code <= 0x1031e)) ||
+           ((code >= 0x10330) && (code <= 0x10349)) ||
+           /* CJK ideograph extension B: whole range, not just its ends */
+           ((code >= 0x20000) && (code <= 0x2a6d6)) ||
+           ((code >= 0x2f800) && (code <= 0x2fa1d)));
+}
+
+/**
+ * xmlUCSIsCatLt:
+ * @code: UCS code point
+ *
+ * Tell whether @code is one of the code points of the Lt (titlecase
+ * letter) UCS Category
+ *
+ * Returns 1 if @code belongs to the category, 0 otherwise
+ */
+int
+xmlUCSIsCatLt(int code) {
+    /* isolated code points first */
+    switch (code) {
+        case 0x1c5:
+        case 0x1c8:
+        case 0x1cb:
+        case 0x1f2:
+        case 0x1fbc:
+        case 0x1fcc:
+        case 0x1ffc:
+            return(1);
+        default:
+            break;
+    }
+
+    /* what is left are the runs 0x1f88-0x1f8f, 0x1f98-0x1f9f, 0x1fa8-0x1faf */
+    if ((code < 0x1f88) || (code > 0x1faf))
+        return(0);
+    /* inside that window the members are exactly the values with bit 3 set */
+    return((code & 0x8) != 0);
+}
+
+/**
+ * xmlUCSIsCatLu:
+ * @code: UCS code point
+ *
+ * Look @code up in the list of code point runs that make up the Lu
+ * (uppercase letter) UCS Category
+ *
+ * Returns 1 if @code belongs to the category, 0 otherwise
+ */
+int
+xmlUCSIsCatLu(int code) {
+    /*
+     * {first, last, step} runs in ascending order, bounds inclusive.
+     * step 1 is a plain range; step 2 selects every other code point
+     * starting at first (the code points skipped in between are not
+     * members of this category).
+     */
+    static const int runs[][3] = {
+        {0x41, 0x5a, 1}, {0xc0, 0xd6, 1}, {0xd8, 0xde, 1},
+        {0x100, 0x136, 2}, {0x139, 0x147, 2}, {0x14a, 0x176, 2},
+        {0x178, 0x179, 1}, {0x17b, 0x17d, 2}, {0x181, 0x182, 1},
+        {0x184, 0x184, 1}, {0x186, 0x187, 1}, {0x189, 0x18b, 1},
+        {0x18e, 0x191, 1}, {0x193, 0x194, 1}, {0x196, 0x198, 1},
+        {0x19c, 0x19d, 1}, {0x19f, 0x1a0, 1}, {0x1a2, 0x1a4, 2},
+        {0x1a6, 0x1a7, 1}, {0x1a9, 0x1a9, 1}, {0x1ac, 0x1ac, 1},
+        {0x1ae, 0x1af, 1}, {0x1b1, 0x1b3, 1}, {0x1b5, 0x1b5, 1},
+        {0x1b7, 0x1b8, 1}, {0x1bc, 0x1bc, 1}, {0x1c4, 0x1c4, 1},
+        {0x1c7, 0x1c7, 1}, {0x1ca, 0x1ca, 1}, {0x1cd, 0x1db, 2},
+        {0x1de, 0x1ee, 2}, {0x1f1, 0x1f1, 1}, {0x1f4, 0x1f4, 1},
+        {0x1f6, 0x1f8, 1}, {0x1fa, 0x21e, 2}, {0x222, 0x232, 2},
+        {0x386, 0x386, 1}, {0x388, 0x38a, 1}, {0x38c, 0x38c, 1},
+        {0x38e, 0x38f, 1}, {0x391, 0x3a1, 1}, {0x3a3, 0x3ab, 1},
+        {0x3d2, 0x3d4, 1}, {0x3da, 0x3ee, 2}, {0x3f4, 0x3f4, 1},
+        {0x400, 0x42f, 1}, {0x460, 0x480, 2}, {0x48c, 0x4be, 2},
+        {0x4c0, 0x4c1, 1}, {0x4c3, 0x4c3, 1}, {0x4c7, 0x4c7, 1},
+        {0x4cb, 0x4cb, 1}, {0x4d0, 0x4f4, 2}, {0x4f8, 0x4f8, 1},
+        {0x531, 0x556, 1}, {0x10a0, 0x10c5, 1},
+        {0x1e00, 0x1e94, 2}, {0x1ea0, 0x1ef8, 2},
+        {0x1f08, 0x1f0f, 1}, {0x1f18, 0x1f1d, 1}, {0x1f28, 0x1f2f, 1},
+        {0x1f38, 0x1f3f, 1}, {0x1f48, 0x1f4d, 1}, {0x1f59, 0x1f5f, 2},
+        {0x1f68, 0x1f6f, 1}, {0x1fb8, 0x1fbb, 1}, {0x1fc8, 0x1fcb, 1},
+        {0x1fd8, 0x1fdb, 1}, {0x1fe8, 0x1fec, 1}, {0x1ff8, 0x1ffb, 1},
+        {0x2102, 0x2102, 1}, {0x2107, 0x2107, 1}, {0x210b, 0x210d, 1},
+        {0x2110, 0x2112, 1}, {0x2115, 0x2115, 1}, {0x2119, 0x211d, 1},
+        {0x2124, 0x2124, 1}, {0x2126, 0x2126, 1}, {0x2128, 0x2128, 1},
+        {0x212a, 0x212d, 1}, {0x2130, 0x2131, 1}, {0x2133, 0x2133, 1},
+        {0xff21, 0xff3a, 1}, {0x10400, 0x10425, 1},
+        {0x1d400, 0x1d419, 1}, {0x1d434, 0x1d44d, 1},
+        {0x1d468, 0x1d481, 1}, {0x1d49c, 0x1d49c, 1},
+        {0x1d49e, 0x1d49f, 1}, {0x1d4a2, 0x1d4a2, 1},
+        {0x1d4a5, 0x1d4a6, 1}, {0x1d4a9, 0x1d4ac, 1},
+        {0x1d4ae, 0x1d4b5, 1}, {0x1d4d0, 0x1d4e9, 1},
+        {0x1d504, 0x1d505, 1}, {0x1d507, 0x1d50a, 1},
+        {0x1d50d, 0x1d514, 1}, {0x1d516, 0x1d51c, 1},
+        {0x1d538, 0x1d539, 1}, {0x1d53b, 0x1d53e, 1},
+        {0x1d540, 0x1d544, 1}, {0x1d546, 0x1d546, 1},
+        {0x1d54a, 0x1d550, 1}, {0x1d56c, 0x1d585, 1},
+        {0x1d5a0, 0x1d5b9, 1}, {0x1d5d4, 0x1d5ed, 1},
+        {0x1d608, 0x1d621, 1}, {0x1d63c, 0x1d655, 1},
+        {0x1d670, 0x1d689, 1}, {0x1d6a8, 0x1d6c0, 1},
+        {0x1d6e2, 0x1d6fa, 1}, {0x1d71c, 0x1d734, 1},
+        {0x1d756, 0x1d76e, 1}, {0x1d790, 0x1d7a8, 1}
+    };
+    unsigned int idx;
+
+    for (idx = 0; idx < sizeof(runs) / sizeof(runs[0]); idx++) {
+        /* runs are sorted and disjoint: nothing further can match */
+        if (code < runs[idx][0])
+            return(0);
+        /* inside the run's bounds: member only if it sits on the step */
+        if (code <= runs[idx][1])
+            return(((code - runs[idx][0]) % runs[idx][2]) == 0);
+    }
+    return(0);
+}
+
+/**
+ * xmlUCSIsCatM:
+ * @code: UCS code point
+ *
+ * Search the sorted list of code point intervals that make up the M
+ * (mark) UCS Category for @code
+ *
+ * Returns 1 if @code belongs to the category, 0 otherwise
+ */
+int
+xmlUCSIsCatM(int code) {
+    /* inclusive {first, last} intervals, sorted and non-overlapping */
+    static const int intervals[][2] = {
+        {0x300, 0x34e}, {0x360, 0x362}, {0x483, 0x486}, {0x488, 0x489},
+        {0x591, 0x5a1}, {0x5a3, 0x5b9}, {0x5bb, 0x5bd}, {0x5bf, 0x5bf},
+        {0x5c1, 0x5c2}, {0x5c4, 0x5c4}, {0x64b, 0x655}, {0x670, 0x670},
+        {0x6d6, 0x6e4}, {0x6e7, 0x6e8}, {0x6ea, 0x6ed}, {0x711, 0x711},
+        {0x730, 0x74a}, {0x7a6, 0x7b0}, {0x901, 0x903}, {0x93c, 0x93c},
+        {0x93e, 0x94d}, {0x951, 0x954}, {0x962, 0x963}, {0x981, 0x983},
+        {0x9bc, 0x9bc}, {0x9be, 0x9c4}, {0x9c7, 0x9c8}, {0x9cb, 0x9cd},
+        {0x9d7, 0x9d7}, {0x9e2, 0x9e3}, {0xa02, 0xa02}, {0xa3c, 0xa3c},
+        {0xa3e, 0xa42}, {0xa47, 0xa48}, {0xa4b, 0xa4d}, {0xa70, 0xa71},
+        {0xa81, 0xa83}, {0xabc, 0xabc}, {0xabe, 0xac5}, {0xac7, 0xac9},
+        {0xacb, 0xacd}, {0xb01, 0xb03}, {0xb3c, 0xb3c}, {0xb3e, 0xb43},
+        {0xb47, 0xb48}, {0xb4b, 0xb4d}, {0xb56, 0xb57}, {0xb82, 0xb83},
+        {0xbbe, 0xbc2}, {0xbc6, 0xbc8}, {0xbca, 0xbcd}, {0xbd7, 0xbd7},
+        {0xc01, 0xc03}, {0xc3e, 0xc44}, {0xc46, 0xc48}, {0xc4a, 0xc4d},
+        {0xc55, 0xc56}, {0xc82, 0xc83}, {0xcbe, 0xcc4}, {0xcc6, 0xcc8},
+        {0xcca, 0xccd}, {0xcd5, 0xcd6}, {0xd02, 0xd03}, {0xd3e, 0xd43},
+        {0xd46, 0xd48}, {0xd4a, 0xd4d}, {0xd57, 0xd57}, {0xd82, 0xd83},
+        {0xdca, 0xdca}, {0xdcf, 0xdd4}, {0xdd6, 0xdd6}, {0xdd8, 0xddf},
+        {0xdf2, 0xdf3}, {0xe31, 0xe31}, {0xe34, 0xe3a}, {0xe47, 0xe4e},
+        {0xeb1, 0xeb1}, {0xeb4, 0xeb9}, {0xebb, 0xebc}, {0xec8, 0xecd},
+        {0xf18, 0xf19}, {0xf35, 0xf35}, {0xf37, 0xf37}, {0xf39, 0xf39},
+        {0xf3e, 0xf3f}, {0xf71, 0xf84}, {0xf86, 0xf87}, {0xf90, 0xf97},
+        {0xf99, 0xfbc}, {0xfc6, 0xfc6}, {0x102c, 0x1032}, {0x1036, 0x1039},
+        {0x1056, 0x1059}, {0x17b4, 0x17d3}, {0x18a9, 0x18a9},
+        {0x20d0, 0x20e3}, {0x302a, 0x302f}, {0x3099, 0x309a},
+        {0xfb1e, 0xfb1e}, {0xfe20, 0xfe23}, {0x1d165, 0x1d169},
+        {0x1d16d, 0x1d172}, {0x1d17b, 0x1d182}, {0x1d185, 0x1d18b},
+        {0x1d1aa, 0x1d1ad}
+    };
+    int low = 0;
+    int high = (int) (sizeof(intervals) / sizeof(intervals[0])) - 1;
+
+    /* binary search for the interval containing @code, if any */
+    while (low <= high) {
+        int mid = low + (high - low) / 2;
+
+        if (code < intervals[mid][0])
+            high = mid - 1;
+        else if (code > intervals[mid][1])
+            low = mid + 1;
+        else
+            return(1);
+    }
+    return(0);
+}
+
+/**
+ * xmlUCSIsCatMc:
+ * @code: UCS code point
+ *
+ * Walk the sorted list of code point intervals that make up the Mc
+ * (spacing combining mark) UCS Category looking for @code
+ *
+ * Returns 1 if @code belongs to the category, 0 otherwise
+ */
+int
+xmlUCSIsCatMc(int code) {
+    /* inclusive {first, last} intervals, kept in ascending order */
+    static const int intervals[][2] = {
+        {0x903, 0x903}, {0x93e, 0x940}, {0x949, 0x94c}, {0x982, 0x983},
+        {0x9be, 0x9c0}, {0x9c7, 0x9c8}, {0x9cb, 0x9cc}, {0x9d7, 0x9d7},
+        {0xa3e, 0xa40}, {0xa83, 0xa83}, {0xabe, 0xac0}, {0xac9, 0xac9},
+        {0xacb, 0xacc}, {0xb02, 0xb03}, {0xb3e, 0xb3e}, {0xb40, 0xb40},
+        {0xb47, 0xb48}, {0xb4b, 0xb4c}, {0xb57, 0xb57}, {0xb83, 0xb83},
+        {0xbbe, 0xbbf}, {0xbc1, 0xbc2}, {0xbc6, 0xbc8}, {0xbca, 0xbcc},
+        {0xbd7, 0xbd7}, {0xc01, 0xc03}, {0xc41, 0xc44}, {0xc82, 0xc83},
+        {0xcbe, 0xcbe}, {0xcc0, 0xcc4}, {0xcc7, 0xcc8}, {0xcca, 0xccb},
+        {0xcd5, 0xcd6}, {0xd02, 0xd03}, {0xd3e, 0xd40}, {0xd46, 0xd48},
+        {0xd4a, 0xd4c}, {0xd57, 0xd57}, {0xd82, 0xd83}, {0xdcf, 0xdd1},
+        {0xdd8, 0xddf}, {0xdf2, 0xdf3}, {0xf3e, 0xf3f}, {0xf7f, 0xf7f},
+        {0x102c, 0x102c}, {0x1031, 0x1031}, {0x1038, 0x1038},
+        {0x1056, 0x1057}, {0x17b4, 0x17b6}, {0x17be, 0x17c5},
+        {0x17c7, 0x17c8}, {0x1d165, 0x1d166}, {0x1d16d, 0x1d172}
+    };
+    const int (*cur)[2] = intervals;
+    const int (*stop)[2] = intervals + sizeof(intervals) / sizeof(intervals[0]);
+
+    /* skip every interval that ends before @code */
+    while ((cur < stop) && (code > (*cur)[1]))
+        cur++;
+    /* @code is a member only if the first remaining interval starts at or before it */
+    if (cur == stop)
+        return(0);
+    return(code >= (*cur)[0]);
+}
+
+/**
+ * xmlUCSIsCatMe:
+ * @code: UCS code point to test
+ *
+ * Tell whether @code lies in one of the four Me (Mark, Enclosing) ranges
+ *
+ * Returns 1 when @code is inside a listed range, 0 for anything else
+ */
+int
+xmlUCSIsCatMe(int code) {
+ if ((code >= 0x488) && (code <= 0x489)) return(1);
+ if ((code >= 0x6dd) && (code <= 0x6de)) return(1);
+ if ((code >= 0x20dd) && (code <= 0x20e0)) return(1);
+ return((code >= 0x20e2) && (code <= 0x20e3));
+}
+
+/**
+ * xmlUCSIsCatMn:
+ * @code: UCS code point to test
+ *
+ * Check @code against the fixed range table for Mn (Mark, Nonspacing)
+ *
+ * Returns 1 if @code is in one of the listed ranges, 0 otherwise
+ */
+int
+xmlUCSIsCatMn(int code) {
+ return(((code >= 0x300) && (code <= 0x34e)) ||
+ ((code >= 0x360) && (code <= 0x362)) ||
+ ((code >= 0x483) && (code <= 0x486)) ||
+ ((code >= 0x591) && (code <= 0x5a1)) ||
+ ((code >= 0x5a3) && (code <= 0x5b9)) ||
+ ((code >= 0x5bb) && (code <= 0x5bd)) ||
+ (code == 0x5bf) ||
+ ((code >= 0x5c1) && (code <= 0x5c2)) ||
+ (code == 0x5c4) ||
+ ((code >= 0x64b) && (code <= 0x655)) ||
+ (code == 0x670) ||
+ ((code >= 0x6d6) && (code <= 0x6dc)) ||
+ ((code >= 0x6df) && (code <= 0x6e4)) ||
+ ((code >= 0x6e7) && (code <= 0x6e8)) ||
+ ((code >= 0x6ea) && (code <= 0x6ed)) ||
+ (code == 0x711) ||
+ ((code >= 0x730) && (code <= 0x74a)) ||
+ ((code >= 0x7a6) && (code <= 0x7b0)) ||
+ ((code >= 0x901) && (code <= 0x902)) ||
+ (code == 0x93c) ||
+ ((code >= 0x941) && (code <= 0x948)) ||
+ (code == 0x94d) ||
+ ((code >= 0x951) && (code <= 0x954)) ||
+ ((code >= 0x962) && (code <= 0x963)) ||
+ (code == 0x981) ||
+ (code == 0x9bc) ||
+ ((code >= 0x9c1) && (code <= 0x9c4)) ||
+ (code == 0x9cd) ||
+ ((code >= 0x9e2) && (code <= 0x9e3)) ||
+ (code == 0xa02) ||
+ (code == 0xa3c) ||
+ ((code >= 0xa41) && (code <= 0xa42)) ||
+ ((code >= 0xa47) && (code <= 0xa48)) ||
+ ((code >= 0xa4b) && (code <= 0xa4d)) ||
+ ((code >= 0xa70) && (code <= 0xa71)) ||
+ ((code >= 0xa81) && (code <= 0xa82)) ||
+ (code == 0xabc) ||
+ ((code >= 0xac1) && (code <= 0xac5)) ||
+ ((code >= 0xac7) && (code <= 0xac8)) ||
+ (code == 0xacd) ||
+ (code == 0xb01) ||
+ (code == 0xb3c) ||
+ (code == 0xb3f) ||
+ ((code >= 0xb41) && (code <= 0xb43)) ||
+ (code == 0xb4d) ||
+ (code == 0xb56) ||
+ (code == 0xb82) ||
+ (code == 0xbc0) ||
+ (code == 0xbcd) ||
+ ((code >= 0xc3e) && (code <= 0xc40)) ||
+ ((code >= 0xc46) && (code <= 0xc48)) ||
+ ((code >= 0xc4a) && (code <= 0xc4d)) ||
+ ((code >= 0xc55) && (code <= 0xc56)) ||
+ (code == 0xcbf) ||
+ (code == 0xcc6) ||
+ ((code >= 0xccc) && (code <= 0xccd)) ||
+ ((code >= 0xd41) && (code <= 0xd43)) ||
+ (code == 0xd4d) ||
+ (code == 0xdca) ||
+ ((code >= 0xdd2) && (code <= 0xdd4)) ||
+ (code == 0xdd6) ||
+ (code == 0xe31) ||
+ ((code >= 0xe34) && (code <= 0xe3a)) ||
+ ((code >= 0xe47) && (code <= 0xe4e)) ||
+ (code == 0xeb1) ||
+ ((code >= 0xeb4) && (code <= 0xeb9)) ||
+ ((code >= 0xebb) && (code <= 0xebc)) ||
+ ((code >= 0xec8) && (code <= 0xecd)) ||
+ ((code >= 0xf18) && (code <= 0xf19)) ||
+ (code == 0xf35) ||
+ (code == 0xf37) ||
+ (code == 0xf39) ||
+ ((code >= 0xf71) && (code <= 0xf7e)) ||
+ ((code >= 0xf80) && (code <= 0xf84)) ||
+ ((code >= 0xf86) && (code <= 0xf87)) ||
+ ((code >= 0xf90) && (code <= 0xf97)) ||
+ ((code >= 0xf99) && (code <= 0xfbc)) ||
+ (code == 0xfc6) ||
+ ((code >= 0x102d) && (code <= 0x1030)) ||
+ (code == 0x1032) ||
+ ((code >= 0x1036) && (code <= 0x1037)) ||
+ (code == 0x1039) ||
+ ((code >= 0x1058) && (code <= 0x1059)) ||
+ ((code >= 0x17b7) && (code <= 0x17bd)) ||
+ (code == 0x17c6) ||
+ ((code >= 0x17c9) && (code <= 0x17d3)) ||
+ (code == 0x18a9) ||
+ ((code >= 0x20d0) && (code <= 0x20dc)) ||
+ (code == 0x20e1) ||
+ ((code >= 0x302a) && (code <= 0x302f)) ||
+ ((code >= 0x3099) && (code <= 0x309a)) ||
+ (code == 0xfb1e) ||
+ ((code >= 0xfe20) && (code <= 0xfe23)) ||
+ ((code >= 0x1d167) && (code <= 0x1d169)) ||
+ ((code >= 0x1d17b) && (code <= 0x1d182)) ||
+ ((code >= 0x1d185) && (code <= 0x1d18b)) ||
+ ((code >= 0x1d1aa) && (code <= 0x1d1ad)));
+}
+
+/**
+ * xmlUCSIsCatN:
+ * @code: UCS code point to test
+ *
+ * Check @code against the fixed range table for N (Number: Nd, Nl and No)
+ *
+ * Returns 1 if @code is in one of the listed ranges, 0 otherwise
+ */
+int
+xmlUCSIsCatN(int code) {
+ return(((code >= 0x30) && (code <= 0x39)) ||
+ ((code >= 0xb2) && (code <= 0xb3)) ||
+ (code == 0xb9) ||
+ ((code >= 0xbc) && (code <= 0xbe)) ||
+ ((code >= 0x660) && (code <= 0x669)) ||
+ ((code >= 0x6f0) && (code <= 0x6f9)) ||
+ ((code >= 0x966) && (code <= 0x96f)) ||
+ ((code >= 0x9e6) && (code <= 0x9ef)) ||
+ ((code >= 0x9f4) && (code <= 0x9f9)) ||
+ ((code >= 0xa66) && (code <= 0xa6f)) ||
+ ((code >= 0xae6) && (code <= 0xaef)) ||
+ ((code >= 0xb66) && (code <= 0xb6f)) ||
+ ((code >= 0xbe7) && (code <= 0xbf2)) ||
+ ((code >= 0xc66) && (code <= 0xc6f)) ||
+ ((code >= 0xce6) && (code <= 0xcef)) ||
+ ((code >= 0xd66) && (code <= 0xd6f)) ||
+ ((code >= 0xe50) && (code <= 0xe59)) ||
+ ((code >= 0xed0) && (code <= 0xed9)) ||
+ ((code >= 0xf20) && (code <= 0xf33)) ||
+ ((code >= 0x1040) && (code <= 0x1049)) ||
+ ((code >= 0x1369) && (code <= 0x137c)) ||
+ ((code >= 0x16ee) && (code <= 0x16f0)) ||
+ ((code >= 0x17e0) && (code <= 0x17e9)) ||
+ ((code >= 0x1810) && (code <= 0x1819)) ||
+ (code == 0x2070) ||
+ ((code >= 0x2074) && (code <= 0x2079)) ||
+ ((code >= 0x2080) && (code <= 0x2089)) ||
+ ((code >= 0x2153) && (code <= 0x2183)) ||
+ ((code >= 0x2460) && (code <= 0x249b)) ||
+ (code == 0x24ea) ||
+ ((code >= 0x2776) && (code <= 0x2793)) ||
+ (code == 0x3007) ||
+ ((code >= 0x3021) && (code <= 0x3029)) ||
+ ((code >= 0x3038) && (code <= 0x303a)) ||
+ ((code >= 0x3192) && (code <= 0x3195)) ||
+ ((code >= 0x3220) && (code <= 0x3229)) ||
+ ((code >= 0x3280) && (code <= 0x3289)) ||
+ ((code >= 0xff10) && (code <= 0xff19)) ||
+ ((code >= 0x10320) && (code <= 0x10323)) ||
+ (code == 0x1034a) ||
+ ((code >= 0x1d7ce) && (code <= 0x1d7ff)));
+}
+
+/**
+ * xmlUCSIsCatNd:
+ * @code: UCS code point to test
+ *
+ * Tell whether @code lies in one of the Nd (Number, Decimal Digit) ranges
+ *
+ * Returns 1 when @code is inside a listed range, 0 for anything else
+ */
+int
+xmlUCSIsCatNd(int code) {
+ if ((code >= 0x30) && (code <= 0x39)) return(1);
+ if ((code >= 0x660) && (code <= 0x669)) return(1);
+ if ((code >= 0x6f0) && (code <= 0x6f9)) return(1);
+ if ((code >= 0x966) && (code <= 0x96f)) return(1);
+ if ((code >= 0x9e6) && (code <= 0x9ef)) return(1);
+ if ((code >= 0xa66) && (code <= 0xa6f)) return(1);
+ if ((code >= 0xae6) && (code <= 0xaef)) return(1);
+ if ((code >= 0xb66) && (code <= 0xb6f)) return(1);
+ if ((code >= 0xbe7) && (code <= 0xbef)) return(1);
+ if ((code >= 0xc66) && (code <= 0xc6f)) return(1);
+ if ((code >= 0xce6) && (code <= 0xcef)) return(1);
+ if ((code >= 0xd66) && (code <= 0xd6f)) return(1);
+ if ((code >= 0xe50) && (code <= 0xe59)) return(1);
+ if ((code >= 0xed0) && (code <= 0xed9)) return(1);
+ if ((code >= 0xf20) && (code <= 0xf29)) return(1);
+ if ((code >= 0x1040) && (code <= 0x1049)) return(1);
+ if ((code >= 0x1369) && (code <= 0x1371)) return(1);
+ if ((code >= 0x17e0) && (code <= 0x17e9)) return(1);
+ if ((code >= 0x1810) && (code <= 0x1819)) return(1);
+ if ((code >= 0xff10) && (code <= 0xff19)) return(1);
+ return((code >= 0x1d7ce) && (code <= 0x1d7ff));
+}
+
+/**
+ * xmlUCSIsCatNl:
+ * @code: UCS code point to test
+ *
+ * Tell whether @code is one of the listed Nl (Number, Letter) code points
+ *
+ * Returns 1 when @code matches a listed value or range, 0 otherwise
+ */
+int
+xmlUCSIsCatNl(int code) {
+ if ((code >= 0x16ee) && (code <= 0x16f0)) return(1);
+ if ((code >= 0x2160) && (code <= 0x2183)) return(1);
+ if (code == 0x3007) return(1);
+ if ((code >= 0x3021) && (code <= 0x3029)) return(1);
+ if ((code >= 0x3038) && (code <= 0x303a)) return(1);
+ return(code == 0x1034a);
+}
+
+/**
+ * xmlUCSIsCatNo:
+ * @code: UCS code point to test
+ *
+ * Tell whether @code is one of the listed No (Number, Other) code points
+ *
+ * Returns 1 when @code matches a listed value or range, 0 otherwise
+ */
+int
+xmlUCSIsCatNo(int code) {
+ if ((code >= 0xb2) && (code <= 0xb3)) return(1);
+ if (code == 0xb9) return(1);
+ if ((code >= 0xbc) && (code <= 0xbe)) return(1);
+ if ((code >= 0x9f4) && (code <= 0x9f9)) return(1);
+ if ((code >= 0xbf0) && (code <= 0xbf2)) return(1);
+ if ((code >= 0xf2a) && (code <= 0xf33)) return(1);
+ if ((code >= 0x1372) && (code <= 0x137c)) return(1);
+ if (code == 0x2070) return(1);
+ if ((code >= 0x2074) && (code <= 0x2079)) return(1);
+ if ((code >= 0x2080) && (code <= 0x2089)) return(1);
+ if ((code >= 0x2153) && (code <= 0x215f)) return(1);
+ if ((code >= 0x2460) && (code <= 0x249b)) return(1);
+ if (code == 0x24ea) return(1);
+ if ((code >= 0x2776) && (code <= 0x2793)) return(1);
+ if ((code >= 0x3192) && (code <= 0x3195)) return(1);
+ if ((code >= 0x3220) && (code <= 0x3229)) return(1);
+ if ((code >= 0x3280) && (code <= 0x3289)) return(1);
+ return((code >= 0x10320) && (code <= 0x10323));
+}
+
+/**
+ * xmlUCSIsCatP:
+ * @code: UCS code point to test
+ *
+ * Check @code against the fixed range table for P (Punctuation)
+ *
+ * Returns 1 if @code is in one of the listed ranges, 0 otherwise
+ */
+int
+xmlUCSIsCatP(int code) {
+ return(((code >= 0x21) && (code <= 0x23)) ||
+ ((code >= 0x25) && (code <= 0x2a)) ||
+ ((code >= 0x2c) && (code <= 0x2f)) ||
+ ((code >= 0x3a) && (code <= 0x3b)) ||
+ ((code >= 0x3f) && (code <= 0x40)) ||
+ ((code >= 0x5b) && (code <= 0x5d)) ||
+ (code == 0x5f) ||
+ (code == 0x7b) ||
+ (code == 0x7d) ||
+ (code == 0xa1) ||
+ (code == 0xab) ||
+ (code == 0xad) ||
+ (code == 0xb7) ||
+ (code == 0xbb) ||
+ (code == 0xbf) ||
+ (code == 0x37e) ||
+ (code == 0x387) ||
+ ((code >= 0x55a) && (code <= 0x55f)) ||
+ ((code >= 0x589) && (code <= 0x58a)) ||
+ (code == 0x5be) ||
+ (code == 0x5c0) ||
+ (code == 0x5c3) ||
+ ((code >= 0x5f3) && (code <= 0x5f4)) ||
+ (code == 0x60c) ||
+ (code == 0x61b) ||
+ (code == 0x61f) ||
+ ((code >= 0x66a) && (code <= 0x66d)) ||
+ (code == 0x6d4) ||
+ ((code >= 0x700) && (code <= 0x70d)) ||
+ ((code >= 0x964) && (code <= 0x965)) ||
+ (code == 0x970) ||
+ (code == 0xdf4) ||
+ (code == 0xe4f) ||
+ ((code >= 0xe5a) && (code <= 0xe5b)) ||
+ ((code >= 0xf04) && (code <= 0xf12)) ||
+ ((code >= 0xf3a) && (code <= 0xf3d)) ||
+ (code == 0xf85) ||
+ ((code >= 0x104a) && (code <= 0x104f)) ||
+ (code == 0x10fb) ||
+ ((code >= 0x1361) && (code <= 0x1368)) ||
+ ((code >= 0x166d) && (code <= 0x166e)) ||
+ ((code >= 0x169b) && (code <= 0x169c)) ||
+ ((code >= 0x16eb) && (code <= 0x16ed)) ||
+ ((code >= 0x17d4) && (code <= 0x17da)) ||
+ (code == 0x17dc) ||
+ ((code >= 0x1800) && (code <= 0x180a)) ||
+ ((code >= 0x2010) && (code <= 0x2027)) ||
+ ((code >= 0x2030) && (code <= 0x2043)) ||
+ ((code >= 0x2045) && (code <= 0x2046)) ||
+ ((code >= 0x2048) && (code <= 0x204d)) ||
+ ((code >= 0x207d) && (code <= 0x207e)) ||
+ ((code >= 0x208d) && (code <= 0x208e)) ||
+ ((code >= 0x2329) && (code <= 0x232a)) ||
+ ((code >= 0x3001) && (code <= 0x3003)) ||
+ ((code >= 0x3008) && (code <= 0x3011)) ||
+ ((code >= 0x3014) && (code <= 0x301f)) ||
+ (code == 0x3030) ||
+ (code == 0x30fb) ||
+ ((code >= 0xfd3e) && (code <= 0xfd3f)) ||
+ ((code >= 0xfe30) && (code <= 0xfe44)) ||
+ ((code >= 0xfe49) && (code <= 0xfe52)) ||
+ ((code >= 0xfe54) && (code <= 0xfe61)) ||
+ (code == 0xfe63) ||
+ (code == 0xfe68) ||
+ ((code >= 0xfe6a) && (code <= 0xfe6b)) ||
+ ((code >= 0xff01) && (code <= 0xff03)) ||
+ ((code >= 0xff05) && (code <= 0xff0a)) ||
+ ((code >= 0xff0c) && (code <= 0xff0f)) ||
+ ((code >= 0xff1a) && (code <= 0xff1b)) ||
+ ((code >= 0xff1f) && (code <= 0xff20)) ||
+ ((code >= 0xff3b) && (code <= 0xff3d)) ||
+ (code == 0xff3f) ||
+ (code == 0xff5b) ||
+ (code == 0xff5d) ||
+ ((code >= 0xff61) && (code <= 0xff65)));
+}
+
+/**
+ * xmlUCSIsCatPc:
+ * @code: UCS code point to test
+ *
+ * Tell whether @code is one of the listed Pc (Punctuation, Connector) points
+ *
+ * Returns 1 when @code matches a listed value or range, 0 otherwise
+ */
+int
+xmlUCSIsCatPc(int code) {
+ if (code == 0x5f) return(1);
+ if ((code >= 0x203f) && (code <= 0x2040)) return(1);
+ if (code == 0x30fb) return(1);
+ if ((code >= 0xfe33) && (code <= 0xfe34)) return(1);
+ if ((code >= 0xfe4d) && (code <= 0xfe4f)) return(1);
+ if (code == 0xff3f) return(1);
+ return(code == 0xff65);
+}
+
+/**
+ * xmlUCSIsCatPd:
+ * @code: UCS code point to test
+ *
+ * Tell whether @code is one of the listed Pd (Punctuation, Dash) code points
+ *
+ * Returns 1 when @code matches a listed value or range, 0 otherwise
+ */
+int
+xmlUCSIsCatPd(int code) {
+ if ((code >= 0x2010) && (code <= 0x2015)) return(1);
+ if ((code >= 0xfe31) && (code <= 0xfe32)) return(1);
+ switch (code) {
+ case 0x2d: case 0xad: case 0x58a: case 0x1806:
+ case 0x301c: case 0x3030:
+ case 0xfe58: case 0xfe63: case 0xff0d:
+ return(1);
+ default:
+ break;
+ }
+ return(0);
+}
+
+/**
+ * xmlUCSIsCatPe:
+ * @code: UCS code point to test
+ *
+ * Check @code against the fixed value table for Pe (Punctuation, Close)
+ *
+ * Returns 1 if @code is one of the listed code points, 0 otherwise
+ */
+int
+xmlUCSIsCatPe(int code) {
+ return((code == 0x29) ||
+ (code == 0x5d) ||
+ (code == 0x7d) ||
+ (code == 0xf3b) ||
+ (code == 0xf3d) ||
+ (code == 0x169c) ||
+ (code == 0x2046) ||
+ (code == 0x207e) ||
+ (code == 0x208e) ||
+ (code == 0x232a) ||
+ (code == 0x3009) ||
+ (code == 0x300b) ||
+ (code == 0x300d) ||
+ (code == 0x300f) ||
+ (code == 0x3011) ||
+ (code == 0x3015) ||
+ (code == 0x3017) ||
+ (code == 0x3019) ||
+ (code == 0x301b) ||
+ ((code >= 0x301e) && (code <= 0x301f)) ||
+ (code == 0xfd3f) ||
+ (code == 0xfe36) ||
+ (code == 0xfe38) ||
+ (code == 0xfe3a) ||
+ (code == 0xfe3c) ||
+ (code == 0xfe3e) ||
+ (code == 0xfe40) ||
+ (code == 0xfe42) ||
+ (code == 0xfe44) ||
+ (code == 0xfe5a) ||
+ (code == 0xfe5c) ||
+ (code == 0xfe5e) ||
+ (code == 0xff09) ||
+ (code == 0xff3d) ||
+ (code == 0xff5d) ||
+ (code == 0xff63));
+}
+
+/**
+ * xmlUCSIsCatPf:
+ * @code: UCS code point to test
+ *
+ * Tell whether @code is one of the four listed Pf (Final quote) code points
+ *
+ * Returns 1 when @code matches a listed value, 0 otherwise
+ */
+int
+xmlUCSIsCatPf(int code) {
+ switch (code) {
+ case 0xbb: case 0x2019: case 0x201d: case 0x203a: return(1);
+ default: return(0);
+ }
+}
+
+/**
+ * xmlUCSIsCatPi:
+ * @code: UCS code point to test
+ *
+ * Tell whether @code is one of the listed Pi (Initial quote) code points
+ *
+ * Returns 1 when @code matches a listed value, 0 otherwise
+ */
+int
+xmlUCSIsCatPi(int code) {
+ switch (code) {
+ case 0xab: case 0x2018: case 0x201b:
+ case 0x201c: case 0x201f: case 0x2039: return(1);
+ default: return(0);
+ }
+}
+
+/**
+ * xmlUCSIsCatPo:
+ * @code: UCS code point to test
+ *
+ * Check @code against the fixed range table for Po (Punctuation, Other)
+ *
+ * Returns 1 if @code is in one of the listed ranges, 0 otherwise
+ */
+int
+xmlUCSIsCatPo(int code) {
+ return(((code >= 0x21) && (code <= 0x23)) ||
+ ((code >= 0x25) && (code <= 0x27)) ||
+ (code == 0x2a) ||
+ (code == 0x2c) ||
+ ((code >= 0x2e) && (code <= 0x2f)) ||
+ ((code >= 0x3a) && (code <= 0x3b)) ||
+ ((code >= 0x3f) && (code <= 0x40)) ||
+ (code == 0x5c) ||
+ (code == 0xa1) ||
+ (code == 0xb7) ||
+ (code == 0xbf) ||
+ (code == 0x37e) ||
+ (code == 0x387) ||
+ ((code >= 0x55a) && (code <= 0x55f)) ||
+ (code == 0x589) ||
+ (code == 0x5be) ||
+ (code == 0x5c0) ||
+ (code == 0x5c3) ||
+ ((code >= 0x5f3) && (code <= 0x5f4)) ||
+ (code == 0x60c) ||
+ (code == 0x61b) ||
+ (code == 0x61f) ||
+ ((code >= 0x66a) && (code <= 0x66d)) ||
+ (code == 0x6d4) ||
+ ((code >= 0x700) && (code <= 0x70d)) ||
+ ((code >= 0x964) && (code <= 0x965)) ||
+ (code == 0x970) ||
+ (code == 0xdf4) ||
+ (code == 0xe4f) ||
+ ((code >= 0xe5a) && (code <= 0xe5b)) ||
+ ((code >= 0xf04) && (code <= 0xf12)) ||
+ (code == 0xf85) ||
+ ((code >= 0x104a) && (code <= 0x104f)) ||
+ (code == 0x10fb) ||
+ ((code >= 0x1361) && (code <= 0x1368)) ||
+ ((code >= 0x166d) && (code <= 0x166e)) ||
+ ((code >= 0x16eb) && (code <= 0x16ed)) ||
+ ((code >= 0x17d4) && (code <= 0x17da)) ||
+ (code == 0x17dc) ||
+ ((code >= 0x1800) && (code <= 0x1805)) ||
+ ((code >= 0x1807) && (code <= 0x180a)) ||
+ ((code >= 0x2016) && (code <= 0x2017)) ||
+ ((code >= 0x2020) && (code <= 0x2027)) ||
+ ((code >= 0x2030) && (code <= 0x2038)) ||
+ ((code >= 0x203b) && (code <= 0x203e)) ||
+ ((code >= 0x2041) && (code <= 0x2043)) ||
+ ((code >= 0x2048) && (code <= 0x204d)) ||
+ ((code >= 0x3001) && (code <= 0x3003)) ||
+ (code == 0xfe30) ||
+ ((code >= 0xfe49) && (code <= 0xfe4c)) ||
+ ((code >= 0xfe50) && (code <= 0xfe52)) ||
+ ((code >= 0xfe54) && (code <= 0xfe57)) ||
+ ((code >= 0xfe5f) && (code <= 0xfe61)) ||
+ (code == 0xfe68) ||
+ ((code >= 0xfe6a) && (code <= 0xfe6b)) ||
+ ((code >= 0xff01) && (code <= 0xff03)) ||
+ ((code >= 0xff05) && (code <= 0xff07)) ||
+ (code == 0xff0a) ||
+ (code == 0xff0c) ||
+ ((code >= 0xff0e) && (code <= 0xff0f)) ||
+ ((code >= 0xff1a) && (code <= 0xff1b)) ||
+ ((code >= 0xff1f) && (code <= 0xff20)) ||
+ (code == 0xff3c) ||
+ (code == 0xff61) ||
+ (code == 0xff64));
+}
+
+/**
+ * xmlUCSIsCatPs:
+ * @code: UCS code point to test
+ *
+ * Check @code against the fixed value table for Ps (Punctuation, Open)
+ *
+ * Returns 1 if @code is one of the listed code points, 0 otherwise
+ */
+int
+xmlUCSIsCatPs(int code) {
+ return((code == 0x28) ||
+ (code == 0x5b) ||
+ (code == 0x7b) ||
+ (code == 0xf3a) ||
+ (code == 0xf3c) ||
+ (code == 0x169b) ||
+ (code == 0x201a) ||
+ (code == 0x201e) ||
+ (code == 0x2045) ||
+ (code == 0x207d) ||
+ (code == 0x208d) ||
+ (code == 0x2329) ||
+ (code == 0x3008) ||
+ (code == 0x300a) ||
+ (code == 0x300c) ||
+ (code == 0x300e) ||
+ (code == 0x3010) ||
+ (code == 0x3014) ||
+ (code == 0x3016) ||
+ (code == 0x3018) ||
+ (code == 0x301a) ||
+ (code == 0x301d) ||
+ (code == 0xfd3e) ||
+ (code == 0xfe35) ||
+ (code == 0xfe37) ||
+ (code == 0xfe39) ||
+ (code == 0xfe3b) ||
+ (code == 0xfe3d) ||
+ (code == 0xfe3f) ||
+ (code == 0xfe41) ||
+ (code == 0xfe43) ||
+ (code == 0xfe59) ||
+ (code == 0xfe5b) ||
+ (code == 0xfe5d) ||
+ (code == 0xff08) ||
+ (code == 0xff3b) ||
+ (code == 0xff5b) ||
+ (code == 0xff62));
+}
+
+/**
+ * xmlUCSIsCatS:
+ * @code: UCS code point to test
+ *
+ * Check @code against the fixed range table for S (Symbol)
+ *
+ * Returns 1 if @code is in one of the listed ranges, 0 otherwise
+ */
+int
+xmlUCSIsCatS(int code) {
+ return((code == 0x24) ||
+ (code == 0x2b) ||
+ ((code >= 0x3c) && (code <= 0x3e)) ||
+ (code == 0x5e) ||
+ (code == 0x60) ||
+ (code == 0x7c) ||
+ (code == 0x7e) ||
+ ((code >= 0xa2) && (code <= 0xa9)) ||
+ (code == 0xac) ||
+ ((code >= 0xae) && (code <= 0xb1)) ||
+ (code == 0xb4) ||
+ (code == 0xb6) ||
+ (code == 0xb8) ||
+ (code == 0xd7) ||
+ (code == 0xf7) ||
+ ((code >= 0x2b9) && (code <= 0x2ba)) ||
+ ((code >= 0x2c2) && (code <= 0x2cf)) ||
+ ((code >= 0x2d2) && (code <= 0x2df)) ||
+ ((code >= 0x2e5) && (code <= 0x2ed)) ||
+ ((code >= 0x374) && (code <= 0x375)) ||
+ ((code >= 0x384) && (code <= 0x385)) ||
+ (code == 0x482) ||
+ (code == 0x6e9) ||
+ ((code >= 0x6fd) && (code <= 0x6fe)) ||
+ ((code >= 0x9f2) && (code <= 0x9f3)) ||
+ (code == 0x9fa) ||
+ (code == 0xb70) ||
+ (code == 0xe3f) ||
+ ((code >= 0xf01) && (code <= 0xf03)) ||
+ ((code >= 0xf13) && (code <= 0xf17)) ||
+ ((code >= 0xf1a) && (code <= 0xf1f)) ||
+ (code == 0xf34) ||
+ (code == 0xf36) ||
+ (code == 0xf38) ||
+ ((code >= 0xfbe) && (code <= 0xfc5)) ||
+ ((code >= 0xfc7) && (code <= 0xfcc)) ||
+ (code == 0xfcf) ||
+ (code == 0x17db) ||
+ (code == 0x1fbd) ||
+ ((code >= 0x1fbf) && (code <= 0x1fc1)) ||
+ ((code >= 0x1fcd) && (code <= 0x1fcf)) ||
+ ((code >= 0x1fdd) && (code <= 0x1fdf)) ||
+ ((code >= 0x1fed) && (code <= 0x1fef)) ||
+ ((code >= 0x1ffd) && (code <= 0x1ffe)) ||
+ (code == 0x2044) ||
+ ((code >= 0x207a) && (code <= 0x207c)) ||
+ ((code >= 0x208a) && (code <= 0x208c)) ||
+ ((code >= 0x20a0) && (code <= 0x20af)) ||
+ ((code >= 0x2100) && (code <= 0x2101)) ||
+ ((code >= 0x2103) && (code <= 0x2106)) ||
+ ((code >= 0x2108) && (code <= 0x2109)) ||
+ (code == 0x2114) ||
+ ((code >= 0x2116) && (code <= 0x2118)) ||
+ ((code >= 0x211e) && (code <= 0x2123)) ||
+ (code == 0x2125) ||
+ (code == 0x2127) ||
+ (code == 0x2129) ||
+ (code == 0x212e) ||
+ (code == 0x2132) ||
+ (code == 0x213a) ||
+ ((code >= 0x2190) && (code <= 0x21f3)) ||
+ ((code >= 0x2200) && (code <= 0x22f1)) ||
+ ((code >= 0x2300) && (code <= 0x2328)) ||
+ ((code >= 0x232b) && (code <= 0x237b)) ||
+ ((code >= 0x237d) && (code <= 0x239a)) ||
+ ((code >= 0x2400) && (code <= 0x2426)) ||
+ ((code >= 0x2440) && (code <= 0x244a)) ||
+ ((code >= 0x249c) && (code <= 0x24e9)) ||
+ ((code >= 0x2500) && (code <= 0x2595)) ||
+ ((code >= 0x25a0) && (code <= 0x25f7)) ||
+ ((code >= 0x2600) && (code <= 0x2613)) ||
+ ((code >= 0x2619) && (code <= 0x2671)) ||
+ ((code >= 0x2701) && (code <= 0x2704)) ||
+ ((code >= 0x2706) && (code <= 0x2709)) ||
+ ((code >= 0x270c) && (code <= 0x2727)) ||
+ ((code >= 0x2729) && (code <= 0x274b)) ||
+ (code == 0x274d) ||
+ ((code >= 0x274f) && (code <= 0x2752)) ||
+ (code == 0x2756) ||
+ ((code >= 0x2758) && (code <= 0x275e)) ||
+ ((code >= 0x2761) && (code <= 0x2767)) ||
+ (code == 0x2794) ||
+ ((code >= 0x2798) && (code <= 0x27af)) ||
+ ((code >= 0x27b1) && (code <= 0x27be)) ||
+ ((code >= 0x2800) && (code <= 0x28ff)) ||
+ ((code >= 0x2e80) && (code <= 0x2e99)) ||
+ ((code >= 0x2e9b) && (code <= 0x2ef3)) ||
+ ((code >= 0x2f00) && (code <= 0x2fd5)) ||
+ ((code >= 0x2ff0) && (code <= 0x2ffb)) ||
+ (code == 0x3004) ||
+ ((code >= 0x3012) && (code <= 0x3013)) ||
+ (code == 0x3020) ||
+ ((code >= 0x3036) && (code <= 0x3037)) ||
+ ((code >= 0x303e) && (code <= 0x303f)) ||
+ ((code >= 0x309b) && (code <= 0x309c)) ||
+ ((code >= 0x3190) && (code <= 0x3191)) ||
+ ((code >= 0x3196) && (code <= 0x319f)) ||
+ ((code >= 0x3200) && (code <= 0x321c)) ||
+ ((code >= 0x322a) && (code <= 0x3243)) ||
+ ((code >= 0x3260) && (code <= 0x327b)) ||
+ (code == 0x327f) ||
+ ((code >= 0x328a) && (code <= 0x32b0)) ||
+ ((code >= 0x32c0) && (code <= 0x32cb)) ||
+ ((code >= 0x32d0) && (code <= 0x32fe)) ||
+ ((code >= 0x3300) && (code <= 0x3376)) ||
+ ((code >= 0x337b) && (code <= 0x33dd)) ||
+ ((code >= 0x33e0) && (code <= 0x33fe)) ||
+ ((code >= 0xa490) && (code <= 0xa4a1)) ||
+ ((code >= 0xa4a4) && (code <= 0xa4b3)) ||
+ ((code >= 0xa4b5) && (code <= 0xa4c0)) ||
+ ((code >= 0xa4c2) && (code <= 0xa4c4)) ||
+ (code == 0xa4c6) ||
+ (code == 0xfb29) ||
+ (code == 0xfe62) ||
+ ((code >= 0xfe64) && (code <= 0xfe66)) ||
+ (code == 0xfe69) ||
+ (code == 0xff04) ||
+ (code == 0xff0b) ||
+ ((code >= 0xff1c) && (code <= 0xff1e)) ||
+ (code == 0xff3e) ||
+ (code == 0xff40) ||
+ (code == 0xff5c) ||
+ (code == 0xff5e) ||
+ ((code >= 0xffe0) && (code <= 0xffe6)) ||
+ ((code >= 0xffe8) && (code <= 0xffee)) ||
+ ((code >= 0xfffc) && (code <= 0xfffd)) ||
+ ((code >= 0x1d000) && (code <= 0x1d0f5)) ||
+ ((code >= 0x1d100) && (code <= 0x1d126)) ||
+ ((code >= 0x1d12a) && (code <= 0x1d164)) ||
+ ((code >= 0x1d16a) && (code <= 0x1d16c)) ||
+ ((code >= 0x1d183) && (code <= 0x1d184)) ||
+ ((code >= 0x1d18c) && (code <= 0x1d1a9)) ||
+ ((code >= 0x1d1ae) && (code <= 0x1d1dd)) ||
+ (code == 0x1d6c1) ||
+ (code == 0x1d6db) ||
+ (code == 0x1d6fb) ||
+ (code == 0x1d715) ||
+ (code == 0x1d735) ||
+ (code == 0x1d74f) ||
+ (code == 0x1d76f) ||
+ (code == 0x1d789) ||
+ (code == 0x1d7a9) ||
+ (code == 0x1d7c3));
+}
+
+/**
+ * xmlUCSIsCatSc:
+ * @code: UCS code point to test
+ *
+ * Tell whether @code is one of the listed Sc (Symbol, Currency) code points
+ *
+ * Returns 1 when @code matches a listed value or range, 0 otherwise
+ */
+int
+xmlUCSIsCatSc(int code) {
+ if (code == 0x24) return(1);
+ if ((code >= 0xa2) && (code <= 0xa5)) return(1);
+ if ((code >= 0x9f2) && (code <= 0x9f3)) return(1);
+ if (code == 0xe3f) return(1);
+ if (code == 0x17db) return(1);
+ if ((code >= 0x20a0) && (code <= 0x20af)) return(1);
+ if (code == 0xfe69) return(1);
+ if (code == 0xff04) return(1);
+ if ((code >= 0xffe0) && (code <= 0xffe1)) return(1);
+ return((code >= 0xffe5) && (code <= 0xffe6));
+}
+
+/**
+ * xmlUCSIsCatSk:
+ * @code: UCS code point to test
+ *
+ * Tell whether @code is one of the listed Sk (Symbol, Modifier) code points
+ *
+ * Returns 1 when @code matches a listed value or range, 0 otherwise
+ */
+int
+xmlUCSIsCatSk(int code) {
+ if (code == 0x5e) return(1);
+ if (code == 0x60) return(1);
+ if (code == 0xa8) return(1);
+ if (code == 0xaf) return(1);
+ if (code == 0xb4) return(1);
+ if (code == 0xb8) return(1);
+ if ((code >= 0x2b9) && (code <= 0x2ba)) return(1);
+ if ((code >= 0x2c2) && (code <= 0x2cf)) return(1);
+ if ((code >= 0x2d2) && (code <= 0x2df)) return(1);
+ if ((code >= 0x2e5) && (code <= 0x2ed)) return(1);
+ if ((code >= 0x374) && (code <= 0x375)) return(1);
+ if ((code >= 0x384) && (code <= 0x385)) return(1);
+ if (code == 0x1fbd) return(1);
+ if ((code >= 0x1fbf) && (code <= 0x1fc1)) return(1);
+ if ((code >= 0x1fcd) && (code <= 0x1fcf)) return(1);
+ if ((code >= 0x1fdd) && (code <= 0x1fdf)) return(1);
+ if ((code >= 0x1fed) && (code <= 0x1fef)) return(1);
+ if ((code >= 0x1ffd) && (code <= 0x1ffe)) return(1);
+ if ((code >= 0x309b) && (code <= 0x309c)) return(1);
+ if (code == 0xff3e) return(1);
+ if (code == 0xff40) return(1);
+ return(code == 0xffe3);
+}
+
+/**
+ * xmlUCSIsCatSm:
+ * @code: UCS code point to test
+ *
+ * Check @code against the fixed range table for Sm (Symbol, Math)
+ *
+ * Returns 1 if @code is in one of the listed ranges, 0 otherwise
+ */
+int
+xmlUCSIsCatSm(int code) {
+ return((code == 0x2b) ||
+ ((code >= 0x3c) && (code <= 0x3e)) ||
+ (code == 0x7c) ||
+ (code == 0x7e) ||
+ (code == 0xac) ||
+ (code == 0xb1) ||
+ (code == 0xd7) ||
+ (code == 0xf7) ||
+ (code == 0x2044) ||
+ ((code >= 0x207a) && (code <= 0x207c)) ||
+ ((code >= 0x208a) && (code <= 0x208c)) ||
+ ((code >= 0x2190) && (code <= 0x2194)) ||
+ ((code >= 0x219a) && (code <= 0x219b)) ||
+ (code == 0x21a0) ||
+ (code == 0x21a3) ||
+ (code == 0x21a6) ||
+ (code == 0x21ae) ||
+ ((code >= 0x21ce) && (code <= 0x21cf)) ||
+ (code == 0x21d2) ||
+ (code == 0x21d4) ||
+ ((code >= 0x2200) && (code <= 0x22f1)) ||
+ ((code >= 0x2308) && (code <= 0x230b)) ||
+ ((code >= 0x2320) && (code <= 0x2321)) ||
+ (code == 0x25b7) ||
+ (code == 0x25c1) ||
+ (code == 0x266f) ||
+ (code == 0xfb29) ||
+ (code == 0xfe62) ||
+ ((code >= 0xfe64) && (code <= 0xfe66)) ||
+ (code == 0xff0b) ||
+ ((code >= 0xff1c) && (code <= 0xff1e)) ||
+ (code == 0xff5c) ||
+ (code == 0xff5e) ||
+ (code == 0xffe2) ||
+ ((code >= 0xffe9) && (code <= 0xffec)) ||
+ (code == 0x1d6c1) ||
+ (code == 0x1d6db) ||
+ (code == 0x1d6fb) ||
+ (code == 0x1d715) ||
+ (code == 0x1d735) ||
+ (code == 0x1d74f) ||
+ (code == 0x1d76f) ||
+ (code == 0x1d789) ||
+ (code == 0x1d7a9) ||
+ (code == 0x1d7c3));
+}
+
+/**
+ * xmlUCSIsCatSo:
+ * @code: UCS code point to test
+ *
+ * Check @code against the fixed range table for So (Symbol, Other)
+ *
+ * Returns 1 if @code is in one of the listed ranges, 0 otherwise
+ */
+int
+xmlUCSIsCatSo(int code) {
+ return(((code >= 0xa6) && (code <= 0xa7)) ||
+ (code == 0xa9) ||
+ (code == 0xae) ||
+ (code == 0xb0) ||
+ (code == 0xb6) ||
+ (code == 0x482) ||
+ (code == 0x6e9) ||
+ ((code >= 0x6fd) && (code <= 0x6fe)) ||
+ (code == 0x9fa) ||
+ (code == 0xb70) ||
+ ((code >= 0xf01) && (code <= 0xf03)) ||
+ ((code >= 0xf13) && (code <= 0xf17)) ||
+ ((code >= 0xf1a) && (code <= 0xf1f)) ||
+ (code == 0xf34) ||
+ (code == 0xf36) ||
+ (code == 0xf38) ||
+ ((code >= 0xfbe) && (code <= 0xfc5)) ||
+ ((code >= 0xfc7) && (code <= 0xfcc)) ||
+ (code == 0xfcf) ||
+ ((code >= 0x2100) && (code <= 0x2101)) ||
+ ((code >= 0x2103) && (code <= 0x2106)) ||
+ ((code >= 0x2108) && (code <= 0x2109)) ||
+ (code == 0x2114) ||
+ ((code >= 0x2116) && (code <= 0x2118)) ||
+ ((code >= 0x211e) && (code <= 0x2123)) ||
+ (code == 0x2125) ||
+ (code == 0x2127) ||
+ (code == 0x2129) ||
+ (code == 0x212e) ||
+ (code == 0x2132) ||
+ (code == 0x213a) ||
+ ((code >= 0x2195) && (code <= 0x2199)) ||
+ ((code >= 0x219c) && (code <= 0x219f)) ||
+ ((code >= 0x21a1) && (code <= 0x21a2)) ||
+ ((code >= 0x21a4) && (code <= 0x21a5)) ||
+ ((code >= 0x21a7) && (code <= 0x21ad)) ||
+ ((code >= 0x21af) && (code <= 0x21cd)) ||
+ ((code >= 0x21d0) && (code <= 0x21d1)) ||
+ (code == 0x21d3) ||
+ ((code >= 0x21d5) && (code <= 0x21f3)) ||
+ ((code >= 0x2300) && (code <= 0x2307)) ||
+ ((code >= 0x230c) && (code <= 0x231f)) ||
+ ((code >= 0x2322) && (code <= 0x2328)) ||
+ ((code >= 0x232b) && (code <= 0x237b)) ||
+ ((code >= 0x237d) && (code <= 0x239a)) ||
+ ((code >= 0x2400) && (code <= 0x2426)) ||
+ ((code >= 0x2440) && (code <= 0x244a)) ||
+ ((code >= 0x249c) && (code <= 0x24e9)) ||
+ ((code >= 0x2500) && (code <= 0x2595)) ||
+ ((code >= 0x25a0) && (code <= 0x25b6)) ||
+ ((code >= 0x25b8) && (code <= 0x25c0)) ||
+ ((code >= 0x25c2) && (code <= 0x25f7)) ||
+ ((code >= 0x2600) && (code <= 0x2613)) ||
+ ((code >= 0x2619) && (code <= 0x266e)) ||
+ ((code >= 0x2670) && (code <= 0x2671)) ||
+ ((code >= 0x2701) && (code <= 0x2704)) ||
+ ((code >= 0x2706) && (code <= 0x2709)) ||
+ ((code >= 0x270c) && (code <= 0x2727)) ||
+ ((code >= 0x2729) && (code <= 0x274b)) ||
+ (code == 0x274d) ||
+ ((code >= 0x274f) && (code <= 0x2752)) ||
+ (code == 0x2756) ||
+ ((code >= 0x2758) && (code <= 0x275e)) ||
+ ((code >= 0x2761) && (code <= 0x2767)) ||
+ (code == 0x2794) ||
+ ((code >= 0x2798) && (code <= 0x27af)) ||
+ ((code >= 0x27b1) && (code <= 0x27be)) ||
+ ((code >= 0x2800) && (code <= 0x28ff)) ||
+ ((code >= 0x2e80) && (code <= 0x2e99)) ||
+ ((code >= 0x2e9b) && (code <= 0x2ef3)) ||
+ ((code >= 0x2f00) && (code <= 0x2fd5)) ||
+ ((code >= 0x2ff0) && (code <= 0x2ffb)) ||
+ (code == 0x3004) ||
+ ((code >= 0x3012) && (code <= 0x3013)) ||
+ (code == 0x3020) ||
+ ((code >= 0x3036) && (code <= 0x3037)) ||
+ ((code >= 0x303e) && (code <= 0x303f)) ||
+ ((code >= 0x3190) && (code <= 0x3191)) ||
+ ((code >= 0x3196) && (code <= 0x319f)) ||
+ ((code >= 0x3200) && (code <= 0x321c)) ||
+ ((code >= 0x322a) && (code <= 0x3243)) ||
+ ((code >= 0x3260) && (code <= 0x327b)) ||
+ (code == 0x327f) ||
+ ((code >= 0x328a) && (code <= 0x32b0)) ||
+ ((code >= 0x32c0) && (code <= 0x32cb)) ||
+ ((code >= 0x32d0) && (code <= 0x32fe)) ||
+ ((code >= 0x3300) && (code <= 0x3376)) ||
+ ((code >= 0x337b) && (code <= 0x33dd)) ||
+ ((code >= 0x33e0) && (code <= 0x33fe)) ||
+ ((code >= 0xa490) && (code <= 0xa4a1)) ||
+ ((code >= 0xa4a4) && (code <= 0xa4b3)) ||
+ ((code >= 0xa4b5) && (code <= 0xa4c0)) ||
+ ((code >= 0xa4c2) && (code <= 0xa4c4)) ||
+ (code == 0xa4c6) ||
+ (code == 0xffe4) ||
+ (code == 0xffe8) ||
+ ((code >= 0xffed) && (code <= 0xffee)) ||
+ ((code >= 0xfffc) && (code <= 0xfffd)) ||
+ ((code >= 0x1d000) && (code <= 0x1d0f5)) ||
+ ((code >= 0x1d100) && (code <= 0x1d126)) ||
+ ((code >= 0x1d12a) && (code <= 0x1d164)) ||
+ ((code >= 0x1d16a) && (code <= 0x1d16c)) ||
+ ((code >= 0x1d183) && (code <= 0x1d184)) ||
+ ((code >= 0x1d18c) && (code <= 0x1d1a9)) ||
+ ((code >= 0x1d1ae) && (code <= 0x1d1dd)));
+}
+
+/**
+ * xmlUCSIsCatZ:
+ * @code: UCS code point to test
+ *
+ * Tell whether @code is one of the listed Z (Separator) code points
+ *
+ * Returns 1 when @code matches a listed value or range, 0 otherwise
+ */
+int
+xmlUCSIsCatZ(int code) {
+ if ((code >= 0x2000) && (code <= 0x200b)) return(1);
+ switch (code) {
+ case 0x20: case 0xa0: case 0x1680:
+ case 0x2028: case 0x2029: case 0x202f: case 0x3000:
+ return(1);
+ }
+ return(0);
+}
+
+/**
+ * xmlUCSIsCatZl:
+ * @code: UCS code point to test
+ *
+ * Tell whether @code is 0x2028, the only Zl (Separator, Line) value listed
+ *
+ * Returns 1 when @code equals 0x2028, 0 otherwise
+ */
+int
+xmlUCSIsCatZl(int code) {
+ return((code == 0x2028) ? 1 : 0);
+}
+
+/**
+ * xmlUCSIsCatZp:
+ * @code: UCS code point to test
+ *
+ * Tell whether @code is 0x2029, the only Zp (Separator, Paragraph) value listed
+ *
+ * Returns 1 when @code equals 0x2029, 0 otherwise
+ */
+int
+xmlUCSIsCatZp(int code) {
+ return((code == 0x2029) ? 1 : 0);
+}
+
+/**
+ * xmlUCSIsCatZs:
+ * @code: UCS code point to test
+ *
+ * Tell whether @code is one of the listed Zs (Separator, Space) code points
+ *
+ * Returns 1 when @code matches a listed value or range, 0 otherwise
+ */
+int
+xmlUCSIsCatZs(int code) {
+ if ((code >= 0x2000) && (code <= 0x200b))
+ return(1);
+ switch (code) {
+ case 0x20: case 0xa0: case 0x1680: case 0x202f: case 0x3000: return(1);
+ default: return(0);
+ }
+}
+
+/**
+ * xmlUCSIsCat:
+ * @code: UCS code point
+ * @cat: UCS Category name, e.g. "L" or "Lu"; compared exactly (case-sensitive)
+ *
+ * Check whether the character is part of the UCS Category named by @cat
+ *
+ * Returns 1 if true, 0 if false and -1 on unknown (or NULL) category
+ */
+int
+xmlUCSIsCat(int code, const char *cat) {
+ if (cat == NULL) return(-1); /* strcmp() on NULL is undefined behaviour */
+ if (!strcmp(cat, "C"))
+ return(xmlUCSIsCatC(code));
+ if (!strcmp(cat, "Cc"))
+ return(xmlUCSIsCatCc(code));
+ if (!strcmp(cat, "Cf"))
+ return(xmlUCSIsCatCf(code));
+ if (!strcmp(cat, "Co"))
+ return(xmlUCSIsCatCo(code));
+ if (!strcmp(cat, "Cs"))
+ return(xmlUCSIsCatCs(code));
+ if (!strcmp(cat, "L"))
+ return(xmlUCSIsCatL(code));
+ if (!strcmp(cat, "Ll"))
+ return(xmlUCSIsCatLl(code));
+ if (!strcmp(cat, "Lm"))
+ return(xmlUCSIsCatLm(code));
+ if (!strcmp(cat, "Lo"))
+ return(xmlUCSIsCatLo(code));
+ if (!strcmp(cat, "Lt"))
+ return(xmlUCSIsCatLt(code));
+ if (!strcmp(cat, "Lu"))
+ return(xmlUCSIsCatLu(code));
+ if (!strcmp(cat, "M"))
+ return(xmlUCSIsCatM(code));
+ if (!strcmp(cat, "Mc"))
+ return(xmlUCSIsCatMc(code));
+ if (!strcmp(cat, "Me"))
+ return(xmlUCSIsCatMe(code));
+ if (!strcmp(cat, "Mn"))
+ return(xmlUCSIsCatMn(code));
+ if (!strcmp(cat, "N"))
+ return(xmlUCSIsCatN(code));
+ if (!strcmp(cat, "Nd"))
+ return(xmlUCSIsCatNd(code));
+ if (!strcmp(cat, "Nl"))
+ return(xmlUCSIsCatNl(code));
+ if (!strcmp(cat, "No"))
+ return(xmlUCSIsCatNo(code));
+ if (!strcmp(cat, "P"))
+ return(xmlUCSIsCatP(code));
+ if (!strcmp(cat, "Pc"))
+ return(xmlUCSIsCatPc(code));
+ if (!strcmp(cat, "Pd"))
+ return(xmlUCSIsCatPd(code));
+ if (!strcmp(cat, "Pe"))
+ return(xmlUCSIsCatPe(code));
+ if (!strcmp(cat, "Pf"))
+ return(xmlUCSIsCatPf(code));
+ if (!strcmp(cat, "Pi"))
+ return(xmlUCSIsCatPi(code));
+ if (!strcmp(cat, "Po"))
+ return(xmlUCSIsCatPo(code));
+ if (!strcmp(cat, "Ps"))
+ return(xmlUCSIsCatPs(code));
+ if (!strcmp(cat, "S"))
+ return(xmlUCSIsCatS(code));
+ if (!strcmp(cat, "Sc"))
+ return(xmlUCSIsCatSc(code));
+ if (!strcmp(cat, "Sk"))
+ return(xmlUCSIsCatSk(code));
+ if (!strcmp(cat, "Sm"))
+ return(xmlUCSIsCatSm(code));
+ if (!strcmp(cat, "So"))
+ return(xmlUCSIsCatSo(code));
+ if (!strcmp(cat, "Z"))
+ return(xmlUCSIsCatZ(code));
+ if (!strcmp(cat, "Zl"))
+ return(xmlUCSIsCatZl(code));
+ if (!strcmp(cat, "Zp"))
+ return(xmlUCSIsCatZp(code));
+ if (!strcmp(cat, "Zs"))
+ return(xmlUCSIsCatZs(code));
+ return(-1);
+}
+
+#endif /* LIBXML_UNICODE_ENABLED */