new files for a different method for doing range validation of character data

* genChRanges.py, chvalid.def, chvalid.c, include/libxml/chvalid.h:
  new files for a different method for doing range validation
  of character data.
* Makefile.am, parserInternals.c, include/libxml/Makefile.am,
  include/libxml/parserInternals.h: modified for new range method.
* catalog.c: small enhance for warning message (using one
  of the new range routines)
diff --git a/ChangeLog b/ChangeLog
index c4f9a84..50dac47 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Sat Oct 11 23:11:22 HKT 2003 William Brack <wbrack@mmm.com.hk>
+
+	* genChRanges.py, chvalid.def, chvalid.c, include/libxml/chvalid.h:
+	  new files for a different method for doing range validation
+	  of character data.
+	* Makefile.am, parserInternals.c, include/libxml/Makefile.am,
+	  include/libxml/parserInternals.h: modified for new range method.
+	* catalog.c: small enhance for warning message (using one
+	  of the new range routines)
+
 Sat Oct 11 13:24:57 CEST 2003 Daniel Veillard <daniel@veillard.com>
 
 	* valid.c include/libxml/valid.h: adding an serror field to
diff --git a/Makefile.am b/Makefile.am
index 8fc01b5..8872341 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -27,7 +27,7 @@
 		catalog.c globals.c threads.c c14n.c \
 		xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c \
 		triostr.c trio.c xmlreader.c relaxng.c dict.c SAX2.c \
-		legacy.c walker.c
+		legacy.c walker.c chvalid.c
 else
 libxml2_la_SOURCES = SAX.c entities.c encoding.c error.c parserInternals.c  \
 		parser.c tree.c hash.c list.c xmlIO.c xmlmemory.c uri.c  \
@@ -36,7 +36,7 @@
 		catalog.c globals.c threads.c c14n.c \
 		xmlregexp.c xmlschemas.c xmlschemastypes.c xmlunicode.c \
 		xmlreader.c relaxng.c dict.c SAX2.c \
-		legacy.c xmldwalk.c
+		legacy.c xmldwalk.c chvalid.c
 endif
 
 DEPS = $(top_builddir)/libxml2.la
diff --git a/catalog.c b/catalog.c
index 06dbb52..4375ebb 100644
--- a/catalog.c
+++ b/catalog.c
@@ -2932,11 +2932,11 @@
 	    cur = catalogs;
 	    nextent = &catal->xml;
 	    while (*cur != '\0') {
-		while (IS_BLANK(*cur)) 
+		while (xmlIsBlank_ch(*cur)) 
 		    cur++;
 		if (*cur != 0) {
 		    paths = cur;
-		    while ((*cur != 0) && (!IS_BLANK(*cur)))
+		    while ((*cur != 0) && (!xmlIsBlank_ch(*cur)))
 			cur++;
 		    path = (char *) xmlStrndup((const xmlChar *)paths, cur - paths);
 		    if (path != NULL) {
@@ -3015,10 +3015,10 @@
 
     cur = pathss;
     while ((cur != NULL) && (*cur != 0)) {
-	while (IS_BLANK(*cur)) cur++;
+	while (xmlIsBlank_ch(*cur)) cur++;
 	if (*cur != 0) {
 	    paths = cur;
-	    while ((*cur != 0) && (*cur != ':') && (!IS_BLANK(*cur)))
+	    while ((*cur != 0) && (*cur != ':') && (!xmlIsBlank_ch(*cur)))
 		cur++;
 	    path = xmlStrndup((const xmlChar *)paths, cur - paths);
 	    if (path != NULL) {
diff --git a/chvalid.c b/chvalid.c
new file mode 100755
index 0000000..fbfd32b
--- /dev/null
+++ b/chvalid.c
@@ -0,0 +1,186 @@
+/*
+ * chvalid.c:	this module implements the character range
+ *		validation APIs
+ *
+ * This file is automatically generated from the cvs source
+ * definition files using the genChRanges.py Python script
+ *
+ * Generation date: Sat Oct 11 20:57:37 2003
+ * Sources: chvalid.def
+ * William Brack <wbrack@mmm.com.hk>
+ */
+
+#include "chvalid.h"
+
+/*
+ * The initial tables ({func_name}_tab) are used to validate whether a
+ * single-byte character is within the specified group.  Each table
+ * contains 256 bytes, with each byte representing one of the 256
+ * possible characters.  If the table byte is set, the character is
+ * allowed.  For xmlIsPubidChar_tab the set bytes are 0x20, 0x0d, 0x0a,
+ * [a-zA-Z0-9] and the punctuation -'()+,./:=?;!*#@$_% (XML PubidChar).
+ */
+unsigned char xmlIsPubidChar_tab[256] = {
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
+    0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x01,
+    0x01, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+    0x00, 0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x01,
+    0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+    0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00 };
+
+static xmlChSRange xmlIsBaseChar_srng[] = { {0x100, 0x131}, {0x134, 0x13e}, 
+    {0x141, 0x148}, {0x14a, 0x17e}, {0x180, 0x1c3}, {0x1cd, 0x1f0}, 
+    {0x1f4, 0x1f5}, {0x1fa, 0x217}, {0x250, 0x2a8}, {0x2bb, 0x2c1}, 
+    {0x386, 0x386}, {0x388, 0x38a}, {0x38c, 0x38c}, {0x38e, 0x3a1}, 
+    {0x3a3, 0x3ce}, {0x3d0, 0x3d6}, {0x3da, 0x3da}, {0x3dc, 0x3dc}, 
+    {0x3de, 0x3de}, {0x3e0, 0x3e0}, {0x3e2, 0x3f3}, {0x401, 0x40c}, 
+    {0x40e, 0x44f}, {0x451, 0x45c}, {0x45e, 0x481}, {0x490, 0x4c4}, 
+    {0x4c7, 0x4c8}, {0x4cb, 0x4cc}, {0x4d0, 0x4eb}, {0x4ee, 0x4f5}, 
+    {0x4f8, 0x4f9}, {0x531, 0x556}, {0x559, 0x559}, {0x561, 0x586}, 
+    {0x5d0, 0x5ea}, {0x5f0, 0x5f2}, {0x621, 0x63a}, {0x641, 0x64a}, 
+    {0x671, 0x6b7}, {0x6ba, 0x6be}, {0x6c0, 0x6ce}, {0x6d0, 0x6d3}, 
+    {0x6d5, 0x6d5}, {0x6e5, 0x6e6}, {0x905, 0x939}, {0x93d, 0x93d}, 
+    {0x958, 0x961}, {0x985, 0x98c}, {0x98f, 0x990}, {0x993, 0x9a8}, 
+    {0x9aa, 0x9b0}, {0x9b2, 0x9b2}, {0x9b6, 0x9b9}, {0x9dc, 0x9dd}, 
+    {0x9df, 0x9e1}, {0x9f0, 0x9f1}, {0xa05, 0xa0a}, {0xa0f, 0xa10}, 
+    {0xa13, 0xa28}, {0xa2a, 0xa30}, {0xa32, 0xa33}, {0xa35, 0xa36}, 
+    {0xa38, 0xa39}, {0xa59, 0xa5c}, {0xa5e, 0xa5e}, {0xa72, 0xa74}, 
+    {0xa85, 0xa8b}, {0xa8d, 0xa8d}, {0xa8f, 0xa91}, {0xa93, 0xaa8}, 
+    {0xaaa, 0xab0}, {0xab2, 0xab3}, {0xab5, 0xab9}, {0xabd, 0xabd}, 
+    {0xae0, 0xae0}, {0xb05, 0xb0c}, {0xb0f, 0xb10}, {0xb13, 0xb28}, 
+    {0xb2a, 0xb30}, {0xb32, 0xb33}, {0xb36, 0xb39}, {0xb3d, 0xb3d}, 
+    {0xb5c, 0xb5d}, {0xb5f, 0xb61}, {0xb85, 0xb8a}, {0xb8e, 0xb90}, 
+    {0xb92, 0xb95}, {0xb99, 0xb9a}, {0xb9c, 0xb9c}, {0xb9e, 0xb9f}, 
+    {0xba3, 0xba4}, {0xba8, 0xbaa}, {0xbae, 0xbb5}, {0xbb7, 0xbb9}, 
+    {0xc05, 0xc0c}, {0xc0e, 0xc10}, {0xc12, 0xc28}, {0xc2a, 0xc33}, 
+    {0xc35, 0xc39}, {0xc60, 0xc61}, {0xc85, 0xc8c}, {0xc8e, 0xc90}, 
+    {0xc92, 0xca8}, {0xcaa, 0xcb3}, {0xcb5, 0xcb9}, {0xcde, 0xcde}, 
+    {0xce0, 0xce1}, {0xd05, 0xd0c}, {0xd0e, 0xd10}, {0xd12, 0xd28}, 
+    {0xd2a, 0xd39}, {0xd60, 0xd61}, {0xe01, 0xe2e}, {0xe30, 0xe30}, 
+    {0xe32, 0xe33}, {0xe40, 0xe45}, {0xe81, 0xe82}, {0xe84, 0xe84}, 
+    {0xe87, 0xe88}, {0xe8a, 0xe8a}, {0xe8d, 0xe8d}, {0xe94, 0xe97}, 
+    {0xe99, 0xe9f}, {0xea1, 0xea3}, {0xea5, 0xea5}, {0xea7, 0xea7}, 
+    {0xeaa, 0xeab}, {0xead, 0xeae}, {0xeb0, 0xeb0}, {0xeb2, 0xeb3}, 
+    {0xebd, 0xebd}, {0xec0, 0xec4}, {0xf40, 0xf47}, {0xf49, 0xf69}, 
+    {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x1100}, {0x1102, 0x1103}, 
+    {0x1105, 0x1107}, {0x1109, 0x1109}, {0x110b, 0x110c}, {0x110e, 0x1112}, 
+    {0x113c, 0x113c}, {0x113e, 0x113e}, {0x1140, 0x1140}, {0x114c, 0x114c}, 
+    {0x114e, 0x114e}, {0x1150, 0x1150}, {0x1154, 0x1155}, {0x1159, 0x1159}, 
+    {0x115f, 0x1161}, {0x1163, 0x1163}, {0x1165, 0x1165}, {0x1167, 0x1167}, 
+    {0x1169, 0x1169}, {0x116d, 0x116e}, {0x1172, 0x1173}, {0x1175, 0x1175}, 
+    {0x119e, 0x119e}, {0x11a8, 0x11a8}, {0x11ab, 0x11ab}, {0x11ae, 0x11af}, 
+    {0x11b7, 0x11b8}, {0x11ba, 0x11ba}, {0x11bc, 0x11c2}, {0x11eb, 0x11eb}, 
+    {0x11f0, 0x11f0}, {0x11f9, 0x11f9}, {0x1e00, 0x1e9b}, {0x1ea0, 0x1ef9}, 
+    {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45}, {0x1f48, 0x1f4d}, 
+    {0x1f50, 0x1f57}, {0x1f59, 0x1f59}, {0x1f5b, 0x1f5b}, {0x1f5d, 0x1f5d}, 
+    {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fbc}, {0x1fbe, 0x1fbe}, 
+    {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3}, {0x1fd6, 0x1fdb}, 
+    {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc}, {0x2126, 0x2126}, 
+    {0x212a, 0x212b}, {0x212e, 0x212e}, {0x2180, 0x2182}, {0x3041, 0x3094}, 
+    {0x30a1, 0x30fa}, {0x3105, 0x312c}, {0xac00, 0xd7a3}};
+xmlChRangeGroup xmlIsBaseCharGroup = {197, 0, xmlIsBaseChar_srng};
+
+static xmlChSRange xmlIsChar_srng[] = { {0x100, 0xd7ff}, {0xe000, 0xfffd}};
+static xmlChLRange xmlIsChar_lrng[] = { {0x10000, 0x10ffff}};
+xmlChRangeGroup xmlIsCharGroup = {2, 1, xmlIsChar_srng, xmlIsChar_lrng};
+
+static xmlChSRange xmlIsCombining_srng[] = { {0x300, 0x345}, 
+    {0x360, 0x361}, {0x483, 0x486}, {0x591, 0x5a1}, {0x5a3, 0x5b9}, 
+    {0x5bb, 0x5bd}, {0x5bf, 0x5bf}, {0x5c1, 0x5c2}, {0x5c4, 0x5c4}, 
+    {0x64b, 0x652}, {0x670, 0x670}, {0x6d6, 0x6dc}, {0x6dd, 0x6df}, 
+    {0x6e0, 0x6e4}, {0x6e7, 0x6e8}, {0x6ea, 0x6ed}, {0x901, 0x903}, 
+    {0x93c, 0x93c}, {0x93e, 0x94c}, {0x94d, 0x94d}, {0x951, 0x954}, 
+    {0x962, 0x963}, {0x981, 0x983}, {0x9bc, 0x9bc}, {0x9be, 0x9be}, 
+    {0x9bf, 0x9bf}, {0x9c0, 0x9c4}, {0x9c7, 0x9c8}, {0x9cb, 0x9cd}, 
+    {0x9d7, 0x9d7}, {0x9e2, 0x9e3}, {0xa02, 0xa02}, {0xa3c, 0xa3c}, 
+    {0xa3e, 0xa3e}, {0xa3f, 0xa3f}, {0xa40, 0xa42}, {0xa47, 0xa48}, 
+    {0xa4b, 0xa4d}, {0xa70, 0xa71}, {0xa81, 0xa83}, {0xabc, 0xabc}, 
+    {0xabe, 0xac5}, {0xac7, 0xac9}, {0xacb, 0xacd}, {0xb01, 0xb03}, 
+    {0xb3c, 0xb3c}, {0xb3e, 0xb43}, {0xb47, 0xb48}, {0xb4b, 0xb4d}, 
+    {0xb56, 0xb57}, {0xb82, 0xb83}, {0xbbe, 0xbc2}, {0xbc6, 0xbc8}, 
+    {0xbca, 0xbcd}, {0xbd7, 0xbd7}, {0xc01, 0xc03}, {0xc3e, 0xc44}, 
+    {0xc46, 0xc48}, {0xc4a, 0xc4d}, {0xc55, 0xc56}, {0xc82, 0xc83}, 
+    {0xcbe, 0xcc4}, {0xcc6, 0xcc8}, {0xcca, 0xccd}, {0xcd5, 0xcd6}, 
+    {0xd02, 0xd03}, {0xd3e, 0xd43}, {0xd46, 0xd48}, {0xd4a, 0xd4d}, 
+    {0xd57, 0xd57}, {0xe31, 0xe31}, {0xe34, 0xe3a}, {0xe47, 0xe4e}, 
+    {0xeb1, 0xeb1}, {0xeb4, 0xeb9}, {0xebb, 0xebc}, {0xec8, 0xecd}, 
+    {0xf18, 0xf19}, {0xf35, 0xf35}, {0xf37, 0xf37}, {0xf39, 0xf39}, 
+    {0xf3e, 0xf3e}, {0xf3f, 0xf3f}, {0xf71, 0xf84}, {0xf86, 0xf8b}, 
+    {0xf90, 0xf95}, {0xf97, 0xf97}, {0xf99, 0xfad}, {0xfb1, 0xfb7}, 
+    {0xfb9, 0xfb9}, {0x20d0, 0x20dc}, {0x20e1, 0x20e1}, {0x302a, 0x302f}, 
+    {0x3099, 0x3099}, {0x309a, 0x309a}};
+xmlChRangeGroup xmlIsCombiningGroup = {95, 0, xmlIsCombining_srng};
+
+static xmlChSRange xmlIsDigit_srng[] = { {0x660, 0x669}, {0x6f0, 0x6f9}, 
+    {0x966, 0x96f}, {0x9e6, 0x9ef}, {0xa66, 0xa6f}, {0xae6, 0xaef}, 
+    {0xb66, 0xb6f}, {0xbe7, 0xbef}, {0xc66, 0xc6f}, {0xce6, 0xcef}, 
+    {0xd66, 0xd6f}, {0xe50, 0xe59}, {0xed0, 0xed9}, {0xf20, 0xf29}};
+xmlChRangeGroup xmlIsDigitGroup = {14, 0, xmlIsDigit_srng};
+
+static xmlChSRange xmlIsExtender_srng[] = { {0x2d0, 0x2d0}, {0x2d1, 0x2d1}, 
+    {0x387, 0x387}, {0x640, 0x640}, {0xe46, 0xe46}, {0xec6, 0xec6}, 
+    {0x3005, 0x3005}, {0x3031, 0x3031}, {0x3032, 0x3032}, {0x3033, 0x3033}, 
+    {0x3034, 0x3034}, {0x3035, 0x3035}, {0x309d, 0x309d}, {0x309e, 0x309e}, 
+    {0x30fc, 0x30fc}, {0x30fd, 0x30fd}, {0x30fe, 0x30fe}};
+xmlChRangeGroup xmlIsExtenderGroup = {17, 0, xmlIsExtender_srng};
+
+static xmlChSRange xmlIsIdeographic_srng[] = { {0x3007, 0x3007}, 
+    {0x3021, 0x3029}, {0x4e00, 0x9fa5}, {0xf900, 0xfa2d}}; /* sorted by low */
+xmlChRangeGroup xmlIsIdeographicGroup = {4, 0, xmlIsIdeographic_srng};
+
+
+/* xmlCharInRange: binary-search the sorted range tables of rptr for val. */
+/* Returns 1 when val lies inside one of the ranges, 0 otherwise.         */
+int
+xmlCharInRange (unsigned int val, xmlChRangeGroupPtr rptr) {
+    int low = 0, high, mid;
+    xmlChSRangePtr sptr;
+    xmlChLRangePtr lptr;
+    if (val < 0x10000) {	/* is val in 'short' or 'long'  array? */
+	if (rptr->nbShortRange == 0)
+	    return 0;
+	high = rptr->nbShortRange - 1;	/* last valid index, not the count */
+	sptr = rptr->shortRange;
+	while (low <= high) {
+	    mid = (low + high) / 2;
+	    if ((unsigned short) val < sptr[mid].low)
+		high = mid - 1;
+	    else if ((unsigned short) val > sptr[mid].high)
+		low = mid + 1;
+	    else
+		return 1;
+	}
+    } else {
+	if (rptr->nbLongRange == 0)
+	    return 0;
+	high = rptr->nbLongRange - 1;	/* last valid index, not the count */
+	lptr = rptr->longRange;
+	while (low <= high) {
+	    mid = (low + high) / 2;
+	    if (val < lptr[mid].low)
+		high = mid - 1;
+	    else if (val > lptr[mid].high)
+		low = mid + 1;
+	    else
+		return 1;
+	}
+    }
+    return 0;
+}
+
diff --git a/chvalid.def b/chvalid.def
new file mode 100755
index 0000000..eed7ab5
--- /dev/null
+++ b/chvalid.def
@@ -0,0 +1,342 @@
+name xmlIsChar
+ur 0x09 0x0a 0x0d 0x20..0xff
+ur 0x000100..0x00d7ff
+ur 0x00e000..0x00fffd
+ur 0x010000..0x10ffff
+end xmlIsChar
+
+name xmlIsPubidChar
+ur 0x20 0x0d 0x0a 'a'..'z' 'A'..'Z' '0'..'9'
+ur '-' 0x27 '(' ')' '+' ',' '.' '/'
+ur ':' '=' '?' ';' '!' '*' '#' '@'
+ur '$' '_' '%'
+end
+
+name xmlIsBlank
+ur 0x09 0x0a 0x0d 0x20
+end xmlIsBlank
+
+name xmlIsBaseChar
+ur 0x41..0x5a 0x61..0x7a 0xc0..0xd6 0xd8..0xf6 0xf8..0xff
+ur 0x000100..0x000131
+ur 0x000134..0x00013e
+ur 0x000141..0x000148
+ur 0x00014a..0x00017e
+ur 0x000180..0x0001c3
+ur 0x0001cd..0x0001f0
+ur 0x0001f4..0x0001f5
+ur 0x0001fa..0x000217
+ur 0x000250..0x0002a8
+ur 0x0002bb..0x0002c1
+ur 0x000386..0x000386
+ur 0x000388..0x00038a
+ur 0x00038c
+ur 0x00038e..0x0003a1
+ur 0x0003a3..0x0003ce
+ur 0x0003d0..0x0003d6
+ur 0x0003da
+ur 0x0003dc
+ur 0x0003de
+ur 0x0003e0
+ur 0x0003e2..0x0003f3
+ur 0x000401..0x00040c
+ur 0x00040e..0x00044f
+ur 0x000451..0x00045c
+ur 0x00045e..0x000481
+ur 0x000490..0x0004c4
+ur 0x0004c7..0x0004c8
+ur 0x0004cb..0x0004cc
+ur 0x0004d0..0x0004eb
+ur 0x0004ee..0x0004f5
+ur 0x0004f8..0x0004f9
+ur 0x000531..0x000556
+ur 0x000559
+ur 0x000561..0x000586
+ur 0x0005d0..0x0005ea
+ur 0x0005f0..0x0005f2
+ur 0x000621..0x00063a
+ur 0x000641..0x00064a
+ur 0x000671..0x0006b7
+ur 0x0006ba..0x0006be
+ur 0x0006c0..0x0006ce
+ur 0x0006d0..0x0006d3
+ur 0x0006d5
+ur 0x0006e5..0x0006e6
+ur 0x000905..0x000939
+ur 0x00093d
+ur 0x000958..0x000961
+ur 0x000985..0x00098c
+ur 0x00098f..0x000990
+ur 0x000993..0x0009a8
+ur 0x0009aa..0x0009b0
+ur 0x0009b2
+ur 0x0009b6..0x0009b9
+ur 0x0009dc..0x0009dd
+ur 0x0009df..0x0009e1
+ur 0x0009f0..0x0009f1
+ur 0x000a05..0x000a0a
+ur 0x000a0f..0x000a10
+ur 0x000a13..0x000a28
+ur 0x000a2a..0x000a30
+ur 0x000a32..0x000a33
+ur 0x000a35..0x000a36
+ur 0x000a38..0x000a39
+ur 0x000a59..0x000a5c
+ur 0x000a5e
+ur 0x000a72..0x000a74
+ur 0x000a85..0x000a8b
+ur 0x000a8d
+ur 0x000a8f..0x000a91
+ur 0x000a93..0x000aa8
+ur 0x000aaa..0x000ab0
+ur 0x000ab2..0x000ab3
+ur 0x000ab5..0x000ab9
+ur 0x000abd
+ur 0x000ae0
+ur 0x000b05..0x000b0c
+ur 0x000b0f..0x000b10
+ur 0x000b13..0x000b28
+ur 0x000b2a..0x000b30
+ur 0x000b32..0x000b33
+ur 0x000b36..0x000b39
+ur 0x000b3d
+ur 0x000b5c..0x000b5d
+ur 0x000b5f..0x000b61
+ur 0x000b85..0x000b8a
+ur 0x000b8e..0x000b90
+ur 0x000b92..0x000b95
+ur 0x000b99..0x000b9a
+ur 0x000b9c
+ur 0x000b9e..0x000b9f
+ur 0x000ba3..0x000ba4
+ur 0x000ba8..0x000baa
+ur 0x000bae..0x000bb5
+ur 0x000bb7..0x000bb9
+ur 0x000c05..0x000c0c
+ur 0x000c0e..0x000c10
+ur 0x000c12..0x000c28
+ur 0x000c2a..0x000c33
+ur 0x000c35..0x000c39
+ur 0x000c60..0x000c61
+ur 0x000c85..0x000c8c
+ur 0x000c8e..0x000c90
+ur 0x000c92..0x000ca8
+ur 0x000caa..0x000cb3
+ur 0x000cb5..0x000cb9
+ur 0x000cde
+ur 0x000ce0..0x000ce1
+ur 0x000d05..0x000d0c
+ur 0x000d0e..0x000d10
+ur 0x000d12..0x000d28
+ur 0x000d2a..0x000d39
+ur 0x000d60..0x000d61
+ur 0x000e01..0x000e2e
+ur 0x000e30
+ur 0x000e32..0x000e33
+ur 0x000e40..0x000e45
+ur 0x000e81..0x000e82
+ur 0x000e84..0x000e84
+ur 0x000e87..0x000e88
+ur 0x000e8a
+ur 0x000e8d
+ur 0x000e94..0x000e97
+ur 0x000e99..0x000e9f
+ur 0x000ea1..0x000ea3
+ur 0x000ea5
+ur 0x000ea7
+ur 0x000eaa..0x000eab
+ur 0x000ead..0x000eae
+ur 0x000eb0
+ur 0x000eb2..0x000eb3
+ur 0x000ebd
+ur 0x000ec0..0x000ec4
+ur 0x000f40..0x000f47
+ur 0x000f49..0x000f69
+ur 0x0010a0..0x0010c5
+ur 0x0010d0..0x0010f6
+ur 0x001100
+ur 0x001102..0x001103
+ur 0x001105..0x001107
+ur 0x001109
+ur 0x00110b..0x00110c
+ur 0x00110e..0x001112
+ur 0x00113c
+ur 0x00113e
+ur 0x001140
+ur 0x00114c
+ur 0x00114e
+ur 0x001150
+ur 0x001154..0x001155
+ur 0x001159
+ur 0x00115f..0x001161
+ur 0x001163
+ur 0x001165
+ur 0x001167
+ur 0x001169
+ur 0x00116d..0x00116e
+ur 0x001172..0x001173
+ur 0x001175
+ur 0x00119e
+ur 0x0011a8
+ur 0x0011ab
+ur 0x0011ae..0x0011af
+ur 0x0011b7..0x0011b8
+ur 0x0011ba
+ur 0x0011bc..0x0011c2
+ur 0x0011eb
+ur 0x0011f0
+ur 0x0011f9
+ur 0x001e00..0x001e9b
+ur 0x001ea0..0x001ef9
+ur 0x001f00..0x001f15
+ur 0x001f18..0x001f1d
+ur 0x001f20..0x001f45
+ur 0x001f48..0x001f4d
+ur 0x001f50..0x001f57
+ur 0x001f59
+ur 0x001f5b
+ur 0x001f5d
+ur 0x001f5f..0x001f7d
+ur 0x001f80..0x001fb4
+ur 0x001fb6..0x001fbc
+ur 0x001fbe
+ur 0x001fc2..0x001fc4
+ur 0x001fc6..0x001fcc
+ur 0x001fd0..0x001fd3
+ur 0x001fd6..0x001fdb
+ur 0x001fe0..0x001fec
+ur 0x001ff2..0x001ff4
+ur 0x001ff6..0x001ffc
+ur 0x002126
+ur 0x00212a..0x00212b
+ur 0x00212e
+ur 0x002180..0x002182
+ur 0x003041..0x003094
+ur 0x0030a1..0x0030fa
+ur 0x003105..0x00312c
+ur 0x00ac00..0x00d7a3
+end
+name xmlIsDigit
+ur 0x30..0x39
+ur 0x660..0x669
+ur 0x6f0..0x6f9
+ur 0x966..0x96f
+ur 0x9e6..0x9ef
+ur 0xa66..0xa6f
+ur 0xae6..0xaef
+ur 0xb66..0xb6f
+ur 0xbe7..0xbef
+ur 0xc66..0xc6f
+ur 0xce6..0xcef
+ur 0xd66..0xd6f
+ur 0xe50..0xe59
+ur 0xed0..0xed9
+ur 0xf20..0xf29
+end
+name xmlIsCombining
+ur 0x0300..0x0345
+ur 0x0360..0x0361
+ur 0x0483..0x0486
+ur 0x0591..0x05A1
+ur 0x05A3..0x05B9
+ur 0x05BB..0x05BD
+ur 0x05BF
+ur 0x05C1..0x05C2
+ur 0x05C4
+ur 0x064B..0x0652
+ur 0x0670
+ur 0x06D6..0x06DC
+ur 0x06DD..0x06DF
+ur 0x06E0..0x06E4
+ur 0x06E7..0x06E8
+ur 0x06EA..0x06ED
+ur 0x0901..0x0903
+ur 0x093C
+ur 0x093E..0x094C
+ur 0x094D
+ur 0x0951..0x0954
+ur 0x0962..0x0963
+ur 0x0981..0x0983
+ur 0x09BC
+ur 0x09BE
+ur 0x09BF
+ur 0x09C0..0x09C4
+ur 0x09C7..0x09C8
+ur 0x09CB..0x09CD
+ur 0x09D7
+ur 0x09E2..0x09E3
+ur 0x0A02
+ur 0x0A3C
+ur 0x0A3E
+ur 0x0A3F
+ur 0x0A40..0x0A42
+ur 0x0A47..0x0A48
+ur 0x0A4B..0x0A4D
+ur 0x0A70..0x0A71
+ur 0x0A81..0x0A83
+ur 0x0ABC
+ur 0x0ABE..0x0AC5
+ur 0x0AC7..0x0AC9
+ur 0x0ACB..0x0ACD
+ur 0x0B01..0x0B03
+ur 0x0B3C
+ur 0x0B3E..0x0B43
+ur 0x0B47..0x0B48
+ur 0x0B4B..0x0B4D
+ur 0x0B56..0x0B57
+ur 0x0B82..0x0B83
+ur 0x0BBE..0x0BC2
+ur 0x0BC6..0x0BC8
+ur 0x0BCA..0x0BCD
+ur 0x0BD7
+ur 0x0C01..0x0C03
+ur 0x0C3E..0x0C44
+ur 0x0C46..0x0C48
+ur 0x0C4A..0x0C4D
+ur 0x0C55..0x0C56
+ur 0x0C82..0x0C83
+ur 0x0CBE..0x0CC4
+ur 0x0CC6..0x0CC8
+ur 0x0CCA..0x0CCD
+ur 0x0CD5..0x0CD6
+ur 0x0D02..0x0D03
+ur 0x0D3E..0x0D43
+ur 0x0D46..0x0D48
+ur 0x0D4A..0x0D4D
+ur 0x0D57
+ur 0x0E31
+ur 0x0E34..0x0E3A
+ur 0x0E47..0x0E4E
+ur 0x0EB1
+ur 0x0EB4..0x0EB9
+ur 0x0EBB..0x0EBC
+ur 0x0EC8..0x0ECD
+ur 0x0F18..0x0F19
+ur 0x0F35
+ur 0x0F37
+ur 0x0F39
+ur 0x0F3E
+ur 0x0F3F
+ur 0x0F71..0x0F84
+ur 0x0F86..0x0F8B
+ur 0x0F90..0x0F95
+ur 0x0F97
+ur 0x0F99..0x0FAD
+ur 0x0FB1..0x0FB7
+ur 0x0FB9
+ur 0x20D0..0x20DC
+ur 0x20E1
+ur 0x302A..0x302F
+ur 0x3099
+ur 0x309A
+end
+name xmlIsExtender
+ur 0xb7 0x2d0 0x2d1 0x387 0x640 0xe46 0xec6 0x3005 0x3031 0x3032
+ur 0x3033 0x3034 0x3035 0x309d 0x309e 0x30fc 0x30fd 0x30fe
+end
+name xmlIsIdeographic
+ur 0x4e00..0x9fa5
+ur 0xf900..0xfa2d
+ur 0x3021..0x3029
+ur 0x3007
+end
diff --git a/chvalid.h b/chvalid.h
new file mode 100644
index 0000000..0b538eb
--- /dev/null
+++ b/chvalid.h
@@ -0,0 +1,105 @@
+/*
+ * chvalid.h: this header exports interfaces for the character
+ *		 range validation APIs
+ *
+ * This file is automatically generated from the cvs source
+ * definition files using the genChRanges.py Python script
+ *
+ * Generation date: Sat Oct 11 20:57:37 2003
+ * Sources: chvalid.def
+ * William Brack <wbrack@mmm.com.hk>
+ */
+
+#ifndef __XML_CHVALID_H__
+#define __XML_CHVALID_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Define our typedefs and structures
+ *
+ */
+typedef struct _xmlChSRange xmlChSRange;
+typedef xmlChSRange *xmlChSRangePtr;
+struct _xmlChSRange {
+    unsigned short	low;
+    unsigned short	high;
+};
+
+typedef struct _xmlChLRange xmlChLRange;
+typedef xmlChLRange *xmlChLRangePtr;
+struct _xmlChLRange {
+    unsigned		low;
+    unsigned		high;
+};
+
+typedef struct _xmlChRangeGroup xmlChRangeGroup;
+typedef xmlChRangeGroup *xmlChRangeGroupPtr;
+struct _xmlChRangeGroup {
+    int			nbShortRange;
+    int			nbLongRange;
+    xmlChSRangePtr	shortRange;	/* points to an array of ranges */
+    xmlChLRangePtr	longRange;
+};
+
+/* Range checking routine */
+int xmlCharInRange(unsigned int val, const xmlChRangeGroupPtr group);
+
+#define xmlIsBaseChar_ch(c)	( ((0x41<= (c)) && ((c) <= 0x5a)) || \
+			((0x61<= (c)) && ((c) <= 0x7a)) || \
+			((0xc0<= (c)) && ((c) <= 0xd6)) || \
+			((0xd8<= (c)) && ((c) <= 0xf6)) || \
+			((0xf8<= (c)) && ((c) <= 0xff)))
+#define xmlIsBaseChar(c)	(((c) < 0x100) ? \
+				xmlIsBaseChar_ch((c)) : \
+				xmlCharInRange((c), &xmlIsBaseCharGroup))
+
+extern xmlChRangeGroup xmlIsBaseCharGroup;
+#define xmlIsBlank_ch(c)	( ((c) == 0x20) || \
+			((0x9<= (c)) && ((c) <= 0xa)) || \
+			((c) == 0xd))
+#define xmlIsBlank(c)	(((c) < 0x100) ? \
+				xmlIsBlank_ch((c)) : 0)
+
+#define xmlIsChar_ch(c)	( ((0x9<= (c)) && ((c) <= 0xa)) || \
+			((c) == 0xd) || \
+			((0x20<= (c)) && ((c) <= 0xff)))
+#define xmlIsChar(c)	(((c) < 0x100) ? \
+				xmlIsChar_ch((c)) : \
+				xmlCharInRange((c), &xmlIsCharGroup))
+
+extern xmlChRangeGroup xmlIsCharGroup;
+#define xmlIsCombining(c)	(((c) < 0x100) ? \
+				0 : \
+				xmlCharInRange((c), &xmlIsCombiningGroup))
+
+extern xmlChRangeGroup xmlIsCombiningGroup;
+#define xmlIsDigit_ch(c)	( ((0x30<= (c)) && ((c) <= 0x39)))
+#define xmlIsDigit(c)	(((c) < 0x100) ? \
+				xmlIsDigit_ch((c)) : \
+				xmlCharInRange((c), &xmlIsDigitGroup))
+
+extern xmlChRangeGroup xmlIsDigitGroup;
+#define xmlIsExtender_ch(c)	( ((c) == 0xb7))
+#define xmlIsExtender(c)	(((c) < 0x100) ? \
+				xmlIsExtender_ch((c)) : \
+				xmlCharInRange((c), &xmlIsExtenderGroup))
+
+extern xmlChRangeGroup xmlIsExtenderGroup;
+#define xmlIsIdeographic(c)	(((c) < 0x100) ? \
+				0 : \
+				xmlCharInRange((c), &xmlIsIdeographicGroup))
+
+extern xmlChRangeGroup xmlIsIdeographicGroup;
+extern unsigned char xmlIsPubidChar_tab[256];
+#define xmlIsPubidChar_ch(c)	(xmlIsPubidChar_tab[(c)])
+#define xmlIsPubidChar(c)	(((c) < 0x100) ? \
+				xmlIsPubidChar_ch((c)) : 0)
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* __XML_CHVALID_H__ */
diff --git a/genChRanges.py b/genChRanges.py
new file mode 100755
index 0000000..f35cc5c
--- /dev/null
+++ b/genChRanges.py
@@ -0,0 +1,465 @@
+#!/usr/bin/python -u
+#
+# Portions of this script have been (shamelessly) stolen from the
+# prior work of Daniel Veillard (genUnicode.py)
+#
+# I, however, take full credit for any bugs, errors or difficulties :-)
+#
+# William Brack
+# October 2003
+#
+
+import sys
+import string
+import time
+
+#
+# A little routine to assign a 'meaningful' name to a range
+#
+def rangename( intvl ):
+    (start, end) = intvl
+    rname = "r" + hex(start)[2:] + "x" + hex(end)[2:]
+    return rname
+
+#
+# A routine to take a list of yes/no (1, 0) values and turn it
+# into a list of ranges.  This will later be used to determine whether
+# to generate single-byte lookup tables, or inline comparisons
+#
+def makeRange(lst):
+    ret = []
+    pos = 0
+    while pos < len(lst):
+	try:		# index generates exception if not present
+	    s = lst[pos:].index(1)	# look for start of next range
+	except:
+	    break			# if no more, finished
+	pos += s			# pointer to start of possible range
+	try:
+	    e = lst[pos:].index(0)	# look for end of range
+	    e += pos
+	except:				# if no end, set to end of list
+	    e = len(lst)
+	ret.append((pos, e-1))		# append range tuple to list
+	pos = e + 1			# ready to check for next range
+    return ret
+
+sources = "chvalid.def"			# input filename
+
+# minTableSize gives the minimum number of ranges which must be present
+# before a 256-byte lookup table is produced.  If there are less than this
+# number, a macro with inline comparisons is generated
+minTableSize = 6
+
+# dictionary of ranges, key=range, element contains list of funcs using it
+Ranges = {}
+
+# dictionary of functions, key=name, element contains char-map and range-list
+Functs = {}
+
+state = 0
+
+try:
+    defines = open("chvalid.def", "r")
+except:
+    print "Missing chvalid.def, aborting ..."
+    sys.exit(1)
+
+#
+# The lines in the .def file have three types:-
+#   name:   Defines a new function block
+#   ur:	    Defines individual or ranges of unicode values
+#   end:    Indicates the end of the function block
+#
+# These lines are processed below.
+#
+for line in defines.readlines():
+    # ignore blank lines, or lines beginning with '#'
+    if line[0] == '#':
+        continue
+    line = string.strip(line)
+    if line == '':
+        continue
+    # split line into space-separated fields, then split on type
+    try:
+        fields = string.split(line, ' ')
+	#
+	# name line:
+	#   validate any previous function block already ended
+	#   validate this function not already defined
+	#   initialize an entry in the function dicitonary
+	#	including a mask table with no values yet defined
+	#
+	if fields[0] == 'name':
+	    name = fields[1]
+	    if state != 0:
+		print "'name' %s found before previous name" \
+		      "completed" % (fields[1])
+		continue
+	    state = 1
+	    if Functs.has_key(name):
+		print "name '%s' already present - may give" \
+		      " wrong results" % (name)
+	    else:
+		# dict entry with two list elements (chdata, rangedata)
+		Functs[name] = [ [], [] ]
+		for v in range(256):
+		    Functs[name][0].append(0)
+	#
+	# end line:
+	#   validate there was a preceding function name line
+	#   set state to show no current function active
+	#
+	elif fields[0] == 'end':
+	    if state == 0:
+		print "'end' found outside of function block"
+		continue
+	    state = 0
+
+	#
+	# ur line:
+	#   validate function has been defined
+	#   process remaining fields on the line, which may be either
+	#	individual unicode values or ranges of values
+	#
+	elif fields[0] == 'ur':
+	    if state != 1:
+		raise ValidationError, "'ur' found outside of 'name' block"
+	    for el in fields[1:]:
+		pos = string.find(el, '..')
+		# pos <=0 means not a range, so must be individual value
+		if pos <= 0:
+		    # cheap handling of hex or decimal values
+		    if el[0:2] == '0x':
+		        value = int(el[2:],16)
+		    elif el[0] == "'":
+			value = ord(el[1])
+		    else:
+			value = int(el)
+		    if ((value < 0) | (value > 0x1fffff)):
+			raise ValidationError, 'Illegal value (%s) in ch for'\
+				' name %s' % (el,name)
+		    # for ur we have only ranges (makes things simpler),
+		    # so convert val to range
+		    currange = (value, value)
+		# pos > 0 means this is a range, so isolate/validate
+		# the interval
+		else:
+		    # split the range into it's first-val, last-val
+		    (first, last) = string.split(el, "..")
+		    # convert values from text into binary
+		    if first[0:2] == '0x':	
+			start = int(first[2:],16)
+		    elif first[0] == "'":
+			start = ord(first[1])
+		    else:
+			start = int(first)
+		    if last[0:2] == '0x':
+			end = int(last[2:],16)
+		    elif last[0] == "'":
+			end = ord(last[1])
+		    else:
+			end = int(last)
+		    if (start < 0) | (end > 0x1fffff) | (start > end):
+			raise ValidationError, "Invalid range '%s'" % el
+		    currange = (start, end)
+		# common path - 'currange' has the range, now take care of it
+		# We split on single-byte values vs. multibyte
+		if currange[1] < 0x100:	# single-byte
+		    for ch in range(currange[0],currange[1]+1):
+			# validate that value not previously defined
+			if Functs[name][0][ch]:
+			    msg = "Duplicate ch value '%s' for name '%s'" % (el, name)
+			    raise ValidationError, msg
+			Functs[name][0][ch] = 1
+		else:			# multi-byte
+		    if Ranges.has_key(currange):
+			Ranges[currange].append(name)
+		    else:
+			Ranges[currange] = [ name ]
+		    if currange in Functs[name][1]:
+			raise ValidationError, "range already defined in" \
+				" function"
+		    else:
+			Functs[name][1].append(currange)
+
+    except:
+	print "Failed to process line: %s" % (line)
+	raise
+#
+# At this point, the entire definition file has been processed.  Now we
+# enter the output phase, where we generate the two files chvalid.c and
+# chvalid.h
+#
+# To do this, we first output the 'static' data (heading, fixed
+# definitions, etc.), then output the 'dynamic' data (the results
+# of the above processing), and finally output closing 'static' data
+# (e.g. the subroutine to process the ranges)
+#
+
+#
+# Generate the headings:
+#
+try:
+    header = open("chvalid.h", "w")
+except:
+    print "Failed to open chvalid.h"
+    sys.exit(1)
+
+try:
+    output = open("chvalid.c", "w")
+except:
+    print "Failed to open chvalid.c"
+    sys.exit(1)
+
+date = time.asctime(time.localtime(time.time()))
+
+header.write(
+"""/*
+ * chvalid.h: this header exports interfaces for the character
+ *		 range validation APIs
+ *
+ * This file is automatically generated from the cvs source
+ * definition files using the genChRanges.py Python script
+ *
+ * Generation date: %s
+ * Sources: %s
+ * William Brack <wbrack@mmm.com.hk>
+ */
+
+#ifndef __XML_CHVALID_H__
+#define __XML_CHVALID_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Define our typedefs and structures
+ *
+ */
+typedef struct _xmlChSRange xmlChSRange;
+typedef xmlChSRange *xmlChSRangePtr;
+struct _xmlChSRange {
+    unsigned short	low;
+    unsigned short	high;
+};
+
+typedef struct _xmlChLRange xmlChLRange;
+typedef xmlChLRange *xmlChLRangePtr;
+struct _xmlChLRange {
+    unsigned		low;
+    unsigned		high;
+};
+
+typedef struct _xmlChRangeGroup xmlChRangeGroup;
+typedef xmlChRangeGroup *xmlChRangeGroupPtr;
+struct _xmlChRangeGroup {
+    int			nbShortRange;
+    int			nbLongRange;
+    xmlChSRangePtr	shortRange;	/* points to an array of ranges */
+    xmlChLRangePtr	longRange;
+};
+
+/* Range checking routine */
+int xmlCharInRange(unsigned int val, const xmlChRangeGroupPtr group);
+
+""" % (date, sources));
+output.write(
+"""/*
+ * chvalid.c:	this module implements the character range
+ *		validation APIs
+ *
+ * This file is automatically generated from the cvs source
+ * definition files using the genChRanges.py Python script
+ *
+ * Generation date: %s
+ * Sources: %s
+ * William Brack <wbrack@mmm.com.hk>
+ */
+
+#include "chvalid.h"
+
+/*
+ * The initial tables ({func_name}_tab) are used to validate whether a
+ * single-byte character is within the specified group.  Each table
+ * contains 256 bytes, with each byte representing one of the 256
+ * possible characters.  If the table byte is set, the character is
+ * allowed.
+ *
+ */
+""" % (date, sources));
+
+#
+# Now output the generated data.
+# We try to produce the best execution times.  Tests have shown that validation
+# with direct table lookup is, when there are a "small" number of valid items,
+# still not as fast as a sequence of inline compares.  So, if the single-byte
+# portion of a range has a "small" number of ranges, we output a macro for inline
+# compares, otherwise we output a 256-byte table and a macro to use it.
+#
+
+fkeys = Functs.keys()	# Dictionary of all defined functions
+fkeys.sort()		# Put some order to our output
+
+for f in fkeys:
+# For each function: emit its single-byte checker, then the combined macro.
+# First we convert the specified single-byte values into a group of ranges.
+# If the total number of such ranges is less than minTableSize, we generate
+# an inline macro for direct comparisons; otherwise we generate a lookup
+# table.
+    if max(Functs[f][0]) > 0:	# only check if at least one entry
+        rangeTable = makeRange(Functs[f][0])
+	numRanges = len(rangeTable)
+	if numRanges >= minTableSize:	# table is worthwhile
+	    header.write("extern unsigned char %s_tab[256];\n" % f)
+	    header.write("#define %s_ch(c)\t(%s_tab[(c)])\n" % (f, f))
+
+	    # write the constant data to the code file
+	    output.write("unsigned char %s_tab[256] = {\n" % f)
+	    pline = "   "
+	    for n in range(255):	# entries 0..254; entry 255 closes the table
+		pline += " 0x%02x," % Functs[f][0][n]
+		if len(pline) > 72:
+		    output.write(pline + "\n")
+		    pline = "   "
+	    output.write(pline + " 0x%02x };\n\n" % Functs[f][0][255])
+
+	else:		# inline check is used
+	    # first another little optimisation - if space is present,
+	    # put it at the front of the list so it is checked first
+	    try:
+		rangeTable.remove((0x20, 0x20))	# raises ValueError if absent
+		rangeTable.insert(0, (0x20, 0x20))
+	    except ValueError:		# no space entry - keep original order
+		pass
+	    pline = "#define %s_ch(c)\t( " % f
+	    firstFlag = 1
+	    for rg in rangeTable:
+		if not firstFlag:
+		    pline += " || \\\n\t\t\t"
+		else:
+		    firstFlag = 0
+		if rg[0] == rg[1]:		# single value - check equal
+		    pline += "((c) == " + hex(rg[0]) + ")"
+		else:			# value range
+		    pline += "((" + hex(rg[0]) + "<= (c)) &&"
+		    pline += " ((c) <= " + hex(rg[1]) + "))"
+	    pline += ")\n"
+	    header.write(pline)
+
+    header.write("#define %s(c)\t(((c) < 0x100) ? \\\n\t\t\t\t" % f)
+    if max(Functs[f][0]) > 0:
+	header.write("%s_ch((c)) :" % f)
+    else:
+	header.write("0 :")
+
+    # if no ranges defined, value invalid if >= 0x100
+    if len(Functs[f][1]) == 0:
+	header.write(" 0)\n\n")
+    else:
+	header.write(" \\\n\t\t\t\txmlCharInRange((c), &%sGroup))\n\n"  % f)
+
+    if len(Functs[f][1]) > 0:
+	header.write("extern xmlChRangeGroup %sGroup;\n" % f)
+
+
+#
+# Next we do the unicode ranges
+#
+
+for f in fkeys:
+    if len(Functs[f][1]) > 0:	# only generate if unicode ranges present
+	rangeTable = Functs[f][1]
+	rangeTable.sort()	# ascending tuple sequence
+	numShort = 0
+	numLong  = 0
+	for rg in rangeTable:
+	    if rg[1] < 0x10000:	# if short value
+		if numShort == 0:	# first occurrence
+		    pline = "static xmlChSRange %s_srng[] = { " % f
+		else:
+		    pline += ", "
+		numShort += 1
+		if len(pline) > 60:
+		    output.write(pline + "\n")
+		    pline = "    "
+		pline += "{0x%x, 0x%x}" % (rg[0], rg[1])
+	    else:		# if long value
+		if numLong == 0:	# first occurrence
+		    if numShort > 0:	# if there were shorts, finish them off
+			output.write(pline + "};\n")
+		    pline = "static xmlChLRange %s_lrng[] = { " % f
+		else:
+		    pline += ", "
+		numLong += 1
+		if len(pline) > 60:
+		    output.write(pline + "\n")
+		    pline = "    "
+		pline += "{0x%x, 0x%x}" % (rg[0], rg[1])
+	output.write(pline + "};\n")	# finish off last group
+
+	pline = "xmlChRangeGroup %sGroup = {%d, %d, " % (f, numShort, numLong)
+	# emit both pointers (0 if absent): a long-only group must stay valid C
+	srng = (numShort > 0) and ("%s_srng" % f) or "0"
+	lrng = (numLong > 0) and ("%s_lrng" % f) or "0"
+	pline += "%s, %s" % (srng, lrng)
+	
+	output.write(pline + "};\n\n")
+#
+# Run complete - write trailers and close the output files
+#
+
+header.write("""
+#ifdef __cplusplus
+}
+#endif
+#endif /* __XML_CHVALID_H__ */
+""");
+
+header.close()
+
+output.write(
+"""
+/* Binary search of rptr's ascending ranges: 1 if val is inside one, else 0 */
+int
+xmlCharInRange (unsigned int val, xmlChRangeGroupPtr rptr) {
+    int low, high, mid;		/* inclusive search bounds and probe index */
+    xmlChSRangePtr sptr;
+    xmlChLRangePtr lptr;
+    if (val < 0x10000) {	/* is val in 'short' or 'long'  array? */
+	if (rptr->nbShortRange == 0)
+	    return 0;
+	low = 0;
+	high = rptr->nbShortRange - 1;	/* last valid index, not the count */
+	sptr = rptr->shortRange;
+	while (low <= high) {
+	    mid = (low + high) / 2;
+	    if ((unsigned short) val < sptr[mid].low)
+		high = mid - 1;
+	    else if ((unsigned short) val > sptr[mid].high)
+		low = mid + 1;
+	    else
+		return 1;
+	}
+    } else {
+	if (rptr->nbLongRange == 0)
+	    return 0;
+	low = 0;
+	high = rptr->nbLongRange - 1;	/* last valid index, not the count */
+	lptr = rptr->longRange;
+	while (low <= high) {
+	    mid = (low + high) / 2;
+	    if (val < lptr[mid].low)
+		high = mid - 1;
+	    else if (val > lptr[mid].high)
+		low = mid + 1;
+	    else
+		return 1;
+	}
+    }
+    return 0;
+}
+""");
+
+output.close()
diff --git a/include/libxml/Makefile.am b/include/libxml/Makefile.am
index b9b7014..1bf338c 100644
--- a/include/libxml/Makefile.am
+++ b/include/libxml/Makefile.am
@@ -43,7 +43,8 @@
 		dict.h \
 		SAX2.h \
 		xmlexports.h \
-		xmldwalk.h
+		xmldwalk.h \
+		chvalid.h
 
 install-exec-hook:
 	$(mkinstalldirs) $(DESTDIR)$(xmlincdir)
diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h
index 800cdef..2956064 100644
--- a/include/libxml/parserInternals.h
+++ b/include/libxml/parserInternals.h
@@ -13,6 +13,7 @@
 #include <libxml/xmlversion.h>
 #include <libxml/parser.h>
 #include <libxml/HTMLparser.h>
+#include <libxml/chvalid.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -48,8 +49,7 @@
  * [2] Char ::= #x9 | #xA | #xD | [#x20...]
  * any byte character in the accepted range
  */
-#define IS_BYTE_CHAR(c)							\
-    (((c) >= 0x20) || ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D))
+#define IS_BYTE_CHAR(c)	 xmlIsChar_ch(c)
 
 /**
  * IS_CHAR:
@@ -61,11 +61,7 @@
  *                  | [#x10000-#x10FFFF]
  * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
  */
-#define IS_CHAR(c)							\
-    ((((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
-     ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||			\
-     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
-     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
+#define IS_CHAR(c)   xmlIsChar(c)
 
 /**
  * IS_BLANK:
@@ -75,8 +71,7 @@
  *
  * [3] S ::= (#x20 | #x9 | #xD | #xA)+
  */
-#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xA) ||	\
-                     ((c) == 0x0D))
+#define IS_BLANK(c)  xmlIsBlank(c)
 
 /**
  * IS_BASECHAR:
@@ -193,15 +188,7 @@
 /*
  * Function to finish the work of the macros where needed.
  */
-XMLPUBFUN int XMLCALL			xmlIsBaseChar	(int c);
-XMLPUBFUN int XMLCALL			xmlIsBlank	(int c);
-XMLPUBFUN int XMLCALL			xmlIsPubidChar	(int c);
-XMLPUBFUN int XMLCALL			xmlIsLetter	(int c);
-XMLPUBFUN int XMLCALL			xmlIsDigit	(int c);
-XMLPUBFUN int XMLCALL			xmlIsIdeographic(int c);
-XMLPUBFUN int XMLCALL			xmlIsExtender	(int c);
-XMLPUBFUN int XMLCALL			xmlIsCombining	(int c);
-XMLPUBFUN int XMLCALL			xmlIsChar	(int c);
+XMLPUBFUN int XMLCALL                   xmlIsLetter     (int c);
 
 /**
  * Parser context.
diff --git a/parserInternals.c b/parserInternals.c
index 562c578..2d1bd69 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -53,6 +53,7 @@
 #include <libxml/catalog.h>
 #endif
 #include <libxml/globals.h>
+#include <libxml/chvalid.h>
 
 /*
  * Various global defaults for parsing
@@ -196,472 +197,6 @@
     }
 }
 
-/************************************************************************
- *									*
- * 		Some functions to avoid too large macros		*
- *									*
- ************************************************************************/
-
-/**
- * xmlIsChar:
- * @c:  an unicode character (int)
- *
- * Check whether the character is allowed by the production
- * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
- *                  | [#x10000-#x10FFFF]
- * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
- * Also available as a macro IS_CHAR()
- *
- * Returns 0 if not, non-zero otherwise
- */
-int
-xmlIsChar(int c) {
-    return(
-     ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||
-     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||
-     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||
-     (((c) >= 0x10000) && ((c) <= 0x10FFFF)));
-}
-
-/**
- * xmlIsBlank:
- * @c:  an unicode character (int)
- *
- * Check whether the character is allowed by the production
- * [3] S ::= (#x20 | #x9 | #xD | #xA)+
- * Also available as a macro IS_BLANK()
- *
- * Returns 0 if not, non-zero otherwise
- */
-int
-xmlIsBlank(int c) {
-    return(((c) == 0x20) || ((c) == 0x09) || ((c) == 0xA) || ((c) == 0x0D));
-}
-
-static int xmlBaseArray[] = {
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
-  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
-  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
-  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
-  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
-  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
-  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
-};
-
-/**
- * xmlIsBaseChar:
- * @c:  an unicode character (int)
- *
- * Check whether the character is allowed by the production
- * [85] BaseChar ::= ... long list see REC ...
- *
- * VI is your friend !
- * :1,$ s/\[#x\([0-9A-Z]*\)-#x\([0-9A-Z]*\)\]/     (((c) >= 0x\1) \&\& ((c) <= 0x\2)) ||/
- * and 
- * :1,$ s/#x\([0-9A-Z]*\)/     ((c) == 0x\1) ||/
- *
- * Returns 0 if not, non-zero otherwise
- */
-int
-xmlIsBaseChar(int c) {
-    if (c < 0x0100) return(xmlBaseArray[c]);
-    return((((c) >= 0x0100) && ((c) <= 0x0131)) ||
-      (((c) >= 0x0134) && ((c) <= 0x013E)) ||
-      (((c) >= 0x0141) && ((c) <= 0x0148)) ||
-      (((c) >= 0x014A) && ((c) <= 0x017E)) ||
-      (((c) >= 0x0180) && ((c) <= 0x01C3)) ||
-      (((c) >= 0x01CD) && ((c) <= 0x01F0)) ||
-      (((c) >= 0x01F4) && ((c) <= 0x01F5)) ||
-      (((c) >= 0x01FA) && ((c) <= 0x0217)) ||
-      (((c) >= 0x0250) && ((c) <= 0x02A8)) ||
-      (((c) >= 0x02BB) && ((c) <= 0x02C1)) ||
-      ((c) == 0x0386) ||
-      (((c) >= 0x0388) && ((c) <= 0x038A)) ||
-      ((c) == 0x038C) ||
-      (((c) >= 0x038E) && ((c) <= 0x03A1)) ||
-      (((c) >= 0x03A3) && ((c) <= 0x03CE)) ||
-      (((c) >= 0x03D0) && ((c) <= 0x03D6)) ||
-      ((c) == 0x03DA) ||
-      ((c) == 0x03DC) ||
-      ((c) == 0x03DE) ||
-      ((c) == 0x03E0) ||
-      (((c) >= 0x03E2) && ((c) <= 0x03F3)) ||
-      (((c) >= 0x0401) && ((c) <= 0x040C)) ||
-      (((c) >= 0x040E) && ((c) <= 0x044F)) ||
-      (((c) >= 0x0451) && ((c) <= 0x045C)) ||
-      (((c) >= 0x045E) && ((c) <= 0x0481)) ||
-      (((c) >= 0x0490) && ((c) <= 0x04C4)) ||
-      (((c) >= 0x04C7) && ((c) <= 0x04C8)) ||
-      (((c) >= 0x04CB) && ((c) <= 0x04CC)) ||
-      (((c) >= 0x04D0) && ((c) <= 0x04EB)) ||
-      (((c) >= 0x04EE) && ((c) <= 0x04F5)) ||
-      (((c) >= 0x04F8) && ((c) <= 0x04F9)) ||
-      (((c) >= 0x0531) && ((c) <= 0x0556)) ||
-      ((c) == 0x0559) ||
-      (((c) >= 0x0561) && ((c) <= 0x0586)) ||
-      (((c) >= 0x05D0) && ((c) <= 0x05EA)) ||
-      (((c) >= 0x05F0) && ((c) <= 0x05F2)) ||
-      (((c) >= 0x0621) && ((c) <= 0x063A)) ||
-      (((c) >= 0x0641) && ((c) <= 0x064A)) ||
-      (((c) >= 0x0671) && ((c) <= 0x06B7)) ||
-      (((c) >= 0x06BA) && ((c) <= 0x06BE)) ||
-      (((c) >= 0x06C0) && ((c) <= 0x06CE)) ||
-      (((c) >= 0x06D0) && ((c) <= 0x06D3)) ||
-      ((c) == 0x06D5) ||
-      (((c) >= 0x06E5) && ((c) <= 0x06E6)) ||
-     (((c) >= 0x905) && (	/* accelerator */
-      (((c) >= 0x0905) && ((c) <= 0x0939)) ||
-      ((c) == 0x093D) ||
-      (((c) >= 0x0958) && ((c) <= 0x0961)) ||
-      (((c) >= 0x0985) && ((c) <= 0x098C)) ||
-      (((c) >= 0x098F) && ((c) <= 0x0990)) ||
-      (((c) >= 0x0993) && ((c) <= 0x09A8)) ||
-      (((c) >= 0x09AA) && ((c) <= 0x09B0)) ||
-      ((c) == 0x09B2) ||
-      (((c) >= 0x09B6) && ((c) <= 0x09B9)) ||
-      (((c) >= 0x09DC) && ((c) <= 0x09DD)) ||
-      (((c) >= 0x09DF) && ((c) <= 0x09E1)) ||
-      (((c) >= 0x09F0) && ((c) <= 0x09F1)) ||
-      (((c) >= 0x0A05) && ((c) <= 0x0A0A)) ||
-      (((c) >= 0x0A0F) && ((c) <= 0x0A10)) ||
-      (((c) >= 0x0A13) && ((c) <= 0x0A28)) ||
-      (((c) >= 0x0A2A) && ((c) <= 0x0A30)) ||
-      (((c) >= 0x0A32) && ((c) <= 0x0A33)) ||
-      (((c) >= 0x0A35) && ((c) <= 0x0A36)) ||
-      (((c) >= 0x0A38) && ((c) <= 0x0A39)) ||
-      (((c) >= 0x0A59) && ((c) <= 0x0A5C)) ||
-      ((c) == 0x0A5E) ||
-      (((c) >= 0x0A72) && ((c) <= 0x0A74)) ||
-      (((c) >= 0x0A85) && ((c) <= 0x0A8B)) ||
-      ((c) == 0x0A8D) ||
-      (((c) >= 0x0A8F) && ((c) <= 0x0A91)) ||
-      (((c) >= 0x0A93) && ((c) <= 0x0AA8)) ||
-      (((c) >= 0x0AAA) && ((c) <= 0x0AB0)) ||
-      (((c) >= 0x0AB2) && ((c) <= 0x0AB3)) ||
-      (((c) >= 0x0AB5) && ((c) <= 0x0AB9)) ||
-      ((c) == 0x0ABD) ||
-      ((c) == 0x0AE0) ||
-      (((c) >= 0x0B05) && ((c) <= 0x0B0C)) ||
-      (((c) >= 0x0B0F) && ((c) <= 0x0B10)) ||
-      (((c) >= 0x0B13) && ((c) <= 0x0B28)) ||
-      (((c) >= 0x0B2A) && ((c) <= 0x0B30)) ||
-      (((c) >= 0x0B32) && ((c) <= 0x0B33)) ||
-      (((c) >= 0x0B36) && ((c) <= 0x0B39)) ||
-      ((c) == 0x0B3D) ||
-      (((c) >= 0x0B5C) && ((c) <= 0x0B5D)) ||
-      (((c) >= 0x0B5F) && ((c) <= 0x0B61)) ||
-      (((c) >= 0x0B85) && ((c) <= 0x0B8A)) ||
-      (((c) >= 0x0B8E) && ((c) <= 0x0B90)) ||
-      (((c) >= 0x0B92) && ((c) <= 0x0B95)) ||
-      (((c) >= 0x0B99) && ((c) <= 0x0B9A)) ||
-      ((c) == 0x0B9C) ||
-      (((c) >= 0x0B9E) && ((c) <= 0x0B9F)) ||
-      (((c) >= 0x0BA3) && ((c) <= 0x0BA4)) ||
-      (((c) >= 0x0BA8) && ((c) <= 0x0BAA)) ||
-      (((c) >= 0x0BAE) && ((c) <= 0x0BB5)) ||
-      (((c) >= 0x0BB7) && ((c) <= 0x0BB9)) ||
-      (((c) >= 0x0C05) && ((c) <= 0x0C0C)) ||
-      (((c) >= 0x0C0E) && ((c) <= 0x0C10)) ||
-      (((c) >= 0x0C12) && ((c) <= 0x0C28)) ||
-      (((c) >= 0x0C2A) && ((c) <= 0x0C33)) ||
-      (((c) >= 0x0C35) && ((c) <= 0x0C39)) ||
-      (((c) >= 0x0C60) && ((c) <= 0x0C61)) ||
-      (((c) >= 0x0C85) && ((c) <= 0x0C8C)) ||
-      (((c) >= 0x0C8E) && ((c) <= 0x0C90)) ||
-      (((c) >= 0x0C92) && ((c) <= 0x0CA8)) ||
-      (((c) >= 0x0CAA) && ((c) <= 0x0CB3)) ||
-      (((c) >= 0x0CB5) && ((c) <= 0x0CB9)) ||
-      ((c) == 0x0CDE) ||
-      (((c) >= 0x0CE0) && ((c) <= 0x0CE1)) ||
-      (((c) >= 0x0D05) && ((c) <= 0x0D0C)) ||
-      (((c) >= 0x0D0E) && ((c) <= 0x0D10)) ||
-      (((c) >= 0x0D12) && ((c) <= 0x0D28)) ||
-      (((c) >= 0x0D2A) && ((c) <= 0x0D39)) ||
-      (((c) >= 0x0D60) && ((c) <= 0x0D61)) ||
-      (((c) >= 0x0E01) && ((c) <= 0x0E2E)) ||
-      ((c) == 0x0E30) ||
-      (((c) >= 0x0E32) && ((c) <= 0x0E33)) ||
-      (((c) >= 0x0E40) && ((c) <= 0x0E45)) ||
-      (((c) >= 0x0E81) && ((c) <= 0x0E82)) ||
-      ((c) == 0x0E84) ||
-      (((c) >= 0x0E87) && ((c) <= 0x0E88)) ||
-      ((c) == 0x0E8A) ||
-      ((c) == 0x0E8D) ||
-      (((c) >= 0x0E94) && ((c) <= 0x0E97)) ||
-      (((c) >= 0x0E99) && ((c) <= 0x0E9F)) ||
-      (((c) >= 0x0EA1) && ((c) <= 0x0EA3)) ||
-      ((c) == 0x0EA5) ||
-      ((c) == 0x0EA7) ||
-      (((c) >= 0x0EAA) && ((c) <= 0x0EAB)) ||
-      (((c) >= 0x0EAD) && ((c) <= 0x0EAE)) ||
-      ((c) == 0x0EB0) ||
-      (((c) >= 0x0EB2) && ((c) <= 0x0EB3)) ||
-      ((c) == 0x0EBD) ||
-      (((c) >= 0x0EC0) && ((c) <= 0x0EC4)) ||
-      (((c) >= 0x0F40) && ((c) <= 0x0F47)) ||
-      (((c) >= 0x0F49) && ((c) <= 0x0F69)) ||
-     (((c) >= 0x10A0) && (	/* accelerator */
-      (((c) >= 0x10A0) && ((c) <= 0x10C5)) ||
-      (((c) >= 0x10D0) && ((c) <= 0x10F6)) ||
-      ((c) == 0x1100) ||
-      (((c) >= 0x1102) && ((c) <= 0x1103)) ||
-      (((c) >= 0x1105) && ((c) <= 0x1107)) ||
-      ((c) == 0x1109) ||
-      (((c) >= 0x110B) && ((c) <= 0x110C)) ||
-      (((c) >= 0x110E) && ((c) <= 0x1112)) ||
-      ((c) == 0x113C) ||
-      ((c) == 0x113E) ||
-      ((c) == 0x1140) ||
-      ((c) == 0x114C) ||
-      ((c) == 0x114E) ||
-      ((c) == 0x1150) ||
-      (((c) >= 0x1154) && ((c) <= 0x1155)) ||
-      ((c) == 0x1159) ||
-      (((c) >= 0x115F) && ((c) <= 0x1161)) ||
-      ((c) == 0x1163) ||
-      ((c) == 0x1165) ||
-      ((c) == 0x1167) ||
-      ((c) == 0x1169) ||
-      (((c) >= 0x116D) && ((c) <= 0x116E)) ||
-      (((c) >= 0x1172) && ((c) <= 0x1173)) ||
-      ((c) == 0x1175) ||
-      ((c) == 0x119E) ||
-      ((c) == 0x11A8) ||
-      ((c) == 0x11AB) ||
-      (((c) >= 0x11AE) && ((c) <= 0x11AF)) ||
-      (((c) >= 0x11B7) && ((c) <= 0x11B8)) ||
-      ((c) == 0x11BA) ||
-      (((c) >= 0x11BC) && ((c) <= 0x11C2)) ||
-      ((c) == 0x11EB) ||
-      ((c) == 0x11F0) ||
-      ((c) == 0x11F9) ||
-      (((c) >= 0x1E00) && ((c) <= 0x1E9B)) ||
-      (((c) >= 0x1EA0) && ((c) <= 0x1EF9)) ||
-      (((c) >= 0x1F00) && ((c) <= 0x1F15)) ||
-      (((c) >= 0x1F18) && ((c) <= 0x1F1D)) ||
-      (((c) >= 0x1F20) && ((c) <= 0x1F45)) ||
-      (((c) >= 0x1F48) && ((c) <= 0x1F4D)) ||
-      (((c) >= 0x1F50) && ((c) <= 0x1F57)) ||
-      ((c) == 0x1F59) ||
-      ((c) == 0x1F5B) ||
-      ((c) == 0x1F5D) ||
-      (((c) >= 0x1F5F) && ((c) <= 0x1F7D)) ||
-      (((c) >= 0x1F80) && ((c) <= 0x1FB4)) ||
-      (((c) >= 0x1FB6) && ((c) <= 0x1FBC)) ||
-      ((c) == 0x1FBE) ||
-      (((c) >= 0x1FC2) && ((c) <= 0x1FC4)) ||
-      (((c) >= 0x1FC6) && ((c) <= 0x1FCC)) ||
-      (((c) >= 0x1FD0) && ((c) <= 0x1FD3)) ||
-      (((c) >= 0x1FD6) && ((c) <= 0x1FDB)) ||
-      (((c) >= 0x1FE0) && ((c) <= 0x1FEC)) ||
-      (((c) >= 0x1FF2) && ((c) <= 0x1FF4)) ||
-      (((c) >= 0x1FF6) && ((c) <= 0x1FFC)) ||
-      ((c) == 0x2126) ||
-      (((c) >= 0x212A) && ((c) <= 0x212B)) ||
-      ((c) == 0x212E) ||
-      (((c) >= 0x2180) && ((c) <= 0x2182)) ||
-      (((c) >= 0x3041) && ((c) <= 0x3094)) ||
-      (((c) >= 0x30A1) && ((c) <= 0x30FA)) ||
-      (((c) >= 0x3105) && ((c) <= 0x312C)) ||
-      (((c) >= 0xAC00) && ((c) <= 0xD7A3))) /* accelerators */
-      ))));
-}
-
-/**
- * xmlIsDigit:
- * @c:  an unicode character (int)
- *
- * Check whether the character is allowed by the production
- * [88] Digit ::= ... long list see REC ...
- *
- * Returns 0 if not, non-zero otherwise
- */
-int
-xmlIsDigit(int c) {
-    return(
-      (((c) >= 0x0030) && ((c) <= 0x0039)) ||
-     (((c) >= 0x660) && (	/* accelerator */
-      (((c) >= 0x0660) && ((c) <= 0x0669)) ||
-      (((c) >= 0x06F0) && ((c) <= 0x06F9)) ||
-      (((c) >= 0x0966) && ((c) <= 0x096F)) ||
-      (((c) >= 0x09E6) && ((c) <= 0x09EF)) ||
-      (((c) >= 0x0A66) && ((c) <= 0x0A6F)) ||
-      (((c) >= 0x0AE6) && ((c) <= 0x0AEF)) ||
-      (((c) >= 0x0B66) && ((c) <= 0x0B6F)) ||
-      (((c) >= 0x0BE7) && ((c) <= 0x0BEF)) ||
-      (((c) >= 0x0C66) && ((c) <= 0x0C6F)) ||
-      (((c) >= 0x0CE6) && ((c) <= 0x0CEF)) ||
-      (((c) >= 0x0D66) && ((c) <= 0x0D6F)) ||
-      (((c) >= 0x0E50) && ((c) <= 0x0E59)) ||
-      (((c) >= 0x0ED0) && ((c) <= 0x0ED9)) ||
-      (((c) >= 0x0F20) && ((c) <= 0x0F29))) /* accelerator */ ));
-}
-
-/**
- * xmlIsCombining:
- * @c:  an unicode character (int)
- *
- * Check whether the character is allowed by the production
- * [87] CombiningChar ::= ... long list see REC ...
- *
- * Returns 0 if not, non-zero otherwise
- */
-int
-xmlIsCombining(int c) {
-    return(
-     (((c) >= 0x300) && (	/* accelerator */
-      (((c) >= 0x0300) && ((c) <= 0x0345)) ||
-      (((c) >= 0x0360) && ((c) <= 0x0361)) ||
-      (((c) >= 0x0483) && ((c) <= 0x0486)) ||
-      (((c) >= 0x0591) && ((c) <= 0x05A1)) ||
-      (((c) >= 0x05A3) && ((c) <= 0x05B9)) ||
-      (((c) >= 0x05BB) && ((c) <= 0x05BD)) ||
-      ((c) == 0x05BF) ||
-      (((c) >= 0x05C1) && ((c) <= 0x05C2)) ||
-      ((c) == 0x05C4) ||
-      (((c) >= 0x064B) && ((c) <= 0x0652)) ||
-      ((c) == 0x0670) ||
-      (((c) >= 0x06D6) && ((c) <= 0x06DC)) ||
-      (((c) >= 0x06DD) && ((c) <= 0x06DF)) ||
-      (((c) >= 0x06E0) && ((c) <= 0x06E4)) ||
-      (((c) >= 0x06E7) && ((c) <= 0x06E8)) ||
-      (((c) >= 0x06EA) && ((c) <= 0x06ED)) ||
-     (((c) >= 0x0901) && (	/* accelerator */
-      (((c) >= 0x0901) && ((c) <= 0x0903)) ||
-      ((c) == 0x093C) ||
-      (((c) >= 0x093E) && ((c) <= 0x094C)) ||
-      ((c) == 0x094D) ||
-      (((c) >= 0x0951) && ((c) <= 0x0954)) ||
-      (((c) >= 0x0962) && ((c) <= 0x0963)) ||
-      (((c) >= 0x0981) && ((c) <= 0x0983)) ||
-      ((c) == 0x09BC) ||
-      ((c) == 0x09BE) ||
-      ((c) == 0x09BF) ||
-      (((c) >= 0x09C0) && ((c) <= 0x09C4)) ||
-      (((c) >= 0x09C7) && ((c) <= 0x09C8)) ||
-      (((c) >= 0x09CB) && ((c) <= 0x09CD)) ||
-      ((c) == 0x09D7) ||
-      (((c) >= 0x09E2) && ((c) <= 0x09E3)) ||
-     (((c) >= 0x0A02) && (	/* accelerator */
-      ((c) == 0x0A02) ||
-      ((c) == 0x0A3C) ||
-      ((c) == 0x0A3E) ||
-      ((c) == 0x0A3F) ||
-      (((c) >= 0x0A40) && ((c) <= 0x0A42)) ||
-      (((c) >= 0x0A47) && ((c) <= 0x0A48)) ||
-      (((c) >= 0x0A4B) && ((c) <= 0x0A4D)) ||
-      (((c) >= 0x0A70) && ((c) <= 0x0A71)) ||
-      (((c) >= 0x0A81) && ((c) <= 0x0A83)) ||
-      ((c) == 0x0ABC) ||
-      (((c) >= 0x0ABE) && ((c) <= 0x0AC5)) ||
-      (((c) >= 0x0AC7) && ((c) <= 0x0AC9)) ||
-      (((c) >= 0x0ACB) && ((c) <= 0x0ACD)) ||
-      (((c) >= 0x0B01) && ((c) <= 0x0B03)) ||
-      ((c) == 0x0B3C) ||
-      (((c) >= 0x0B3E) && ((c) <= 0x0B43)) ||
-      (((c) >= 0x0B47) && ((c) <= 0x0B48)) ||
-      (((c) >= 0x0B4B) && ((c) <= 0x0B4D)) ||
-      (((c) >= 0x0B56) && ((c) <= 0x0B57)) ||
-      (((c) >= 0x0B82) && ((c) <= 0x0B83)) ||
-      (((c) >= 0x0BBE) && ((c) <= 0x0BC2)) ||
-      (((c) >= 0x0BC6) && ((c) <= 0x0BC8)) ||
-      (((c) >= 0x0BCA) && ((c) <= 0x0BCD)) ||
-      ((c) == 0x0BD7) ||
-      (((c) >= 0x0C01) && ((c) <= 0x0C03)) ||
-      (((c) >= 0x0C3E) && ((c) <= 0x0C44)) ||
-      (((c) >= 0x0C46) && ((c) <= 0x0C48)) ||
-      (((c) >= 0x0C4A) && ((c) <= 0x0C4D)) ||
-      (((c) >= 0x0C55) && ((c) <= 0x0C56)) ||
-      (((c) >= 0x0C82) && ((c) <= 0x0C83)) ||
-      (((c) >= 0x0CBE) && ((c) <= 0x0CC4)) ||
-      (((c) >= 0x0CC6) && ((c) <= 0x0CC8)) ||
-      (((c) >= 0x0CCA) && ((c) <= 0x0CCD)) ||
-      (((c) >= 0x0CD5) && ((c) <= 0x0CD6)) ||
-      (((c) >= 0x0D02) && ((c) <= 0x0D03)) ||
-      (((c) >= 0x0D3E) && ((c) <= 0x0D43)) ||
-      (((c) >= 0x0D46) && ((c) <= 0x0D48)) ||
-      (((c) >= 0x0D4A) && ((c) <= 0x0D4D)) ||
-      ((c) == 0x0D57) ||
-     (((c) >= 0x0E31) && (	/* accelerator */
-      ((c) == 0x0E31) ||
-      (((c) >= 0x0E34) && ((c) <= 0x0E3A)) ||
-      (((c) >= 0x0E47) && ((c) <= 0x0E4E)) ||
-      ((c) == 0x0EB1) ||
-      (((c) >= 0x0EB4) && ((c) <= 0x0EB9)) ||
-      (((c) >= 0x0EBB) && ((c) <= 0x0EBC)) ||
-      (((c) >= 0x0EC8) && ((c) <= 0x0ECD)) ||
-      (((c) >= 0x0F18) && ((c) <= 0x0F19)) ||
-      ((c) == 0x0F35) ||
-      ((c) == 0x0F37) ||
-      ((c) == 0x0F39) ||
-      ((c) == 0x0F3E) ||
-      ((c) == 0x0F3F) ||
-      (((c) >= 0x0F71) && ((c) <= 0x0F84)) ||
-      (((c) >= 0x0F86) && ((c) <= 0x0F8B)) ||
-      (((c) >= 0x0F90) && ((c) <= 0x0F95)) ||
-      ((c) == 0x0F97) ||
-      (((c) >= 0x0F99) && ((c) <= 0x0FAD)) ||
-      (((c) >= 0x0FB1) && ((c) <= 0x0FB7)) ||
-      ((c) == 0x0FB9) ||
-      (((c) >= 0x20D0) && ((c) <= 0x20DC)) ||
-      ((c) == 0x20E1) ||
-      (((c) >= 0x302A) && ((c) <= 0x302F)) ||
-      ((c) == 0x3099) ||
-      ((c) == 0x309A))))))))));
-}
-
-/**
- * xmlIsExtender:
- * @c:  an unicode character (int)
- *
- * Check whether the character is allowed by the production
- * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
- *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
- *                   [#x309D-#x309E] | [#x30FC-#x30FE]
- *
- * Returns 0 if not, non-zero otherwise
- */
-int
-xmlIsExtender(int c) {
-    switch (c) {
-    case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387:
-    case 0x0640: case 0x0E46: case 0x0EC6: case 0x3005:
-    case 0x3031: case 0x3032: case 0x3033: case 0x3034:
-    case 0x3035: case 0x309D: case 0x309E: case 0x30FC:
-    case 0x30FD: case 0x30FE:
-	return 1;
-    default:
-	return 0;
-    }
-}
-
-/**
- * xmlIsIdeographic:
- * @c:  an unicode character (int)
- *
- * Check whether the character is allowed by the production
- * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
- *
- * Returns 0 if not, non-zero otherwise
- */
-int
-xmlIsIdeographic(int c) {
-    return(((c) < 0x0100) ? 0 :
-     (((c) >= 0x4e00) && ((c) <= 0x9fa5)) ||
-     (((c) >= 0xf900) && ((c) <= 0xfa2d)) ||
-     (((c) >= 0x3021) && ((c) <= 0x3029)) ||
-      ((c) == 0x3007));
-}
-
 /**
  * xmlIsLetter:
  * @c:  an unicode character (int)
@@ -676,29 +211,6 @@
     return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
 }
 
-/**
- * xmlIsPubidChar:
- * @c:  an unicode character (int)
- *
- * Check whether the character is allowed by the production
- * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
- *
- * Returns 0 if not, non-zero otherwise
- */
-int
-xmlIsPubidChar(int c) {
-    return(
-     ((c) == 0x20) || ((c) == 0x0D) || ((c) == 0x0A) ||
-     (((c) >= 'a') && ((c) <= 'z')) ||
-     (((c) >= 'A') && ((c) <= 'Z')) ||
-     (((c) >= '0') && ((c) <= '9')) ||
-     ((c) == '-') || ((c) == '\'') || ((c) == '(') || ((c) == ')') ||
-     ((c) == '+') || ((c) == ',') || ((c) == '.') || ((c) == '/') ||
-     ((c) == ':') || ((c) == '=') || ((c) == '?') || ((c) == ';') ||
-     ((c) == '!') || ((c) == '*') || ((c) == '#') || ((c) == '@') ||
-     ((c) == '$') || ((c) == '_') || ((c) == '%'));
-}
-
 /************************************************************************
  *									*
  * 		Input handling functions for progressive parsing	*