Fixed validation of attribute content of type NAME, NAMES, NMTOKEN and NMTOKENS

* valid.c: fixed validation of attributes content of type
  NAME NAMES NMTOKEN and NMTOKENS to accept internationalized
  values, very old bug. Fixes #67671
Daniel
diff --git a/ChangeLog b/ChangeLog
index db91021..7a50abb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Sun Jan 13 16:37:15 CET 2002 Daniel Veillard <daniel@veillard.com>
+
+	* valid.c: fixed validation of attributes content of type
+	  NAME NAMES NMTOKEN and NMTOKENS to accept internationalized
+	  values, very old bug. Fixes #67671
+
 Sun Jan 13 15:07:49 CET 2002 Daniel Veillard <daniel@veillard.com>
 
 	* parser.c include/libxml/parserInternals.h tree.c: integrated
diff --git a/parserInternals.c b/parserInternals.c
index 9a570c9..4af5d71 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -1391,68 +1391,69 @@
  */
 
 int
-xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
+xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
+{
     if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
-	/*
-	 * We are supposed to handle UTF8, check it's valid
-	 * From rfc2044: encoding of the Unicode values on UTF-8:
-	 *
-	 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
-	 * 0000 0000-0000 007F   0xxxxxxx
-	 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
-	 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
-	 *
-	 * Check for the 0x110000 limit too
-	 */
-	unsigned char c;
-	unsigned int val;
+        /*
+         * We are supposed to handle UTF8, check it's valid
+         * From rfc2044: encoding of the Unicode values on UTF-8:
+         *
+         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
+         * 0000 0000-0000 007F   0xxxxxxx
+         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
+         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx 
+         *
+         * Check for the 0x110000 limit too
+         */
+        unsigned char c;
+        unsigned int val;
 
-	c = *cur;
-	if (c & 0x80) {
-	    if ((cur[1] & 0xc0) != 0x80)
-		goto encoding_error;
-	    if ((c & 0xe0) == 0xe0) {
+        c = *cur;
+        if (c & 0x80) {
+            if ((cur[1] & 0xc0) != 0x80)
+                goto encoding_error;
+            if ((c & 0xe0) == 0xe0) {
 
-		if ((cur[2] & 0xc0) != 0x80)
-		    goto encoding_error;
-		if ((c & 0xf0) == 0xf0) {
-		    if (((c & 0xf8) != 0xf0) ||
-			((cur[3] & 0xc0) != 0x80))
-			goto encoding_error;
-		    /* 4-byte code */
-		    *len = 4;
-		    val = (cur[0] & 0x7) << 18;
-		    val |= (cur[1] & 0x3f) << 12;
-		    val |= (cur[2] & 0x3f) << 6;
-		    val |= cur[3] & 0x3f;
-		} else {
-		  /* 3-byte code */
-		    *len = 3;
-		    val = (cur[0] & 0xf) << 12;
-		    val |= (cur[1] & 0x3f) << 6;
-		    val |= cur[2] & 0x3f;
-		}
-	    } else {
-	      /* 2-byte code */
-		*len = 2;
-		val = (cur[0] & 0x1f) << 6;
-		val |= cur[1] & 0x3f;
-	    }
-	    if (!IS_CHAR(val)) {
-		if ((ctxt->sax != NULL) &&
-		    (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData, 
-				     "Char 0x%X out of allowed range\n", val);
-		ctxt->errNo = XML_ERR_INVALID_ENCODING;
-		ctxt->wellFormed = 0;
-		ctxt->disableSAX = 1;
-	    }    
-	    return(val);
-	} else {
-	    /* 1-byte code */
-	    *len = 1;
-	    return((int) *cur);
-	}
+                if ((cur[2] & 0xc0) != 0x80)
+                    goto encoding_error;
+                if ((c & 0xf0) == 0xf0) {
+                    if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
+                        goto encoding_error;
+                    /* 4-byte code */
+                    *len = 4;
+                    val = (cur[0] & 0x7) << 18;
+                    val |= (cur[1] & 0x3f) << 12;
+                    val |= (cur[2] & 0x3f) << 6;
+                    val |= cur[3] & 0x3f;
+                } else {
+                    /* 3-byte code */
+                    *len = 3;
+                    val = (cur[0] & 0xf) << 12;
+                    val |= (cur[1] & 0x3f) << 6;
+                    val |= cur[2] & 0x3f;
+                }
+            } else {
+                /* 2-byte code */
+                *len = 2;
+                val = (cur[0] & 0x1f) << 6;
+                val |= cur[1] & 0x3f;
+            }
+            if (!IS_CHAR(val) && (ctxt != NULL)) {
+                /* ctxt may be NULL: only report/record with a context */
+                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+                    ctxt->sax->error(ctxt->userData,
+                                     "Char 0x%X out of allowed range\n",
+                                     val);
+                ctxt->errNo = XML_ERR_INVALID_ENCODING;
+                ctxt->wellFormed = 0;
+                ctxt->disableSAX = 1;
+            }
+            return (val);
+        } else {
+            /* 1-byte code */
+            *len = 1;
+            return ((int) *cur);
+        }
     }
     /*
      * Assume it's a fixed length encoding (1) with
@@ -1460,8 +1461,9 @@
      * XML constructs only use < 128 chars
      */
     *len = 1;
-    return((int) *cur);
+    return ((int) *cur);
 encoding_error:
+
     /*
      * If we detect an UTF8 error that probably mean that the
      * input encoding didn't get properly advertised in the
@@ -1469,17 +1471,20 @@
      * to ISO-Latin-1 (if you don't like this policy, just declare the
      * encoding !)
      */
-    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
-	ctxt->sax->error(ctxt->userData, 
-			 "Input is not proper UTF-8, indicate encoding !\n");
-	ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
-			ctxt->input->cur[0], ctxt->input->cur[1],
-			ctxt->input->cur[2], ctxt->input->cur[3]);
+    if (ctxt != NULL) {
+        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
+            ctxt->sax->error(ctxt->userData,
+                         "Input is not proper UTF-8, indicate encoding !\n");
+            ctxt->sax->error(ctxt->userData,
+                             "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+                             ctxt->input->cur[0], ctxt->input->cur[1],
+                             ctxt->input->cur[2], ctxt->input->cur[3]);
+        }
+        ctxt->errNo = XML_ERR_INVALID_ENCODING;
     }
-    ctxt->errNo = XML_ERR_INVALID_ENCODING;
 
     *len = 1;
-    return((int) *cur);
+    return ((int) *cur);
 }
 
 /**
diff --git a/valid.c b/valid.c
index 24ce16a..55c7b9e 100644
--- a/valid.c
+++ b/valid.c
@@ -2565,23 +2565,29 @@
 static int
 xmlValidateNameValue(const xmlChar *value) {
     const xmlChar *cur;
+    int val, len;
 
     if (value == NULL) return(0);
     cur = value;
-    
-    if (!IS_LETTER(*cur) && (*cur != '_') &&
-        (*cur != ':')) {
+    val = xmlStringCurrentChar(NULL, cur, &len);
+    cur += len;
+    if (!IS_LETTER(val) && (val != '_') &&
+        (val != ':')) {
 	return(0);
     }
 
-    while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
-           (*cur == '.') || (*cur == '-') ||
-	   (*cur == '_') || (*cur == ':') || 
-	   (IS_COMBINING(*cur)) ||
-	   (IS_EXTENDER(*cur)))
-	   cur++;
+    val = xmlStringCurrentChar(NULL, cur, &len);
+    cur += len;
+    while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
+           (val == '.') || (val == '-') ||
+	   (val == '_') || (val == ':') || 
+	   (IS_COMBINING(val)) ||
+	   (IS_EXTENDER(val))) {
+	val = xmlStringCurrentChar(NULL, cur, &len);
+	cur += len;
+    }
 
-    if (*cur != 0) return(0);
+    if (val != 0) return(0);
 
     return(1);
 }
@@ -2598,39 +2604,53 @@
 static int
 xmlValidateNamesValue(const xmlChar *value) {
     const xmlChar *cur;
+    int val, len;
 
     if (value == NULL) return(0);
     cur = value;
+    val = xmlStringCurrentChar(NULL, cur, &len);
+    cur += len;
     
-    if (!IS_LETTER(*cur) && (*cur != '_') &&
-        (*cur != ':')) {
+    if (!IS_LETTER(val) && (val != '_') &&
+        (val != ':')) {
 	return(0);
     }
 
-    while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
-           (*cur == '.') || (*cur == '-') ||
-	   (*cur == '_') || (*cur == ':') || 
-	   (IS_COMBINING(*cur)) ||
-	   (IS_EXTENDER(*cur)))
-	   cur++;
-
-    while (IS_BLANK(*cur)) {
-	while (IS_BLANK(*cur)) cur++;
-
-	if (!IS_LETTER(*cur) && (*cur != '_') &&
-	    (*cur != ':')) {
-	    return(0);
-	}
-
-	while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
-	       (*cur == '.') || (*cur == '-') ||
-	       (*cur == '_') || (*cur == ':') || 
-	       (IS_COMBINING(*cur)) ||
-	       (IS_EXTENDER(*cur)))
-	       cur++;
+    val = xmlStringCurrentChar(NULL, cur, &len);
+    cur += len;
+    while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
+           (val == '.') || (val == '-') ||
+	   (val == '_') || (val == ':') || 
+	   (IS_COMBINING(val)) ||
+	   (IS_EXTENDER(val))) {
+	val = xmlStringCurrentChar(NULL, cur, &len);
+	cur += len;
     }
 
-    if (*cur != 0) return(0);
+    while (IS_BLANK(val)) {
+	while (IS_BLANK(val)) {
+	    val = xmlStringCurrentChar(NULL, cur, &len);
+	    cur += len;
+	}
+
+	if (!IS_LETTER(val) && (val != '_') &&
+	    (val != ':')) {
+	    return(0);
+	}
+	val = xmlStringCurrentChar(NULL, cur, &len);
+	cur += len;
+
+	while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
+	       (val == '.') || (val == '-') ||
+	       (val == '_') || (val == ':') || 
+	       (IS_COMBINING(val)) ||
+	       (IS_EXTENDER(val))) {
+	    val = xmlStringCurrentChar(NULL, cur, &len);
+	    cur += len;
+	}
+    }
+
+    if (val != 0) return(0);
 
     return(1);
 }
@@ -2649,25 +2669,30 @@
 static int
 xmlValidateNmtokenValue(const xmlChar *value) {
     const xmlChar *cur;
+    int val, len;
 
     if (value == NULL) return(0);
     cur = value;
+    val = xmlStringCurrentChar(NULL, cur, &len);
+    cur += len;
     
-    if (!IS_LETTER(*cur) && !IS_DIGIT(*cur) &&
-        (*cur != '.') && (*cur != '-') &&
-        (*cur != '_') && (*cur != ':') && 
-        (!IS_COMBINING(*cur)) &&
-        (!IS_EXTENDER(*cur)))
+    if (!IS_LETTER(val) && !IS_DIGIT(val) &&
+        (val != '.') && (val != '-') &&
+        (val != '_') && (val != ':') && 
+        (!IS_COMBINING(val)) &&
+        (!IS_EXTENDER(val)))
 	return(0);
 
-    while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
-           (*cur == '.') || (*cur == '-') ||
-	   (*cur == '_') || (*cur == ':') || 
-	   (IS_COMBINING(*cur)) ||
-	   (IS_EXTENDER(*cur)))
-	   cur++;
+    while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
+           (val == '.') || (val == '-') ||
+	   (val == '_') || (val == ':') || 
+	   (IS_COMBINING(val)) ||
+	   (IS_EXTENDER(val))) {
+	val = xmlStringCurrentChar(NULL, cur, &len);
+	cur += len;
+    }
 
-    if (*cur != 0) return(0);
+    if (val != 0) return(0);
 
     return(1);
 }
@@ -2686,45 +2711,59 @@
 static int
 xmlValidateNmtokensValue(const xmlChar *value) {
     const xmlChar *cur;
+    int val, len;
 
     if (value == NULL) return(0);
     cur = value;
+    val = xmlStringCurrentChar(NULL, cur, &len);
+    cur += len;
     
-    while (IS_BLANK(*cur)) cur++;
-    if (!IS_LETTER(*cur) && !IS_DIGIT(*cur) &&
-        (*cur != '.') && (*cur != '-') &&
-        (*cur != '_') && (*cur != ':') && 
-        (!IS_COMBINING(*cur)) &&
-        (!IS_EXTENDER(*cur)))
-	return(0);
-
-    while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
-           (*cur == '.') || (*cur == '-') ||
-	   (*cur == '_') || (*cur == ':') || 
-	   (IS_COMBINING(*cur)) ||
-	   (IS_EXTENDER(*cur)))
-	   cur++;
-
-    while (IS_BLANK(*cur)) {
-	while (IS_BLANK(*cur)) cur++;
-	if (*cur == 0) return(1);
-
-	if (!IS_LETTER(*cur) && !IS_DIGIT(*cur) &&
-	    (*cur != '.') && (*cur != '-') &&
-	    (*cur != '_') && (*cur != ':') && 
-	    (!IS_COMBINING(*cur)) &&
-	    (!IS_EXTENDER(*cur)))
-	    return(0);
-
-	while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
-	       (*cur == '.') || (*cur == '-') ||
-	       (*cur == '_') || (*cur == ':') || 
-	       (IS_COMBINING(*cur)) ||
-	       (IS_EXTENDER(*cur)))
-	       cur++;
+    while (IS_BLANK(val)) {
+	val = xmlStringCurrentChar(NULL, cur, &len);
+	cur += len;
     }
 
-    if (*cur != 0) return(0);
+    if (!IS_LETTER(val) && !IS_DIGIT(val) &&
+        (val != '.') && (val != '-') &&
+        (val != '_') && (val != ':') && 
+        (!IS_COMBINING(val)) &&
+        (!IS_EXTENDER(val)))
+	return(0);
+
+    while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
+           (val == '.') || (val == '-') ||
+	   (val == '_') || (val == ':') || 
+	   (IS_COMBINING(val)) ||
+	   (IS_EXTENDER(val))) {
+	val = xmlStringCurrentChar(NULL, cur, &len);
+	cur += len;
+    }
+
+    while (IS_BLANK(val)) {
+	while (IS_BLANK(val)) {
+	    val = xmlStringCurrentChar(NULL, cur, &len);
+	    cur += len;
+	}
+	if (val == 0) return(1);
+
+	if (!IS_LETTER(val) && !IS_DIGIT(val) &&
+	    (val != '.') && (val != '-') &&
+	    (val != '_') && (val != ':') && 
+	    (!IS_COMBINING(val)) &&
+	    (!IS_EXTENDER(val)))
+	    return(0);
+
+	while ((IS_LETTER(val)) || (IS_DIGIT(val)) ||
+	       (val == '.') || (val == '-') ||
+	       (val == '_') || (val == ':') || 
+	       (IS_COMBINING(val)) ||
+	       (IS_EXTENDER(val))) {
+	    val = xmlStringCurrentChar(NULL, cur, &len);
+	    cur += len;
+	}
+    }
+
+    if (val != 0) return(0);
 
     return(1);
 }