had to change 2 internal parsing APIs when processing document content to check entity boundaries

* include/libxml/parserInternals.h parser.c: had to change
  2 internal parsing APIs when processing document content
  to check that the start and end of element content are defined
  in the same entity
* valid.c include/libxml/valid.h: attribute normalization can
  generate a validity error; added xmlValidCtxtNormalizeAttributeValue()
  with the context to report it.
* SAX.c: fixed the last known bugs, crazy validation constraints
  when a document is standalone seem correctly handled. There
  are a couple of open issues left which need consideration, especially
  PE93 on external unparsed entities and standalone status.
  Ran 1819 tests: 1817 succeeded, 2 failed and 0 generated an error in 8.26 s.
  The 2 tests left failing are actually in error. Cleanup done.
Daniel
diff --git a/ChangeLog b/ChangeLog
index 6ccf4ba..b099f07 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+Tue Feb 19 22:01:35 CET 2002 Daniel Veillard <daniel@veillard.com>
+
+	* include/libxml/parserInternals.h parser.c: had to change
+	  2 internal parsing API when processing document content
+	  to check the start and end of element content are defined
+	  in the same entity
+	* valid.c include/libxml/valid.h: attribute normalization can
+	  generate a validity error added xmlValidCtxtNormalizeAttributeValue()
+	  with the context to report it.
+	* SAX.c: fixed the last known bugs, crazy validation constraints
+	  when a document is standalone seems correctly handled. There
+	  is a couple of open issues left which need consideration especially
+	  PE93 on external unparsed entities and standalone status. 
+	  Ran 1819 tests: 1817 suceeded, 2 failed and 0 generated an error in 8.26 s.
+	  The 2 tests left failing are actually in error. Cleanup done.
+
 Tue Feb 19 15:17:02 CET 2002 Daniel Veillard <daniel@veillard.com>
 
 	* valid.c: implemented E59 spaces in CDATA does not match the
diff --git a/SAX.c b/SAX.c
index cbf21cf..2665998 100644
--- a/SAX.c
+++ b/SAX.c
@@ -830,8 +830,13 @@
      * Needed for HTML too:
      *   http://www.w3.org/TR/html4/types.html#h-6.2
      */
-    nval = xmlValidNormalizeAttributeValue(ctxt->myDoc, ctxt->node,
+    ctxt->vctxt.valid = 1;
+    nval = xmlValidCtxtNormalizeAttributeValue(&ctxt->vctxt,
+	                                   ctxt->myDoc, ctxt->node,
 					   fullname, value);
+    if (ctxt->vctxt.valid != 1) {
+	ctxt->valid = 0;
+    }
     if (nval != NULL)
 	value = nval;
 
@@ -985,16 +990,21 @@
  * Check defaulted attributes from the DTD
  */
 static void
-xmlCheckDefaultedAttributesFromDtd(xmlParserCtxtPtr ctxt,
-	xmlDtdPtr dtd, const xmlChar *name,
+xmlCheckDefaultedAttributes(xmlParserCtxtPtr ctxt, const xmlChar *name,
 	const xmlChar *prefix, const xmlChar **atts) {
     xmlElementPtr elemDecl;
     const xmlChar *att;
+    int internal = 1;
     int i;
 
-    if ((dtd == NULL) || (name == NULL))
-	return;
-    elemDecl = xmlGetDtdQElementDesc(dtd, name, prefix);
+    elemDecl = xmlGetDtdQElementDesc(ctxt->myDoc->intSubset, name, prefix);
+    if (elemDecl == NULL) {
+	elemDecl = xmlGetDtdQElementDesc(ctxt->myDoc->extSubset, name, prefix);
+	internal = 0;
+    }
+
+process_external_subset:
+
     if (elemDecl != NULL) {
 	xmlAttributePtr attr = elemDecl->attributes;
 	/*
@@ -1008,7 +1018,10 @@
 		if ((attr->defaultValue != NULL) &&
 		    (xmlGetDtdQAttrDesc(ctxt->myDoc->extSubset,
 					attr->elem, attr->name,
-					attr->prefix) == attr)) {
+					attr->prefix) == attr) &&
+		    (xmlGetDtdQAttrDesc(ctxt->myDoc->intSubset,
+					attr->elem, attr->name,
+					attr->prefix) == NULL)) {
 		    xmlChar *fulln;
 
 		    if (attr->prefix != NULL) {
@@ -1039,9 +1052,7 @@
 			    ctxt->vctxt.error(ctxt->vctxt.userData,
       "standalone: attribute %s on %s defaulted from external subset\n",
 					      fulln, attr->elem);
-			/* Waiting on the XML Core WG decision on this
 			ctxt->valid = 0;
-			 */
 		    }
 		}
 		attr = attr->nexth;
@@ -1053,7 +1064,18 @@
 	 */
 	attr = elemDecl->attributes;
 	while (attr != NULL) {
-	    if (attr->defaultValue != NULL) {
+	    /*
+	     * Make sure that attributes redefinition occuring in the
+	     * internal subset are not overriden by definitions in the
+	     * external subset.
+	     */
+	    if ((attr->defaultValue != NULL) &&
+		(xmlGetDtdQAttrDesc(ctxt->myDoc->extSubset,
+				    attr->elem, attr->name,
+				    attr->prefix) == attr) &&
+		(xmlGetDtdQAttrDesc(ctxt->myDoc->intSubset,
+				    attr->elem, attr->name,
+				    attr->prefix) == NULL)) {
 		/*
 		 * the element should be instantiated in the tree if:
 		 *  - this is a namespace prefix
@@ -1090,13 +1112,20 @@
 			    att = atts[i];
 			}
 		    }
-		    if (att == NULL)
+		    if (att == NULL) {
 			attribute(ctxt, fulln, attr->defaultValue);
+		    }
 		    xmlFree(fulln);
 		}
 	    }
 	    attr = attr->nexth;
 	}
+	if (internal == 1) {
+	    elemDecl = xmlGetDtdQElementDesc(ctxt->myDoc->extSubset,
+		                             name, prefix);
+	    internal = 0;
+	    goto process_external_subset;
+	}
     }
 }
 
@@ -1206,12 +1235,7 @@
     if ((!ctxt->html) &&
 	((ctxt->myDoc->intSubset != NULL) ||
 	 (ctxt->myDoc->extSubset != NULL))) {
-	if (ctxt->myDoc->intSubset != NULL)
-	    xmlCheckDefaultedAttributesFromDtd(ctxt, ctxt->myDoc->intSubset,
-		                               name, prefix, atts);
-	if (ctxt->myDoc->extSubset != NULL)
-	    xmlCheckDefaultedAttributesFromDtd(ctxt, ctxt->myDoc->extSubset,
-		                               name, prefix, atts);
+	xmlCheckDefaultedAttributes(ctxt, name, prefix, atts);
     }
 
     /*
diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h
index 2b4c1a6..93e230e 100644
--- a/include/libxml/parserInternals.h
+++ b/include/libxml/parserInternals.h
@@ -273,14 +273,16 @@
 						 xmlEnumerationPtr *tree);
 void			xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt);
 xmlElementContentPtr	xmlParseElementMixedContentDecl
-						(xmlParserCtxtPtr ctxt);
+						(xmlParserCtxtPtr ctxt,
+						 xmlParserInputPtr inputchk);
 #ifdef VMS
 xmlElementContentPtr	xmlParseElementChildrenContentD
 						(xmlParserCtxtPtr ctxt);
 #define xmlParseElementChildrenContentDecl	xmlParseElementChildrenContentD
 #else
 xmlElementContentPtr	xmlParseElementChildrenContentDecl
-						(xmlParserCtxtPtr ctxt);
+						(xmlParserCtxtPtr ctxt,
+						 xmlParserInputPtr inputchk);
 #endif
 int			xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,
 						 xmlChar *name,
diff --git a/include/libxml/valid.h b/include/libxml/valid.h
index d49a264..db427ff 100644
--- a/include/libxml/valid.h
+++ b/include/libxml/valid.h
@@ -223,6 +223,11 @@
 					 xmlNodePtr elem,
 					 const xmlChar *name,
 					 const xmlChar *value);
+xmlChar *	xmlValidCtxtNormalizeAttributeValue(xmlValidCtxtPtr ctxt,
+					 xmlDocPtr doc,
+					 xmlNodePtr elem,
+					 const xmlChar *name,
+					 const xmlChar *value);
 int		xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt,
 					 xmlDocPtr doc,
 		                         xmlAttributePtr attr);
diff --git a/parser.c b/parser.c
index 6d6df5b..09833e0 100644
--- a/parser.c
+++ b/parser.c
@@ -4132,7 +4132,7 @@
  * returns: the list of the xmlElementContentPtr describing the element choices
  */
 xmlElementContentPtr
-xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
+xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
     xmlElementContentPtr ret = NULL, cur = NULL, n;
     xmlChar *elem = NULL;
 
@@ -4145,7 +4145,13 @@
 	SKIP_BLANKS;
 	SHRINK;
 	if (RAW == ')') {
-	    ctxt->entity = ctxt->input;
+	    if ((ctxt->validate) && (ctxt->input != inputchk)) {
+		ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
+		if (ctxt->vctxt.error != NULL)
+		    ctxt->vctxt.error(ctxt->vctxt.userData, 
+"Element content declaration doesn't start and stop in the same entity\n");
+		ctxt->valid = 0;
+	    }
 	    NEXT;
 	    ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
 	    if (RAW == '*') {
@@ -4203,7 +4209,13 @@
 	        xmlFree(elem);
             }
 	    ret->ocur = XML_ELEMENT_CONTENT_MULT;
-	    ctxt->entity = ctxt->input;
+	    if ((ctxt->validate) && (ctxt->input != inputchk)) {
+		ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
+		if (ctxt->vctxt.error != NULL)
+		    ctxt->vctxt.error(ctxt->vctxt.userData, 
+"Element content declaration doesn't start and stop in the same entity\n");
+		ctxt->valid = 0;
+	    }
 	    SKIP(2);
 	} else {
 	    if (elem != NULL) xmlFree(elem);
@@ -4273,7 +4285,7 @@
 #else
 xmlParseElementChildrenContentDecl
 #endif
-(xmlParserCtxtPtr ctxt) {
+(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
     xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
     xmlChar *elem;
     xmlChar type = 0;
@@ -4281,10 +4293,12 @@
     SKIP_BLANKS;
     GROW;
     if (RAW == '(') {
+	xmlParserInputPtr input = ctxt->input;
+
         /* Recurse on first child */
 	NEXT;
 	SKIP_BLANKS;
-        cur = ret = xmlParseElementChildrenContentDecl(ctxt);
+        cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
 	SKIP_BLANKS;
 	GROW;
     } else {
@@ -4437,10 +4451,11 @@
 	SKIP_BLANKS;
 	GROW;
 	if (RAW == '(') {
+	    xmlParserInputPtr input = ctxt->input;
 	    /* Recurse on second child */
 	    NEXT;
 	    SKIP_BLANKS;
-	    last = xmlParseElementChildrenContentDecl(ctxt);
+	    last = xmlParseElementChildrenContentDecl(ctxt, input);
 	    SKIP_BLANKS;
 	} else {
 	    elem = xmlParseName(ctxt);
@@ -4483,7 +4498,13 @@
 	if (last != NULL)
 	    last->parent = cur;
     }
-    ctxt->entity = ctxt->input;
+    if ((ctxt->validate) && (ctxt->input != inputchk)) {
+	ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
+	if (ctxt->vctxt.error != NULL)
+	    ctxt->vctxt.error(ctxt->vctxt.userData, 
+"Element content declaration doesn't start and stop in the same entity\n");
+	ctxt->valid = 0;
+    }
     NEXT;
     if (RAW == '?') {
 	if (ret != NULL)
@@ -4583,20 +4604,12 @@
         (NXT(2) == 'C') && (NXT(3) == 'D') &&
         (NXT(4) == 'A') && (NXT(5) == 'T') &&
         (NXT(6) == 'A')) {
-        tree = xmlParseElementMixedContentDecl(ctxt);
+        tree = xmlParseElementMixedContentDecl(ctxt, input);
 	res = XML_ELEMENT_TYPE_MIXED;
     } else {
-        tree = xmlParseElementChildrenContentDecl(ctxt);
+        tree = xmlParseElementChildrenContentDecl(ctxt, input);
 	res = XML_ELEMENT_TYPE_ELEMENT;
     }
-    if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
-	ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
-	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-	    ctxt->sax->error(ctxt->userData, 
-"Element content declaration doesn't start and stop in the same entity\n");
-	ctxt->wellFormed = 0;
-	ctxt->disableSAX = 1;
-    }
     SKIP_BLANKS;
     *result = tree;
     return(res);
diff --git a/valid.c b/valid.c
index 79a3512..c6db9e4 100644
--- a/valid.c
+++ b/valid.c
@@ -2970,11 +2970,99 @@
 }
 
 /**
+ * xmlValidCtxtNormalizeAttributeValue:
+ * @ctxt: the validation context, used to report the standalone error
+ * @doc:  the document
+ * @elem:  the parent
+ * @name:  the attribute name
+ * @value:  the attribute value
+ *
+ * Does the validation related extra step of the normalization of attribute
+ * values:
+ *
+ * If the declared value is not CDATA, then the XML processor must further
+ * process the normalized attribute value by discarding any leading and
+ * trailing space (#x20) characters, and by replacing sequences of space
+ * (#x20) characters by single space (#x20) character.
+ *
+ * Also check VC: Standalone Document Declaration in P32, and clear
+ * ctxt->valid when the declaration comes from the external subset and
+ * the value changed; @ctxt is dereferenced then, so it must not be NULL.
+ *
+ * returns a newly allocated normalized copy of @value when a non-CDATA
+ *      declaration is found, else NULL; the caller must free the result.
+ */
+
+xmlChar *
+xmlValidCtxtNormalizeAttributeValue(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
+	     xmlNodePtr elem, const xmlChar *name, const xmlChar *value) {
+    xmlChar *ret, *dst;
+    const xmlChar *src;
+    xmlAttributePtr attrDecl = NULL;
+    int extsubset = 0;
+
+    if (doc == NULL) return(NULL);
+    if (elem == NULL) return(NULL);
+    if (name == NULL) return(NULL);
+    if (value == NULL) return(NULL);
+
+    if ((elem->ns != NULL) && (elem->ns->prefix != NULL)) {
+	xmlChar qname[500];
+	snprintf((char *) qname, sizeof(qname), "%s:%s",
+		 elem->ns->prefix, elem->name);
+        qname[sizeof(qname) - 1] = 0;
+	attrDecl = xmlGetDtdAttrDesc(doc->intSubset, qname, name);
+	if ((attrDecl == NULL) && (doc->extSubset != NULL)) {
+	    attrDecl = xmlGetDtdAttrDesc(doc->extSubset, qname, name);
+	    if (attrDecl != NULL)
+		extsubset = 1;
+	}
+    }
+    if ((attrDecl == NULL) && (doc->intSubset != NULL))
+	attrDecl = xmlGetDtdAttrDesc(doc->intSubset, elem->name, name);
+    if ((attrDecl == NULL) && (doc->extSubset != NULL)) {
+	attrDecl = xmlGetDtdAttrDesc(doc->extSubset, elem->name, name);
+	if (attrDecl != NULL)
+	    extsubset = 1;
+    }
+
+    if (attrDecl == NULL)
+	return(NULL);
+    if (attrDecl->atype == XML_ATTRIBUTE_CDATA)
+	return(NULL);
+
+    ret = xmlStrdup(value);
+    if (ret == NULL)
+	return(NULL);
+    src = value;
+    dst = ret;
+    while (*src == 0x20) src++;
+    while (*src != 0) {
+	if (*src == 0x20) {
+	    while (*src == 0x20) src++;
+	    if (*src != 0)
+		*dst++ = 0x20;
+	} else {
+	    *dst++ = *src++;
+	}
+    }
+    *dst = 0;
+    if ((doc->standalone) && (extsubset == 1) && (!xmlStrEqual(value, ret))) {
+	VERROR(ctxt->userData, 
+"standalone: %s on %s value had to be normalized based on external subset declaration\n",
+	       name, elem->name);
+	ctxt->valid = 0;
+    }
+    return(ret);
+}
+
+/**
  * xmlValidNormalizeAttributeValue:
  * @doc:  the document
  * @elem:  the parent
  * @name:  the attribute name
  * @value:  the attribute value
+ * @ctxt:  the validation context or NULL
  *
  * Does the validation related extra step of the normalization of attribute
  * values:
@@ -3234,7 +3322,6 @@
 	       elem->name);
 	ret = 0;
     }
-
     /* One ID per Element Type
      * already done when registering the attribute
     if (xmlScanIDAttributeDecl(ctxt, elem) > 1) {
@@ -4195,9 +4282,10 @@
     xmlElementContentPtr cont;
     xmlAttributePtr attr;
     xmlNodePtr child;
-    int ret = 1;
+    int ret = 1, tmp;
     const xmlChar *name;
     const xmlChar *prefix = NULL;
+    int extsubset = 0;
 
     CHECK_DTD;
 
@@ -4275,9 +4363,12 @@
     if (prefix != NULL) {
 	elemDecl = xmlGetDtdQElementDesc(doc->intSubset,
 		                         elem->name, prefix);
-	if ((elemDecl == NULL) && (doc->extSubset != NULL))
+	if ((elemDecl == NULL) && (doc->extSubset != NULL)) {
 	    elemDecl = xmlGetDtdQElementDesc(doc->extSubset,
 		                             elem->name, prefix);
+	    if (elemDecl != NULL)
+		extsubset = 1;
+	}
     }
 
     /*
@@ -4287,8 +4378,11 @@
      */
     if (elemDecl == NULL) {
 	elemDecl = xmlGetDtdElementDesc(doc->intSubset, elem->name);
-	if ((elemDecl == NULL) && (doc->extSubset != NULL))
+	if ((elemDecl == NULL) && (doc->extSubset != NULL)) {
 	    elemDecl = xmlGetDtdElementDesc(doc->extSubset, elem->name);
+	    if (elemDecl != NULL)
+		extsubset = 1;
+	}
     }
     if (elemDecl == NULL) {
 	VERROR(ctxt->userData, "No declaration for element %s\n",
@@ -4314,6 +4408,7 @@
 	    /* I don't think anything is required then */
 	    break;
         case XML_ELEMENT_TYPE_MIXED:
+
 	    /* simple case of declared as #PCDATA */
 	    if ((elemDecl->content != NULL) &&
 		(elemDecl->content->type == XML_ELEMENT_CONTENT_PCDATA)) {
@@ -4386,9 +4481,35 @@
 	    }
 	    break;
         case XML_ELEMENT_TYPE_ELEMENT:
+	    if ((doc->standalone == 1) && (extsubset == 1)) {
+		/*
+		 * VC: Standalone Document Declaration
+		 *     - element types with element content, if white space
+		 *       occurs directly within any instance of those types.
+		 */
+		child = elem->children;
+		while (child != NULL) {
+		    if (child->type == XML_TEXT_NODE) {
+			const xmlChar *content = child->content;
+
+			while (IS_BLANK(*content))
+			    content++;
+			if (*content == 0) {
+			    VERROR(ctxt->userData,
+"standalone: %s declared in the external subset contains white spaces nodes\n",
+				   elem->name);
+			    ret = 0;
+			    break;
+			}
+		    }
+		    child =child->next;
+		}
+	    }
 	    child = elem->children;
 	    cont = elemDecl->content;
-	    ret = xmlValidateElementContent(ctxt, child, elemDecl, 1);
+	    tmp = xmlValidateElementContent(ctxt, child, elemDecl, 1);
+	    if (tmp <= 0)
+		ret = tmp;
 	    break;
     }