Cleanup and testing with the KDE help center files:
- valid.c: cleanup, more useful debugging
- parserInternals.c: cleanup vctxt.nodeTab (de)allocation
- xmlIO.c: entity loading is printed as an error when validating
Daniel
diff --git a/valid.c b/valid.c
index 48f7e15..a1bb5ba 100644
--- a/valid.c
+++ b/valid.c
@@ -28,14 +28,22 @@
 #include <libxml/xmlerror.h>
 #include <libxml/list.h>
 
-#define ALLOW_UNDETERMINISTIC_MODELS
-
 /*
  * Generic function for accessing stacks in the Validity Context
  */
 
 #define PUSH_AND_POP(scope, type, name)					\
 scope int name##VPush(xmlValidCtxtPtr ctxt, type value) {		\
+    if (ctxt->name##Max <= 0) {						\
+	ctxt->name##Max = 4;						\
+        ctxt->name##Tab = (type *) xmlMalloc(				\
+	             ctxt->name##Max * sizeof(ctxt->name##Tab[0]));	\
+        if (ctxt->name##Tab == NULL) {					\
+	    xmlGenericError(xmlGenericErrorContext,			\
+		    "malloc failed !\n");				\
+	    return(0);							\
+	}								\
+    }									\
     if (ctxt->name##Nr >= ctxt->name##Max) {				\
 	ctxt->name##Max *= 2;						\
         ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab,		\
@@ -63,8 +71,6 @@
     return(ret);							\
 }									\
 
-#ifdef ALLOW_UNDETERMINISTIC_MODELS
-
 /*
  * I will use a home made algorithm less complex and easier to
  * debug/maintin than a generic NFA -> DFA state based algo. The
@@ -133,9 +139,6 @@
     return(ctxt->vstateNr);
 }
 
-
-#endif /* ALLOW_UNDETERMINISTIC_MODELS */
-
 PUSH_AND_POP(static, xmlNodePtr, node)
 
 /* #define DEBUG_VALID_ALGO */
@@ -3228,270 +3231,6 @@
     return(ret);
 }
 
-#ifndef ALLOW_UNDETERMINISTIC_MODELS
-
-/* Find the next XML_ELEMENT_NODE, subject to the content constraints.
- * Return -1 if we found something unexpected, or 1 otherwise.
- */
-
-static int
-xmlValidateFindNextElement(xmlValidCtxtPtr ctxt, xmlNodePtr *child,
-                           xmlElementContentPtr cont)
-{
-    DEBUG_VALID_MSG("skipping to next element");
-    while (*child && (*child)->type != XML_ELEMENT_NODE) {
-	switch ((*child)->type) {
-	    /*
-	     * If there is an entity declared and it's not empty
-	     * Push the current node on the stack and process with the
-	     * entity content.
-	     */
-	    case XML_ENTITY_REF_NODE:
-		if (((*child)->children != NULL) &&
-		    ((*child)->children->children != NULL)) {
-		    nodeVPush(ctxt, *child);
-		    *child = (*child)->children->children;
-		    continue;
-		}
-		break;
-
-	    /* These things are ignored (skipped) during validation.  */
-	    case XML_PI_NODE:
-	    case XML_COMMENT_NODE:
-	    case XML_XINCLUDE_START:
-	    case XML_XINCLUDE_END:
-		break;
-
-	    case XML_TEXT_NODE:
-		if (xmlIsBlankNode(*child)
-		    && (cont->type == XML_ELEMENT_CONTENT_ELEMENT
-		    || cont->type == XML_ELEMENT_CONTENT_SEQ
-		    || cont->type == XML_ELEMENT_CONTENT_OR))
-		    break;
-		DEBUG_VALID_MSG("failed non-blank");
-		return(-1);
-
-	    default:
-		DEBUG_VALID_MSG("failed unknown type");
-		return(-1);
-	}
-	*child = (*child)->next;
-    }
-#ifdef DEBUG_VALID_ALGO
-    if (*child != NULL) {
-	DEBUG_VALID_MSG((*child)->name);
-    }
-    DEBUG_VALID_MSG("found ...");
-#endif
-
-    return(1);
-}
-
-int xmlValidateElementTypeElement(xmlValidCtxtPtr ctxt, xmlNodePtr *child,
-				  xmlElementContentPtr cont);
-
-/**
- * xmlValidateElementTypeExpr:
- * @ctxt:  the validation context
- * @child:  pointer to the child list
- * @cont:  pointer to the content declaration
- *
- * Try to validate the content of an element of type element
- * but don't handle the occurence factor
- *
- * returns 1 if valid or 0 and -1 if PCDATA stuff is found,
- *         also update child value in-situ.
- */
-
-static int
-xmlValidateElementTypeExpr(xmlValidCtxtPtr ctxt, xmlNodePtr *child,
-			   xmlElementContentPtr cont) {
-    xmlNodePtr cur;
-    int ret = 1;
-
-    if (cont == NULL) return(-1);
-    DEBUG_VALID_STATE(*child, cont)
-    ret = xmlValidateFindNextElement(ctxt, child, cont);
-    if (ret < 0)
-	    return(-1);
-    DEBUG_VALID_STATE(*child, cont)
-    switch (cont->type) {
-	case XML_ELEMENT_CONTENT_PCDATA:
-	    if (*child == NULL) return(0);
-	    if ((*child)->type == XML_TEXT_NODE) {
-		DEBUG_VALID_MSG("pcdata found");
-		return(1);
-	    }
-	    return(0);
-	case XML_ELEMENT_CONTENT_ELEMENT:
-	    if (*child == NULL) return(0);
-	    ret = (xmlStrEqual((*child)->name, cont->name));
-	    if (ret == 1) {
-		DEBUG_VALID_MSG("element found, skip to next");
-		while ((*child)->next == NULL) {
-                    if (((*child)->parent != NULL) &&
-			((*child)->parent->type == XML_ENTITY_DECL)) {
-			*child = nodeVPop(ctxt);
-		    } else
-			break;
-		}
-	        *child = (*child)->next;
-	    }
-	    return(ret);
-	case XML_ELEMENT_CONTENT_OR:
-	    cur = *child;
-	    ret = xmlValidateElementTypeElement(ctxt, child, cont->c1);
-	    if (ret == -1) return(-1);
-	    if (ret == 1) {
-		DEBUG_VALID_MSG("or succeeded first branch");
-		return(1);
-	    }
-	    /* rollback and retry the other path */
-	    *child = cur;
-	    ret = xmlValidateElementTypeElement(ctxt, child, cont->c2);
-	    if (ret == -1) return(-1);
-	    if (ret == 0) {
-		DEBUG_VALID_MSG("or failed both branches");
-		*child = cur;
-		return(0);
-	    }
-	    DEBUG_VALID_MSG("or succeeded second branch");
-	    return(1);
-	case XML_ELEMENT_CONTENT_SEQ:
-	    cur = *child;
-	    ret = xmlValidateElementTypeElement(ctxt, child, cont->c1);
-	    if (ret == -1) return(-1);
-	    if (ret == 0) {
-		DEBUG_VALID_MSG("sequence failed");
-		*child = cur;
-		return(0);
-	    }
-	    ret = xmlValidateElementTypeElement(ctxt, child, cont->c2);
-	    if (ret == -1) return(-1);
-	    if (ret == 0) {
-		*child = cur;
-		return(0);
-	    }
-	    DEBUG_VALID_MSG("sequence succeeded");
-	    return(1);
-    }
-    return(ret);
-}
-
-/**
- * xmlValidateElementTypeElement:
- * @ctxt:  the validation context
- * @child:  pointer to the child list
- * @cont:  pointer to the content declaration
- *
- * Try to validate the content of an element of type element
- * yeah, Yet Another Regexp Implementation, and recursive
- *
- * returns 1 if valid or 0 and -1 if PCDATA stuff is found,
- *         also update child and content values in-situ.
- */
-
-int
-xmlValidateElementTypeElement(xmlValidCtxtPtr ctxt, xmlNodePtr *child,
-			      xmlElementContentPtr cont) {
-    xmlNodePtr cur;
-    int ret;
-
-    if (cont == NULL) return(-1);
-
-    DEBUG_VALID_STATE(*child, cont)
-    ret = xmlValidateFindNextElement(ctxt, child, cont);
-    if (ret < 0)
-	    return(-1);
-    DEBUG_VALID_STATE(*child, cont)
-    cur = *child;
-    ret = xmlValidateElementTypeExpr(ctxt, child, cont);
-    if (ret == -1) return(-1);
-    switch (cont->ocur) {
-	case XML_ELEMENT_CONTENT_ONCE:
-	    if (ret == 1) {
-		DEBUG_VALID_MSG("once found, skip to next");
-		/* skip ignorable elems */
-		while ((*child != NULL) &&
-		       ((*child)->type == XML_PI_NODE
-                        || (*child)->type == XML_COMMENT_NODE
-                        || (*child)->type == XML_XINCLUDE_START
-                        || (*child)->type == XML_XINCLUDE_END)) {
-		    while ((*child)->next == NULL) {
-			if (((*child)->parent != NULL) &&
-			    ((*child)->parent->type == XML_ENTITY_REF_NODE)) {
-			    *child = (*child)->parent;
-			} else
-			    break;
-		    }
-		    *child = (*child)->next;
-		}
-		return(1);
-	    }
-	    *child = cur;
-	    return(0);
-	case XML_ELEMENT_CONTENT_OPT:
-	    if (ret == 0) {
-		*child = cur;
-	        return(1);
-	    }
-	    if (ret == 1) {
-		DEBUG_VALID_MSG("optional found, skip to next");
-		/* skip ignorable elems */
-		while ((*child != NULL) &&
-		       ((*child)->type == XML_PI_NODE
-                        || (*child)->type == XML_COMMENT_NODE
-                        || (*child)->type == XML_XINCLUDE_START
-                        || (*child)->type == XML_XINCLUDE_END)) {
-		    while ((*child)->next == NULL) {
-			if (((*child)->parent != NULL) &&
-			    ((*child)->parent->type == XML_ENTITY_REF_NODE)) {
-			    *child = (*child)->parent;
-			} else
-			    break;
-		    }
-		    *child = (*child)->next;
-		}
-		return(1);
-	    }
-	    break;
-	case XML_ELEMENT_CONTENT_MULT:
-	    if (ret == 0) {
-		*child = cur;
-	        break;
-	    }
-	    /* no break on purpose */
-	case XML_ELEMENT_CONTENT_PLUS:
-	    if (ret == 0) {
-		*child = cur;
-	        return(0);
-	    }
-	    DEBUG_VALID_MSG("mult/plus found");
-	    if (ret == -1) return(-1);
-	    cur = *child;
-	    do {
-		if (*child == NULL)
-		    break; /* while */
-		if ((*child)->type == XML_TEXT_NODE
-                    && xmlIsBlankNode(*child)) {
-		    *child = (*child)->next;
-		    continue;
-		}
-		ret = xmlValidateElementTypeExpr(ctxt, child, cont);
-		if (ret == 1)
-		    cur = *child;
-	    } while (ret == 1);
-	    if (ret == -1) return(-1);
-	    *child = cur;
-	    break;
-    }
-    if (ret == -1) return(-1);
-
-    return(xmlValidateFindNextElement(ctxt, child, cont));
-}
-
-#else /* ALLOW_UNDETERMINISTIC_MODELS */
-
 /**
  * xmlValidateSkipIgnorable:
  * @ctxt:  the validation context
@@ -3833,10 +3572,73 @@
 }
 
 /**
+ * xmlSprintfElements:
+ * @buf:  an output buffer, must be large enough (no bounds checking is done)
+ * @node:  the first node of the sibling list to dump
+ * @glob: 1 if the enclosing parentheses must be printed, 0 otherwise
+ *
+ * This will append the list of sibling elements to the buffer
+ * Intended just for the debug routine
+ */
+static void
+xmlSprintfElements(char *buf, xmlNodePtr node, int glob) {
+    xmlNodePtr cur;
+
+    if (node == NULL) return; /* empty list: buf is left untouched */
+    if (glob) strcat(buf, "("); /* opening parenthesis only on request */
+    cur = node;
+    while (cur != NULL) { /* walk the sibling chain through ->next */
+        switch (cur->type) {
+            case XML_ELEMENT_NODE:
+	         strcat(buf, (char *) cur->name);
+		 if (cur->next != NULL) /* separator, whatever the next type is */
+		     strcat(buf, " ");
+		 break;
+            case XML_TEXT_NODE:
+		 if (xmlIsBlankNode(cur))
+		     break; /* blank text nodes produce no output */
+            case XML_CDATA_SECTION_NODE: /* non-blank text falls through here */
+            case XML_ENTITY_REF_NODE:
+	         strcat(buf, "CDATA"); /* all text-like nodes print as CDATA */
+		 if (cur->next != NULL)
+		     strcat(buf, " ");
+		 break;
+            case XML_ATTRIBUTE_NODE:
+            case XML_DOCUMENT_NODE:
+#ifdef LIBXML_SGML_ENABLED
+	    case XML_SGML_DOCUMENT_NODE:
+#endif
+	    case XML_HTML_DOCUMENT_NODE:
+            case XML_DOCUMENT_TYPE_NODE:
+            case XML_DOCUMENT_FRAG_NODE:
+            case XML_NOTATION_NODE:
+	    case XML_NAMESPACE_DECL:
+	         strcat(buf, "???"); /* these node types are printed as ??? */
+		 if (cur->next != NULL)
+		     strcat(buf, " ");
+		 break;
+            case XML_ENTITY_NODE:
+            case XML_PI_NODE:
+            case XML_DTD_NODE:
+            case XML_COMMENT_NODE:
+	    case XML_ELEMENT_DECL:
+	    case XML_ATTRIBUTE_DECL:
+	    case XML_ENTITY_DECL:
+	    case XML_XINCLUDE_START:
+	    case XML_XINCLUDE_END:
+		 break; /* these node types produce no output */
+	}
+	cur = cur->next;
+    }
+    if (glob) strcat(buf, ")"); /* matching closing parenthesis */
+}
+
+/**
  * xmlValidateElementContent:
  * @ctxt:  the validation context
  * @child:  the child list
  * @cont:  pointer to the content declaration
+ * @warn:  non-zero to emit an error message when validation fails
  *
  * Try to validate the content model of an element
  *
@@ -3845,9 +3647,9 @@
 
 static int
 xmlValidateElementContent(xmlValidCtxtPtr ctxt, xmlNodePtr child,
-		   xmlElementContentPtr cont) {
+		   xmlElementContentPtr cont, int warn) {
     int ret;
-    xmlNodePtr repl = NULL, last = NULL, tmp;
+    xmlNodePtr repl = NULL, last = NULL, cur, tmp;
 
     /*
      * Allocate the stack
@@ -3878,21 +3680,22 @@
 	 * sufficient to run the validation process on it
 	 */
 	DEBUG_VALID_MSG("Found an entity reference, linearizing");
-	while (child != NULL) {
-	    switch (child->type) {
+	cur = child;
+	while (cur != NULL) {
+	    switch (cur->type) {
 		case XML_ENTITY_REF_NODE:
 		    /*
 		     * Push the current node to be able to roll back
 		     * and process within the entity
 		     */
-		    if ((child->children != NULL) &&
-			(child->children->children != NULL)) {
-			nodeVPush(ctxt, child);
-			child = child->children->children;
+		    if ((cur->children != NULL) &&
+			(cur->children->children != NULL)) {
+			nodeVPush(ctxt, cur);
+			cur = cur->children->children;
 			continue;
 		    }
 		case XML_TEXT_NODE:
-		    if (xmlIsBlankNode(child))
+		    if (xmlIsBlankNode(cur))
 			break;
 		case XML_ELEMENT_NODE:
 		    /*
@@ -3907,9 +3710,9 @@
 			ret = -1;
 			goto done;
 		    }
-		    tmp->type = child->type;
-		    tmp->name = child->name;
-		    tmp->ns = child->ns;
+		    tmp->type = cur->type;
+		    tmp->name = cur->name;
+		    tmp->ns = cur->ns;
 		    tmp->next = NULL;
 		    if (repl == NULL)
 			repl = last = tmp;
@@ -3924,12 +3727,12 @@
 	    /*
 	     * Switch to next element
 	     */
-	    child = child->next;
-	    while (child == NULL) {
-		child = nodeVPop(ctxt);
-		if (child == NULL)
+	    cur = cur->next;
+	    while (cur == NULL) {
+		cur = nodeVPop(ctxt);
+		if (cur == NULL)
 		    break;
-		child = child->next;
+		cur = cur->next;
 	    }
 	}
 
@@ -3945,6 +3748,29 @@
 	STATE = 0;
 	ret = xmlValidateElementType(ctxt);
     }
+    if ((warn) && (ret != 1)) {
+	char expr[5000];
+	char list[5000];
+
+	expr[0] = 0;
+	xmlSprintfElementContent(expr, cont, 1);
+	list[0] = 0;
+	if (repl != NULL)
+	    xmlSprintfElements(list, repl, 1);
+	else
+	    xmlSprintfElements(list, child, 1);
+
+	if ((child->parent != NULL) && (child->parent->name != NULL)) {
+	    VERROR(ctxt->userData,
+   "Element %s content doesn't follow the Dtd\nExpecting %s, got %s\n",
+	       child->parent->name, expr, list);
+	} else {
+	    VERROR(ctxt->userData,
+       "Element content doesn't follow the Dtd\nExpecting %s, got %s\n",
+	       expr, list);
+	}
+	ret = 0;
+    }
 
 
 done:
@@ -3957,11 +3783,18 @@
 	repl = tmp;
     }
     ctxt->vstateMax = 0;
-    xmlFree(ctxt->vstateTab);
+    if (ctxt->vstateTab != NULL) {
+	xmlFree(ctxt->vstateTab);
+	ctxt->vstateTab = NULL;
+    }
+    ctxt->nodeMax = 0;
+    if (ctxt->nodeTab != NULL) {
+	xmlFree(ctxt->nodeTab);
+	ctxt->nodeTab = NULL;
+    }
     return(ret);
 
 }
-#endif /* ALLOW_UNDETERMINISTIC_MODELS */
 
 /**
  * xmlSprintfElementChilds:
@@ -4229,45 +4062,8 @@
         case XML_ELEMENT_TYPE_ELEMENT:
 	    child = elem->children;
 	    cont = elemDecl->content;
-#ifdef ALLOW_UNDETERMINISTIC_MODELS
-	    ret = xmlValidateElementContent(ctxt, child, cont);
-	    if (ret != 1) {
-	        char expr[1000];
-	        char list[2000];
-
-		expr[0] = 0;
-		xmlSprintfElementContent(expr, cont, 1);
-		list[0] = 0;
-		xmlSprintfElementChilds(list, elem, 1);
-
-		VERROR(ctxt->userData,
-	   "Element %s content doesn't follow the Dtd\nExpecting %s, got %s\n",
-	               elem->name, expr, list);
-		ret = 0;
-	    }
-#else
-	    ret = xmlValidateElementTypeElement(ctxt, &child, cont);
-	    while ((child != NULL) && (child->type == XML_TEXT_NODE) &&
-		(xmlIsBlankNode(child))) {
-		child = child->next;
-		continue;
-	    }
-	    if ((ret == 0) || (child != NULL)) {
-	        char expr[1000];
-	        char list[2000];
-
-		expr[0] = 0;
-		xmlSprintfElementContent(expr, cont, 1);
-		list[0] = 0;
-		xmlSprintfElementChilds(list, elem, 1);
-
-		VERROR(ctxt->userData,
-	   "Element %s content doesn't follow the Dtd\nExpecting %s, got %s\n",
-	               elem->name, expr, list);
-		ret = 0;
-	    }
+	    ret = xmlValidateElementContent(ctxt, child, cont, 1);
 	    break;
-#endif
     }
 
     /* [ VC: Required Attribute ] */
@@ -4293,7 +4089,8 @@
 			if (nameSpace == NULL) {
 			    if (qualified < 0) 
 				qualified = 0;
-	    		} else if (!xmlStrEqual(nameSpace->prefix, attr->prefix)) {
+	    		} else if (!xmlStrEqual(nameSpace->prefix,
+				                attr->prefix)) {
 			    if (qualified < 1) 
 				qualified = 1;
 			} else