More work on RelaxNG streaming validation, trying to improve the subset compiled

* relaxng.c xmllint.c: more work on RelaxNG streaming validation
  trying to improve the subset compiled, and more testing.
* doc/downloads.html doc/xml.html doc/xmlmem.html: some updates on the
  documentation
* test/relaxng/tutor11_1_3.xml: fixes the DTD path
* result/relaxng/*.err: fix some of the outputs
Daniel
diff --git a/ChangeLog b/ChangeLog
index c82ea6f..fa43615 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+Wed Apr 16 17:46:50 CEST 2003 Daniel Veillard <daniel@veillard.com>
+
+	* relaxng.c xmllint.c: more work on RelaxNG streaming validation
+	  trying to improve the subset compiled, and more testing.
+	* doc/downloads.html doc/xml.html doc/xmlmem.html: some updates on the
+	  documentation
+	* test/relaxng/tutor11_1_3.xml: fixes the DTD path
+	* result/relaxng/*.err: fix some of the outputs
+
 Wed Apr 16 01:28:15 CEST 2003 Daniel Veillard <daniel@veillard.com>
 
 	* relaxng.c xmlreader.c xmllint.c include/libxml/relaxng.h
diff --git a/doc/downloads.html b/doc/downloads.html
index aaa1188..7daf0df 100644
--- a/doc/downloads.html
+++ b/doc/downloads.html
@@ -109,7 +109,7 @@
     any architecture supported by Red Hat.</li>
   <li>
 <p><a href="mailto:igor@zlatkovic.com">Igor  Zlatkovic</a></p>
-     is now the maintainer of the Windows port, <a href="http://www.zlatkovic.com/projects/libxml/index.html">he provides
+    is now the maintainer of the Windows port, <a href="http://www.zlatkovic.com/projects/libxml/index.html">he provides
     binaries</a>.</li>
   <li>
 <a href="mailto:Gary.Pennington@sun.com">Gary Pennington</a> provides
diff --git a/doc/xml.html b/doc/xml.html
index 9470892..2c45b25 100644
--- a/doc/xml.html
+++ b/doc/xml.html
@@ -580,7 +580,7 @@
     href="ftp://xmlsoft.org/">xmlsoft.org</a>, the source RPM will compile on
     any architecture supported by Red Hat.</li>
   <li><p><a href="mailto:igor@zlatkovic.com">Igor  Zlatkovic</a></p>
-     is now the maintainer of the Windows port, <a
+    is now the maintainer of the Windows port, <a
     href="http://www.zlatkovic.com/projects/libxml/index.html">he provides
     binaries</a>.</li>
   <li><a href="mailto:Gary.Pennington@sun.com">Gary Pennington</a> provides
@@ -2460,9 +2460,14 @@
     memory when parsed). Validation will add a amount of memory required for
     maintaining the external Dtd state which should be linear with the
     complexity of the content model defined by the Dtd</li>
+  <li>If you need to work with fixed memory requirements or don't need the
+    full DOM tree then using the <a href="xmlreader.html">xmlReader
+    interface</a> is probably the best way to proceed; it still allows you to
+    validate or operate on a subset of the tree if needed.</li>
   <li>If you don't care about the advanced features of libxml like
-    validation, DOM, XPath or XPointer, but really need to work fixed memory
-    requirements, then the SAX interface should be used.</li>
+    validation, DOM, XPath or XPointer, don't use entities, need to work with
+    fixed memory requirements, and try to get the fastest parsing possible,
+    then the SAX interface should be used, but it has known restrictions.</li>
 </ul>
 
 <p></p>
diff --git a/doc/xmlmem.html b/doc/xmlmem.html
index f863c80..7d0b3d4 100644
--- a/doc/xmlmem.html
+++ b/doc/xmlmem.html
@@ -214,9 +214,14 @@
     memory when parsed). Validation will add a amount of memory required for
     maintaining the external Dtd state which should be linear with the
     complexity of the content model defined by the Dtd</li>
+  <li>If you need to work with fixed memory requirements or don't need the
+    full DOM tree then using the <a href="xmlreader.html">xmlReader
+    interface</a> is probably the best way to proceed; it still allows you to
+    validate or operate on a subset of the tree if needed.</li>
   <li>If you don't care about the advanced features of libxml like
-    validation, DOM, XPath or XPointer, but really need to work fixed memory
-    requirements, then the SAX interface should be used.</li>
+    validation, DOM, XPath or XPointer, don't use entities, need to work with
+    fixed memory requirements, and try to get the fastest parsing possible,
+    then the SAX interface should be used, but it has known restrictions.</li>
 </ul>
 <p></p>
 <p><a href="bugs.html">Daniel Veillard</a></p>
diff --git a/relaxng.c b/relaxng.c
index cb3c130..b7a0711 100644
--- a/relaxng.c
+++ b/relaxng.c
@@ -3617,6 +3617,78 @@
 }
 
 /**
+ * xmlRelaxNGGenerateAttributes:
+ * @ctxt:  a Relax-NG parser context
+ * @def:  the definition to check
+ *
+ * Check if the definition, walked with its content, can only generate attributes
+ *
+ * Returns 1 if yes, 0 if no and -1 if @ctxt already recorded errors.
+ */
+static int
+xmlRelaxNGGenerateAttributes(xmlRelaxNGParserCtxtPtr ctxt,
+	                     xmlRelaxNGDefinePtr def) {
+    xmlRelaxNGDefinePtr parent, cur, tmp;
+
+    /*
+     * Don't run the check if errors were already reported: infinite
+     * recursion becomes possible.
+     */
+    if (ctxt->nbErrors != 0)
+	return(-1);
+
+    parent = NULL;
+    cur = def;
+    while (cur != NULL) {
+	if ((cur->type == XML_RELAXNG_ELEMENT) ||
+	    (cur->type == XML_RELAXNG_TEXT) ||
+	    (cur->type == XML_RELAXNG_DATATYPE) ||
+	    (cur->type == XML_RELAXNG_PARAM) ||
+	    (cur->type == XML_RELAXNG_LIST) ||
+	    (cur->type == XML_RELAXNG_VALUE) ||
+	    (cur->type == XML_RELAXNG_EMPTY))
+	    return(0); /* not an attribute-only pattern */
+	if ((cur->type == XML_RELAXNG_CHOICE) ||
+	    (cur->type == XML_RELAXNG_INTERLEAVE) ||
+	    (cur->type == XML_RELAXNG_GROUP) ||
+	    (cur->type == XML_RELAXNG_ONEORMORE) ||
+	    (cur->type == XML_RELAXNG_ZEROORMORE) ||
+	    (cur->type == XML_RELAXNG_OPTIONAL) ||
+	    (cur->type == XML_RELAXNG_PARENTREF) ||
+	    (cur->type == XML_RELAXNG_EXTERNALREF) ||
+	    (cur->type == XML_RELAXNG_REF) ||
+	    (cur->type == XML_RELAXNG_DEF)) {
+	    if (cur->content != NULL) {
+		parent = cur;
+		cur = cur->content;
+		tmp = cur;
+		while (tmp != NULL) {
+		    tmp->parent = parent; /* link children back up for the climb below */
+		    tmp = tmp->next;
+		}
+		continue;
+	    }
+	}
+	if (cur == def) /* @def itself has nothing to descend into */
+	    break;
+	if (cur->next != NULL) {
+	    cur = cur->next;
+	    continue;
+	}
+	do {
+	    cur = cur->parent;
+	    if (cur == NULL) break;
+	    if (cur == def) return(1); /* climbed back to @def: whole subtree visited */
+	    if (cur->next != NULL) {
+		cur = cur->next;
+		break;
+	    }
+	} while (cur != NULL);
+    }
+    return(1);
+}
+	                     
+/**
  * xmlRelaxNGGetElements:
  * @ctxt:  a Relax-NG parser context
  * @def:  the definition definition
@@ -5122,6 +5194,7 @@
 		    }
 		    break;
 		case XML_RELAXNG_ATTRIBUTE:
+		    /* HERE !!! */
 		    cur->next = ret->attrs;
 		    ret->attrs = cur;
 		    break;
@@ -5741,6 +5814,47 @@
 	    if (cur->nameClass != NULL)
 		xmlRelaxNGSimplify(ctxt, cur->nameClass, cur);
 	    /*
+	     * On Elements, try to move attribute only generating rules on
+	     * the attrs rules.
+	     */
+	    if (cur->type == XML_RELAXNG_ELEMENT) {
+	        int attronly;
+		xmlRelaxNGDefinePtr tmp, pre;
+
+	        while (cur->content != NULL) {
+		    attronly = xmlRelaxNGGenerateAttributes(ctxt, cur->content);
+		    if (attronly == 1) {
+		        /*
+			 * migrate cur->content to attrs
+			 */
+		        tmp = cur->content;
+			cur->content = tmp->next;
+			tmp->next = cur->attrs;
+			cur->attrs = tmp;
+		    } else {
+		        /*
+			 * cur->content can generate elements or text
+			 */
+			break;
+		    }
+		}
+		pre = cur->content;
+		while ((pre != NULL) && (pre->next != NULL)) {
+		    tmp = pre->next;
+		    attronly = xmlRelaxNGGenerateAttributes(ctxt, tmp);
+		    if (attronly == 1) {
+		        /*
+			 * migrate tmp to attrs
+			 */
+			pre->next = tmp->next;
+			tmp->next = cur->attrs;
+			cur->attrs = tmp;
+		    } else {
+			pre = tmp;
+		    }
+		}
+	    }
+	    /*
 	     * This may result in a simplification
 	     */
 	    if ((cur->type == XML_RELAXNG_GROUP) ||
@@ -7106,18 +7220,18 @@
     /*
      * try to compile (parts of) the schemas
      */
-    if ((ctxt->grammar != NULL) && (ctxt->grammar->start != NULL)) {
-        if (ctxt->grammar->start->type != XML_RELAXNG_START) {
+    if ((ret->topgrammar != NULL) && (ret->topgrammar->start != NULL)) {
+        if (ret->topgrammar->start->type != XML_RELAXNG_START) {
 	    xmlRelaxNGDefinePtr def;
 
 	    def = xmlRelaxNGNewDefine(ctxt, NULL);
 	    if (def != NULL) {
 		def->type = XML_RELAXNG_START;
-		def->content = ctxt->grammar->start;
-		ctxt->grammar->start = def;
+		def->content = ret->topgrammar->start;
+		ret->topgrammar->start = def;
 	    }
 	}
-	xmlRelaxNGTryCompile(ctxt, ctxt->grammar->start);
+	xmlRelaxNGTryCompile(ctxt, ret->topgrammar->start);
     }
 
     /*
@@ -7603,9 +7717,9 @@
 				   void *inputdata) {
     xmlRelaxNGValidCtxtPtr ctxt = (xmlRelaxNGValidCtxtPtr) inputdata;
     xmlRelaxNGDefinePtr define = (xmlRelaxNGDefinePtr) transdata;
-    xmlRelaxNGValidStatePtr state;
+    xmlRelaxNGValidStatePtr state, oldstate;
     xmlNodePtr node = ctxt->pnode;
-    int ret;
+    int ret, oldflags;
 
 #ifdef DEBUG_PROGRESSIVE
     xmlGenericError(xmlGenericErrorContext,
@@ -7674,6 +7788,7 @@
 	ctxt->pstate = -1;
 	return;
     }
+    oldstate = ctxt->state;
     ctxt->state = state;
     if (define->attrs != NULL) {
 	ret = xmlRelaxNGValidateAttributeList(ctxt, define->attrs);
@@ -7682,13 +7797,40 @@
 	    VALID_ERR2(XML_RELAXNG_ERR_ATTRVALID, node->name);
 	}
     }
-    ctxt->state->seq = NULL;
-    ret = xmlRelaxNGValidateElementEnd(ctxt);
-    if (ret != 0) {
-        ctxt->pstate = -1;
+    if (ctxt->state != NULL) {
+	ctxt->state->seq = NULL;
+	ret = xmlRelaxNGValidateElementEnd(ctxt);
+	if (ret != 0) {
+	    ctxt->pstate = -1;
+	}
+	xmlRelaxNGFreeValidState(ctxt, ctxt->state);
+    } else if (ctxt->states != NULL) {
+	int tmp = -1, i;
+
+        oldflags = ctxt->flags;
+	ctxt->flags |= FLAGS_IGNORABLE;
+
+	for (i = 0; i < ctxt->states->nbState; i++) {
+	    state = ctxt->states->tabState[i];
+	    ctxt->state = state;
+	    ctxt->state->seq = NULL;
+
+	    if (xmlRelaxNGValidateElementEnd(ctxt) == 0)
+		tmp = 0;
+	    xmlRelaxNGFreeValidState(ctxt, state);
+	}
+	xmlRelaxNGFreeStates(ctxt, ctxt->states);
+	ctxt->states = NULL;
+	if ((ret == 0) && (tmp == -1))
+	    ctxt->pstate = -1;
+	ctxt->flags = oldflags;
     }
-    xmlRelaxNGFreeValidState(ctxt, state);
-    ctxt->state = NULL;
+    if (ctxt->pstate == -1) {
+	if ((ctxt->flags & FLAGS_IGNORABLE) == 0) {
+	    xmlRelaxNGDumpValidError(ctxt);
+	}
+    }
+    ctxt->state = oldstate;
 }
 
 /**
@@ -8551,12 +8693,38 @@
 static int
 xmlRelaxNGValidateAttributeList(xmlRelaxNGValidCtxtPtr ctxt, 
 	                        xmlRelaxNGDefinePtr defines) {
-    int ret = 0;
-    while (defines != NULL) {
-	if (xmlRelaxNGValidateAttribute(ctxt, defines) != 0)
-	    ret = -1;
-        defines = defines->next;
+    int ret = 0, res;
+    int needmore = 0;
+    xmlRelaxNGDefinePtr cur;
+
+    cur = defines;
+    while (cur != NULL) {
+        if (cur->type == XML_RELAXNG_ATTRIBUTE) {
+	    if (xmlRelaxNGValidateAttribute(ctxt, cur) != 0)
+		ret = -1;
+	} else
+	    needmore = 1;
+        cur = cur->next;
     }
+    if (!needmore)
+	return(ret);
+    cur = defines;
+    while (cur != NULL) {
+        if (cur->type != XML_RELAXNG_ATTRIBUTE) {
+	    if ((ctxt->state != NULL) || (ctxt->states != NULL)) {
+		res = xmlRelaxNGValidateDefinition(ctxt, cur);
+		if (res < 0)
+		    ret = -1;
+	    } else {
+		VALID_ERR(XML_RELAXNG_ERR_NOSTATE);
+		return(-1);
+	    }
+	    if (res == -1) /* continues on -2 */
+		break;
+	}
+        cur = cur->next;
+    }
+      
     return(ret);
 }
 
@@ -9162,22 +9330,55 @@
                 }
             }
             if (define->contModel != NULL) {
+	        xmlRelaxNGValidStatePtr nstate, tmpstate = ctxt->state;
+		xmlRelaxNGStatesPtr tmpstates = ctxt->states;
+		xmlNodePtr nseq;
+
+	        nstate = xmlRelaxNGNewValidState(ctxt, node);
+	        ctxt->state = nstate;
+	        ctxt->states = NULL;
+
                 tmp = xmlRelaxNGValidateCompiledContent(ctxt,
                                                         define->contModel,
                                                         ctxt->state->seq);
+		nseq = ctxt->state->seq;
+		ctxt->state = tmpstate;
+		ctxt->states = tmpstates;
+		xmlRelaxNGFreeValidState(ctxt, nstate);
+
 #ifdef DEBUG_COMPILE
 		xmlGenericError(xmlGenericErrorContext,
 			"Validating content of '%s' : %d\n", define->name, tmp);
 #endif
-                state = ctxt->state;
-                if (tmp == 0) {
-                    tmp = xmlRelaxNGValidateElementEnd(ctxt);
-		    if (tmp != 0)
-		        ret = -1;
-		} else {
+                if (tmp != 0)
 		    ret = -1;
-		}
-                xmlRelaxNGFreeValidState(ctxt, state);
+
+                if (ctxt->states != NULL) {
+                    tmp = -1;
+
+                    ctxt->flags |= FLAGS_IGNORABLE;
+
+                    for (i = 0; i < ctxt->states->nbState; i++) {
+                        state = ctxt->states->tabState[i];
+                        ctxt->state = state;
+			ctxt->state->seq = nseq;
+
+                        if (xmlRelaxNGValidateElementEnd(ctxt) == 0)
+                            tmp = 0;
+                        xmlRelaxNGFreeValidState(ctxt, state);
+                    }
+                    xmlRelaxNGFreeStates(ctxt, ctxt->states);
+                    ctxt->flags = oldflags;
+                    ctxt->states = NULL;
+                    if ((ret == 0) && (tmp == -1))
+                        ret = -1;
+                } else {
+                    state = ctxt->state;
+		    ctxt->state->seq = nseq;
+                    if (ret == 0)
+                        ret = xmlRelaxNGValidateElementEnd(ctxt);
+                    xmlRelaxNGFreeValidState(ctxt, state);
+                }
             } else {
                 if (define->content != NULL) {
                     tmp = xmlRelaxNGValidateDefinitionList(ctxt,
diff --git a/result/relaxng/docbook_0.err b/result/relaxng/docbook_0.err
index 86c4912..e69de29 100644
--- a/result/relaxng/docbook_0.err
+++ b/result/relaxng/docbook_0.err
@@ -1,3 +0,0 @@
-./test/relaxng/docbook_0.xml:1864: error: Entity 'copy' not defined
-             <sgmltag>&amp;copy;</sgmltag> &mdash; copyright sign (&copy;)
-                                                                         ^
diff --git a/result/relaxng/tutor11_3_1.err b/result/relaxng/tutor11_3_1.err
index 1d13cf4..f3d6d11 100644
--- a/result/relaxng/tutor11_3_1.err
+++ b/result/relaxng/tutor11_3_1.err
@@ -1 +1,2 @@
 Attributes conflicts in group
+Relax-NG schema ./test/relaxng/tutor11_3.rng failed to compile
diff --git a/result/relaxng/tutor3_7_1.err b/result/relaxng/tutor3_7_1.err
index abeed91..e36d2bb 100644
--- a/result/relaxng/tutor3_7_1.err
+++ b/result/relaxng/tutor3_7_1.err
@@ -1 +1,2 @@
 xmlRelaxNGParseElement: element has no content
+Relax-NG schema ./test/relaxng/tutor3_7.rng failed to compile
diff --git a/result/relaxng/tutor4_4_1.err b/result/relaxng/tutor4_4_1.err
index dd78d26..8b40445 100644
--- a/result/relaxng/tutor4_4_1.err
+++ b/result/relaxng/tutor4_4_1.err
@@ -1 +1,2 @@
 Detected a cycle in inline references
+Relax-NG schema ./test/relaxng/tutor4_4.rng failed to compile
diff --git a/result/relaxng/tutor5_3_1.err b/result/relaxng/tutor5_3_1.err
index 9b9aa0c..343fa31 100644
--- a/result/relaxng/tutor5_3_1.err
+++ b/result/relaxng/tutor5_3_1.err
@@ -1 +1,2 @@
 Element bad has a content type error
+Relax-NG schema ./test/relaxng/tutor5_3.rng failed to compile
diff --git a/test/relaxng/tutor11_1_3.xml b/test/relaxng/tutor11_1_3.xml
index 4e93fb3..c778ce0 100644
--- a/test/relaxng/tutor11_1_3.xml
+++ b/test/relaxng/tutor11_1_3.xml
@@ -1,5 +1,5 @@
 <?xml version='1.0' encoding='ISO-8859-1' standalone='no'?>
-<!DOCTYPE spec SYSTEM "dtds/spec.dtd" [
+<!DOCTYPE spec SYSTEM "../valid/dtds/spec.dtd" [
 
 <!-- LAST TOUCHED BY: Tim Bray, 8 February 1997 -->
 
diff --git a/xmllint.c b/xmllint.c
index 2aac293..60c175b 100644
--- a/xmllint.c
+++ b/xmllint.c
@@ -624,18 +624,41 @@
     if (reader != NULL) {
 	if (valid)
 	    xmlTextReaderSetParserProp(reader, XML_PARSER_VALIDATE, 1);
-	if (relaxng != NULL)
-	    xmlTextReaderRelaxNGValidate(reader, relaxng);
+	else
+	    xmlTextReaderSetParserProp(reader, XML_PARSER_LOADDTD, 1);
+	if (relaxng != NULL) {
+	    if (timing) {
+		startTimer();
+	    }
+	    ret = xmlTextReaderRelaxNGValidate(reader, relaxng);
+	    if (ret < 0) {
+		xmlGenericError(xmlGenericErrorContext,
+			"Relax-NG schema %s failed to compile\n", relaxng);
+		relaxng = NULL;
+	    }
+	    if (timing) {
+		endTimer("Compiling the schemas");
+	    }
+	}
 
 	/*
 	 * Process all nodes in sequence
 	 */
+	if (timing) {
+	    startTimer();
+	}
 	ret = xmlTextReaderRead(reader);
 	while (ret == 1) {
 	    if (debug)
 		processNode(reader);
 	    ret = xmlTextReaderRead(reader);
 	}
+	if (timing) {
+	    if ((valid) || (relaxng != NULL))
+		endTimer("Parsing and validating");
+	    else
+		endTimer("Parsing");
+	}
 
 	if (valid) {
 	    if (xmlTextReaderIsValid(reader) != 1) {
@@ -1502,9 +1525,11 @@
     }
 
 #ifdef LIBXML_SCHEMAS_ENABLED
-    if (relaxng != NULL) {
+    if ((relaxng != NULL) && (stream == 0)) {
 	xmlRelaxNGParserCtxtPtr ctxt;
 
+        /* forces loading the DTDs */
+        xmlLoadExtDtdDefaultValue |= 1; 
 	if (timing) {
 	    startTimer();
 	}
@@ -1514,6 +1539,11 @@
 		(xmlRelaxNGValidityWarningFunc) fprintf,
 		stderr);
 	relaxngschemas = xmlRelaxNGParse(ctxt);
+	if (relaxngschemas == NULL) {
+	    xmlGenericError(xmlGenericErrorContext,
+		    "Relax-NG schema %s failed to compile\n", relaxng);
+	    relaxng = NULL;
+	}
 	xmlRelaxNGFreeParserCtxt(ctxt);
 	if (timing) {
 	    endTimer("Compiling the schemas");