Fix bug #108976: make ID/REFs reference the ID in the document content

* parser.c: fix bug #108976, make the ID/REFs reference
  the ID in the document content and not the one in the entity copy
* SAX.c include/libxml/parser.h: more checking of the ID/REF
  stuff, better solution for #107208
* xmlregexp.c: removed a direct printf, dohhh
* xmlreader.c: fixed a bug on streaming validation of empty
  elements in entities
* result/VC/ElementValid8 test/VCM/v20.xml result/valid/xhtml1.xhtml:
  cleanup of the validation tests
* test/valid/id* test/valid/dtds/destfoo.ent result/valid/id*:
  added more ID/IDREF tests to the suite
Daniel
diff --git a/ChangeLog b/ChangeLog
index 4f70eec..082735b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+Sun Mar 23 12:57:00 CET 2003 Daniel Veillard <daniel@veillard.com>
+
+	* parser.c: fixing bug #108976 get the ID/REFs to reference 
+	  the ID in the document content and not in the entity copy
+	* SAX.c include/libxml/parser.h: more checking of the ID/REF
+	  stuff, better solution for #107208
+	* xmlregexp.c: removed a direct printf, dohhh
+        * xmlreader.c: fixed a bug on streaming validation of empty 
+	  elements in entities
+	* result/VC/ElementValid8 test/VCM/v20.xml result/valid/xhtml1.xhtml:
+	  cleanup of the validation tests
+	* test/valid/id* test/valid/dtds/destfoo.ent result/valid/id*:
+	  added more ID/IDREF tests to the suite
+
 Sat Mar 22 23:38:08 CET 2003 Daniel Veillard <daniel@veillard.com>
 
 	* xmlreader.c: fixed #107043 removing 2 warnings with Sun One
diff --git a/SAX.c b/SAX.c
index 3ee76e7..6e3cba1 100644
--- a/SAX.c
+++ b/SAX.c
@@ -880,7 +880,7 @@
 		                          0,0,0);
 	    ctxt->depth--;
 	} else {
-	    val = value;
+	    val = (xmlChar *) value;
 	}
 
 	if (val[0] != 0) {
@@ -932,7 +932,7 @@
 		                          0,0,0);
 	    ctxt->depth--;
 	} else {
-	    val = value;
+	    val = (xmlChar *) value;
 	}
 
 	if (val[0] == 0) {
@@ -1068,9 +1068,9 @@
 	    ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, ctxt->myDoc,
 					       ctxt->node, ret, value);
 	}
-    } else if ((((ctxt->replaceEntities == 0) && (ctxt->external != 2)) ||
-	        ((ctxt->replaceEntities != 0) && (ctxt->inSubset == 0))) &&
-	       (ctxt->depth == 0)) {
+    } else if (((ctxt->loadsubset & XML_SKIP_IDS) == 0) &&
+	       (((ctxt->replaceEntities == 0) && (ctxt->external != 2)) ||
+	        ((ctxt->replaceEntities != 0) && (ctxt->inSubset == 0)))) {
         /*
 	 * when validating, the ID registration is done at the attribute
 	 * validation level. Otherwise we have to do specific handling here.
diff --git a/include/libxml/parser.h b/include/libxml/parser.h
index e6725a3..d221825 100644
--- a/include/libxml/parser.h
+++ b/include/libxml/parser.h
@@ -134,6 +134,14 @@
 #define XML_COMPLETE_ATTRS	4
 
 /**
+ * XML_SKIP_IDS:
+ *
+ * Bit in the loadsubset context field to tell to not do ID/REFs registration.
+ * Used to initialize xmlLoadExtDtdDefaultValue in some special cases.
+ */
+#define XML_SKIP_IDS		8
+
+/**
  * xmlParserCtxt:
  *
  * The parser context.
diff --git a/parser.c b/parser.c
index f29d87d..1a53b7f 100644
--- a/parser.c
+++ b/parser.c
@@ -5620,24 +5620,57 @@
 		     * a simple tree copy for all references except the first
 		     * In the first occurrence list contains the replacement
 		     */
-		    if (list == NULL) {
-			xmlNodePtr new = NULL, cur, firstChild = NULL;
+		    if ((list == NULL) && (ent->owner == 0)) {
+			xmlNodePtr nw = NULL, cur, firstChild = NULL;
 			cur = ent->children;
 			while (cur != NULL) {
-			    new = xmlCopyNode(cur, 1);
-			    if (new != NULL) {
-				new->_private = cur->_private;
+			    nw = xmlCopyNode(cur, 1);
+			    if (nw != NULL) {
+				nw->_private = cur->_private;
 				if (firstChild == NULL){
-				    firstChild = new;
+				    firstChild = nw;
 				}
-				xmlAddChild(ctxt->node, new);
+				xmlAddChild(ctxt->node, nw);
 			    }
 			    if (cur == ent->last)
 				break;
 			    cur = cur->next;
 			}
 			if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)			      
-			  xmlAddEntityReference(ent, firstChild, new);
+			  xmlAddEntityReference(ent, firstChild, nw);
+		    } else if (list == NULL) {
+			xmlNodePtr nw = NULL, cur, next, last,
+			           firstChild = NULL;
+			/*
+			 * Copy the entity child list and make it the new
+			 * entity child list. The goal is to make sure any
+			 * ID or REF referenced will be the one from the
+			 * document content and not the entity copy.
+			 */
+			cur = ent->children;
+			ent->children = NULL;
+			last = ent->last;
+			ent->last = NULL;
+			while (cur != NULL) {
+			    next = cur->next;
+			    cur->next = NULL;
+			    cur->parent = NULL;
+			    nw = xmlCopyNode(cur, 1);
+			    if (nw != NULL) {
+				nw->_private = cur->_private;
+				if (firstChild == NULL){
+				    firstChild = cur;
+				}
+				xmlAddChild((xmlNodePtr) ent, nw);
+				xmlAddChild(ctxt->node, cur);
+			    }
+			    if (cur == last)
+				break;
+			    cur = next;
+			}
+			ent->owner = 1;
+			if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)			      
+			  xmlAddEntityReference(ent, firstChild, nw);
 		    } else {
 			/*
 			 * the name change is to avoid coalescing of the
@@ -9976,6 +10009,12 @@
 
     ctxt->validate = 0;
     ctxt->loadsubset = oldctxt->loadsubset;
+    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
+	/*
+	 * ID/IDREF registration will be done in xmlValidateElement below
+	 */
+	ctxt->loadsubset |= XML_SKIP_IDS;
+    }
 
     xmlParseContent(ctxt);
     if ((RAW == '<') && (NXT(1) == '/')) {
diff --git a/result/VC/ElementValid8 b/result/VC/ElementValid8
index 02072ab..73840df 100644
--- a/result/VC/ElementValid8
+++ b/result/VC/ElementValid8
@@ -1,3 +1,3 @@
-./test/VC/ElementValid8:7: validity warning: Content model for Element doc is ambiguous
+./test/VC/ElementValid8:7: validity error: Content model of doc is not determinist: ((a , b) | (a , c))
 <doc><a/><c> doc is non-deterministic </c></doc>
                                                ^
diff --git a/result/valid/id1.xml b/result/valid/id1.xml
new file mode 100644
index 0000000..4f0b9f7
--- /dev/null
+++ b/result/valid/id1.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<!DOCTYPE doc [
+<!ELEMENT doc (src | dest)*>
+<!ELEMENT src EMPTY>
+<!ELEMENT dest EMPTY>
+<!ATTLIST src ref IDREF #IMPLIED>
+<!ATTLIST dest id ID #IMPLIED>
+]>
+<doc>
+  <src ref="foo"/>
+  <dest id="foo"/>
+  <src ref="foo"/>
+</doc>
diff --git a/result/valid/id1.xml.err b/result/valid/id1.xml.err
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/result/valid/id1.xml.err
diff --git a/result/valid/id2.xml b/result/valid/id2.xml
new file mode 100644
index 0000000..0cef4a6
--- /dev/null
+++ b/result/valid/id2.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0"?>
+<!DOCTYPE doc [
+<!ELEMENT doc (src | dest)*>
+<!ELEMENT src EMPTY>
+<!ELEMENT dest EMPTY>
+<!ATTLIST src ref IDREF #IMPLIED>
+<!ATTLIST dest id ID #IMPLIED>
+<!ENTITY dest "<dest id='foo'/>">
+]>
+<doc>
+  <src ref="foo"/>
+  &dest;
+  <src ref="foo"/>
+</doc>
diff --git a/result/valid/id2.xml.err b/result/valid/id2.xml.err
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/result/valid/id2.xml.err
diff --git a/result/valid/id3.xml b/result/valid/id3.xml
new file mode 100644
index 0000000..623603c
--- /dev/null
+++ b/result/valid/id3.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0"?>
+<!DOCTYPE doc [
+<!ELEMENT doc (src | dest)*>
+<!ELEMENT src EMPTY>
+<!ELEMENT dest EMPTY>
+<!ATTLIST src ref IDREF #IMPLIED>
+<!ATTLIST dest id ID #IMPLIED>
+<!ENTITY dest SYSTEM "dtds/destfoo.ent">
+]>
+<doc>
+  <src ref="foo"/>
+  &dest;
+  <src ref="foo"/>
+</doc>
diff --git a/result/valid/id3.xml.err b/result/valid/id3.xml.err
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/result/valid/id3.xml.err
diff --git a/result/valid/xhtml1.xhtml b/result/valid/xhtml1.xhtml
index 1e9ac6f..58d20da 100644
--- a/result/valid/xhtml1.xhtml
+++ b/result/valid/xhtml1.xhtml
@@ -6,7 +6,7 @@
 <title>XHTML 1.0: The Extensible HyperText Markup
 Language</title>
 <link rel="stylesheet" href="W3C-PR.css" type="text/css" />
-<style type="text/css"><![CDATA[
+<style type="text/css">
 span.term { font-style: italic; color: rgb(0, 0, 192) }
 code {
 	color: green;
@@ -49,7 +49,7 @@
 }
 .tocline { list-style: none; }
 table.exceptions { background-color: rgb(255,255,153); }
-]]></style>
+</style>
 </head>
 <body>
 <div class="navbar">
diff --git a/test/VCM/v20.xml b/test/VCM/v20.xml
index a337efa..a9b0529 100644
--- a/test/VCM/v20.xml
+++ b/test/VCM/v20.xml
@@ -1,10 +1,10 @@
 <!DOCTYPE doc [
-<!ELEMENT doc ((a | b)*, a, b) >
+<!ELEMENT doc ((a | b)*, c, b) >
 <!ELEMENT a EMPTY>
 <!ELEMENT b EMPTY>
 <!ELEMENT c EMPTY>
 ]>
 <doc>
-<a/>
+<c/>
 <b/>
 </doc>
diff --git a/test/valid/dtds/destfoo.ent b/test/valid/dtds/destfoo.ent
new file mode 100644
index 0000000..0791e1a
--- /dev/null
+++ b/test/valid/dtds/destfoo.ent
@@ -0,0 +1 @@
+<dest id='foo'/>
diff --git a/test/valid/id1.xml b/test/valid/id1.xml
new file mode 100644
index 0000000..7390f5e
--- /dev/null
+++ b/test/valid/id1.xml
@@ -0,0 +1,13 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (src | dest)*>
+<!ELEMENT src EMPTY>
+<!ELEMENT dest EMPTY>
+<!ATTLIST src ref IDREF #IMPLIED>
+<!ATTLIST dest id ID #IMPLIED>
+]>
+<doc>
+  <src ref="foo"/>
+  <dest id="foo"/>
+  <src ref="foo"/>
+</doc>
+
diff --git a/test/valid/id2.xml b/test/valid/id2.xml
new file mode 100644
index 0000000..5b4a77a
--- /dev/null
+++ b/test/valid/id2.xml
@@ -0,0 +1,14 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (src | dest)*>
+<!ELEMENT src EMPTY>
+<!ELEMENT dest EMPTY>
+<!ATTLIST src ref IDREF #IMPLIED>
+<!ATTLIST dest id ID #IMPLIED>
+<!ENTITY dest "<dest id='foo'/>">
+]>
+<doc>
+  <src ref="foo"/>
+  &dest;
+  <src ref="foo"/>
+</doc>
+
diff --git a/test/valid/id3.xml b/test/valid/id3.xml
new file mode 100644
index 0000000..933d06d
--- /dev/null
+++ b/test/valid/id3.xml
@@ -0,0 +1,14 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (src | dest)*>
+<!ELEMENT src EMPTY>
+<!ELEMENT dest EMPTY>
+<!ATTLIST src ref IDREF #IMPLIED>
+<!ATTLIST dest id ID #IMPLIED>
+<!ENTITY dest SYSTEM "dtds/destfoo.ent">
+]>
+<doc>
+  <src ref="foo"/>
+  &dest;
+  <src ref="foo"/>
+</doc>
+
diff --git a/valid.c b/valid.c
index 599a818..ee2e678 100644
--- a/valid.c
+++ b/valid.c
@@ -5170,6 +5170,7 @@
     xmlElementPtr eDecl;
     int extsubset = 0;
 
+/* printf("PushElem %s\n", qname); */
     if ((ctxt->vstateNr > 0) && (ctxt->vstate != NULL)) {
 	xmlValidStatePtr state = ctxt->vstate;
 	xmlElementPtr elemDecl;
@@ -5257,6 +5258,7 @@
 xmlValidatePushCData(xmlValidCtxtPtr ctxt, const xmlChar *data, int len) {
     int ret = 1;
 
+/* printf("CDATA %s %d\n", data, len); */
     if (len <= 0)
 	return(ret);
     if ((ctxt->vstateNr > 0) && (ctxt->vstate != NULL)) {
@@ -5330,6 +5332,7 @@
 		      const xmlChar *qname ATTRIBUTE_UNUSED) {
     int ret = 1;
 
+/* printf("PopElem %s\n", qname); */
     if ((ctxt->vstateNr > 0) && (ctxt->vstate != NULL)) {
 	xmlValidStatePtr state = ctxt->vstate;
 	xmlElementPtr elemDecl;
diff --git a/xmlreader.c b/xmlreader.c
index 31bdcef..d696bd8 100644
--- a/xmlreader.c
+++ b/xmlreader.c
@@ -524,6 +524,8 @@
 	if (node->children != NULL) {
 	    node = node->children;
 	    continue;
+	} else if (node->type == XML_ELEMENT_NODE) {
+	    xmlTextReaderValidatePop(reader);
 	}
 	if (node->next != NULL) {
 	    node = node->next;
diff --git a/xmlregexp.c b/xmlregexp.c
index 481837b..c49f2a6 100644
--- a/xmlregexp.c
+++ b/xmlregexp.c
@@ -455,7 +455,6 @@
 		prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
 		if (prev != 0) {
 		    if (prev != targetno + 1) {
-			printf("not determinist\n");
 			ret->determinist = 0;
 #ifdef DEBUG_COMPACTION
 			printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",