another entity processing update from Markus Henke Daniel

* tree.c: another entity processing update from Markus Henke
Daniel
diff --git a/ChangeLog b/ChangeLog
index 87789fc..2569fb5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+Fri Apr 19 18:26:04 CEST 2002 Daniel Veillard <daniel@veillard.com>
+
+	* tree.c: another entity processing update from Markus Henke
+
 Fri Apr 19 17:14:24 CEST 2002 Bjorn Reese <breese@users.sourceforge.net>
 
 	* trionan.c: fixed crash on OSF/1
diff --git a/include/libxml/xmlautomata.h b/include/libxml/xmlautomata.h
index 853acc4..c7d5b05 100644
--- a/include/libxml/xmlautomata.h
+++ b/include/libxml/xmlautomata.h
@@ -60,6 +60,16 @@
 						 int min,
 						 int max,
 						 void *data);
+xmlAutomataStatePtr	xmlAutomataNewOnceTrans	(xmlAutomataPtr am,
+						 xmlAutomataStatePtr from,
+						 xmlAutomataStatePtr to,
+						 const xmlChar *token,
+						 int min,
+						 int max,
+						 void *data);
+xmlAutomataStatePtr	xmlAutomataNewAllTrans	(xmlAutomataPtr am,
+						 xmlAutomataStatePtr from,
+						 xmlAutomataStatePtr to);
 xmlAutomataStatePtr	xmlAutomataNewEpsilon	(xmlAutomataPtr am,
 						 xmlAutomataStatePtr from,
 						 xmlAutomataStatePtr to);
diff --git a/tree.c b/tree.c
index 086cb16..e5a4958 100644
--- a/tree.c
+++ b/tree.c
@@ -902,56 +902,73 @@
  * Returns a pointer to the string copy, the caller must free it.
  */
 xmlChar *
-xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine) {
+xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine)
+{
     xmlNodePtr node = list;
     xmlChar *ret = NULL;
     xmlEntityPtr ent;
 
-    if (list == NULL) return(NULL);
+    if (list == NULL)
+        return (NULL);
 
     while (node != NULL) {
         if ((node->type == XML_TEXT_NODE) ||
-	    (node->type == XML_CDATA_SECTION_NODE)) {
-	    if (inLine) {
-		ret = xmlStrcat(ret, node->content);
-	    } else {
-	        xmlChar *buffer;
-
-		buffer = xmlEncodeEntitiesReentrant(doc, node->content);
-		if (buffer != NULL) {
-		    ret = xmlStrcat(ret, buffer);
-		    xmlFree(buffer);
-		}
-            }
-	} else if (node->type == XML_ENTITY_REF_NODE) {
-	    if (inLine) {
-		ent = xmlGetDocEntity(doc, node->name);
-		if (ent != NULL)
-		    ret = xmlStrcat(ret, ent->content);
-		else {
-		    ret = xmlStrcat(ret, node->content);
-		}    
+            (node->type == XML_CDATA_SECTION_NODE)) {
+            if (inLine) {
+                ret = xmlStrcat(ret, node->content);
             } else {
-	        xmlChar buf[2];
-		buf[0] = '&'; buf[1] = 0;
-		ret = xmlStrncat(ret, buf, 1);
-		ret = xmlStrcat(ret, node->name);
-		buf[0] = ';'; buf[1] = 0;
-		ret = xmlStrncat(ret, buf, 1);
-	    }
-	}
-#if 0
-	else {
-	    xmlGenericError(xmlGenericErrorContext,
-		    "xmlGetNodeListString : invalid node type %d\n",
-	            node->type);
-	}
-#endif
-	node = node->next;
-    }
-    return(ret);
-}
+                xmlChar *buffer;
 
+                buffer = xmlEncodeEntitiesReentrant(doc, node->content);
+                if (buffer != NULL) {
+                    ret = xmlStrcat(ret, buffer);
+                    xmlFree(buffer);
+                }
+            }
+        } else if (node->type == XML_ENTITY_REF_NODE) {
+            if (inLine) {
+                ent = xmlGetDocEntity(doc, node->name);
+                if (ent != NULL) {
+                    xmlChar *buffer;
+
+                    /* an entity content can be any "well balanced chunk",
+                     * i.e. the result of the content [43] production:
+                     * http://www.w3.org/TR/REC-xml#NT-content.
+                     * So it can contain text, CDATA section or nested
+                     * entity reference nodes (among others).
+                     * -> we recursively call xmlNodeListGetString()
+                     * which handles these types */
+                    buffer = xmlNodeListGetString(doc, ent->children, 1);
+                    if (buffer != NULL) {
+                        ret = xmlStrcat(ret, buffer);
+                        xmlFree(buffer);
+                    }
+                } else {
+                    ret = xmlStrcat(ret, node->content);
+                }
+            } else {
+                xmlChar buf[2];
+
+                buf[0] = '&';
+                buf[1] = 0;
+                ret = xmlStrncat(ret, buf, 1);
+                ret = xmlStrcat(ret, node->name);
+                buf[0] = ';';
+                buf[1] = 0;
+                ret = xmlStrncat(ret, buf, 1);
+            }
+        }
+#if 0
+        else {
+            xmlGenericError(xmlGenericErrorContext,
+                            "xmlGetNodeListString : invalid node type %d\n",
+                            node->type);
+        }
+#endif
+        node = node->next;
+    }
+    return (ret);
+}
 /**
  * xmlNodeListGetRawString:
  * @doc:  the document
@@ -965,54 +982,73 @@
  * Returns a pointer to the string copy, the caller must free it.
  */
 xmlChar *
-xmlNodeListGetRawString(xmlDocPtr doc, xmlNodePtr list, int inLine) {
+xmlNodeListGetRawString(xmlDocPtr doc, xmlNodePtr list, int inLine)
+{
     xmlNodePtr node = list;
     xmlChar *ret = NULL;
     xmlEntityPtr ent;
 
-    if (list == NULL) return(NULL);
+    if (list == NULL)
+        return (NULL);
 
     while (node != NULL) {
         if ((node->type == XML_TEXT_NODE) ||
-	    (node->type == XML_CDATA_SECTION_NODE)) {
-	    if (inLine) {
-		ret = xmlStrcat(ret, node->content);
-	    } else {
-	        xmlChar *buffer;
-
-		buffer = xmlEncodeSpecialChars(doc, node->content);
-		if (buffer != NULL) {
-		    ret = xmlStrcat(ret, buffer);
-		    xmlFree(buffer);
-		}
-            }
-	} else if (node->type == XML_ENTITY_REF_NODE) {
-	    if (inLine) {
-		ent = xmlGetDocEntity(doc, node->name);
-		if (ent != NULL)
-		    ret = xmlStrcat(ret, ent->content);
-		else {
-		    ret = xmlStrcat(ret, node->content);
-		}    
+            (node->type == XML_CDATA_SECTION_NODE)) {
+            if (inLine) {
+                ret = xmlStrcat(ret, node->content);
             } else {
-	        xmlChar buf[2];
-		buf[0] = '&'; buf[1] = 0;
-		ret = xmlStrncat(ret, buf, 1);
-		ret = xmlStrcat(ret, node->name);
-		buf[0] = ';'; buf[1] = 0;
-		ret = xmlStrncat(ret, buf, 1);
-	    }
-	}
+                xmlChar *buffer;
+
+                buffer = xmlEncodeSpecialChars(doc, node->content);
+                if (buffer != NULL) {
+                    ret = xmlStrcat(ret, buffer);
+                    xmlFree(buffer);
+                }
+            }
+        } else if (node->type == XML_ENTITY_REF_NODE) {
+            if (inLine) {
+                ent = xmlGetDocEntity(doc, node->name);
+                if (ent != NULL) {
+                    xmlChar *buffer;
+
+                    /* an entity content can be any "well balanced chunk",
+                     * i.e. the result of the content [43] production:
+                     * http://www.w3.org/TR/REC-xml#NT-content.
+                     * So it can contain text, CDATA section or nested
+                     * entity reference nodes (among others).
+                     * -> we recursively call xmlNodeListGetRawString()
+                     * which handles these types */
+                    buffer =
+                        xmlNodeListGetRawString(doc, ent->children, 1);
+                    if (buffer != NULL) {
+                        ret = xmlStrcat(ret, buffer);
+                        xmlFree(buffer);
+                    }
+                } else {
+                    ret = xmlStrcat(ret, node->content);
+                }
+            } else {
+                xmlChar buf[2];
+
+                buf[0] = '&';
+                buf[1] = 0;
+                ret = xmlStrncat(ret, buf, 1);
+                ret = xmlStrcat(ret, node->name);
+                buf[0] = ';';
+                buf[1] = 0;
+                ret = xmlStrncat(ret, buf, 1);
+            }
+        }
 #if 0
-	else {
-	    xmlGenericError(xmlGenericErrorContext,
-		    "xmlGetNodeListString : invalid node type %d\n",
-	            node->type);
-	}
+        else {
+            xmlGenericError(xmlGenericErrorContext,
+                            "xmlGetNodeListString : invalid node type %d\n",
+                            node->type);
+        }
 #endif
-	node = node->next;
+        node = node->next;
     }
-    return(ret);
+    return (ret);
 }
 
 /**
@@ -3763,122 +3799,159 @@
  *     It's up to the caller to free the memory.
  */
 xmlChar *
-xmlNodeGetContent(xmlNodePtr cur) {
-    if (cur == NULL) return(NULL);
+xmlNodeGetContent(xmlNodePtr cur)
+{
+    if (cur == NULL)
+        return (NULL);
     switch (cur->type) {
         case XML_DOCUMENT_FRAG_NODE:
-        case XML_ELEMENT_NODE: {
-	    xmlNodePtr tmp = cur;
-	    xmlBufferPtr buffer;
-	    xmlChar *ret;
+        case XML_ELEMENT_NODE:{
+                xmlNodePtr tmp = cur;
+                xmlBufferPtr buffer;
+                xmlChar *ret;
 
-            buffer = xmlBufferCreate();
-	    if (buffer == NULL)
-		return(NULL);
-	    while (tmp != NULL) {
-		switch (tmp->type) {
-		    case XML_CDATA_SECTION_NODE:
-		    case XML_TEXT_NODE:
-			if (tmp->content != NULL)
-			    xmlBufferCat(buffer, tmp->content);
-			break;
-		    case XML_ENTITY_REF_NODE: {
-		        xmlEntityPtr ent;
+                buffer = xmlBufferCreate();
+                if (buffer == NULL)
+                    return (NULL);
+                while (tmp != NULL) {
+                    switch (tmp->type) {
+                        case XML_CDATA_SECTION_NODE:
+                        case XML_TEXT_NODE:
+                            if (tmp->content != NULL)
+                                xmlBufferCat(buffer, tmp->content);
+                            break;
+                        case XML_ENTITY_REF_NODE:{
+                                /* recursive substitution of entity references */
+                                xmlChar *cont = xmlNodeGetContent(tmp);
 
-			ent = xmlGetDocEntity(cur->doc, tmp->name);
-			if (ent != NULL)
-			    xmlBufferCat(buffer, ent->content);
-		    }
-		    default:
-			break;
-		}
-		/*
-		 * Skip to next node
-		 */
-		if (tmp->children != NULL) {
-		    if (tmp->children->type != XML_ENTITY_DECL) {
-			tmp = tmp->children;
-			continue;
-		    }
-		}
-		if (tmp == cur)
-		    break;
+                                if (cont) {
+                                    xmlBufferCat(buffer,
+                                                 (const xmlChar *) cont);
+                                    xmlFree(cont);
+                                }
+                                break;
+                            }
+                        default:
+                            break;
+                    }
+                    /*
+                     * Skip to next node
+                     */
+                    if (tmp->children != NULL) {
+                        if (tmp->children->type != XML_ENTITY_DECL) {
+                            tmp = tmp->children;
+                            continue;
+                        }
+                    }
+                    if (tmp == cur)
+                        break;
 
-		if (tmp->next != NULL) {
-		    tmp = tmp->next;
-		    continue;
-		}
-		
-		do {
-		    tmp = tmp->parent;
-		    if (tmp == NULL)
-			break;
-		    if (tmp == cur) {
-			tmp = NULL;
-			break;
-		    }
-		    if (tmp->next != NULL) {
-			tmp = tmp->next;
-			break;
-		    }
-		} while (tmp != NULL);
-	    }
-	    ret = buffer->content;
-	    buffer->content = NULL;
-	    xmlBufferFree(buffer);
-	    return(ret);
-        }
-        case XML_ATTRIBUTE_NODE: {
-	    xmlAttrPtr attr = (xmlAttrPtr) cur;
-	    if (attr->parent != NULL)
-		return(xmlNodeListGetString(attr->parent->doc, attr->children, 1));
-	    else
-		return(xmlNodeListGetString(NULL, attr->children, 1));
-	    break;
-	}
+                    if (tmp->next != NULL) {
+                        tmp = tmp->next;
+                        continue;
+                    }
+
+                    do {
+                        tmp = tmp->parent;
+                        if (tmp == NULL)
+                            break;
+                        if (tmp == cur) {
+                            tmp = NULL;
+                            break;
+                        }
+                        if (tmp->next != NULL) {
+                            tmp = tmp->next;
+                            break;
+                        }
+                    } while (tmp != NULL);
+                }
+                ret = buffer->content;
+                buffer->content = NULL;
+                xmlBufferFree(buffer);
+                return (ret);
+            }
+        case XML_ATTRIBUTE_NODE:{
+                xmlAttrPtr attr = (xmlAttrPtr) cur;
+
+                if (attr->parent != NULL)
+                    return (xmlNodeListGetString
+                            (attr->parent->doc, attr->children, 1));
+                else
+                    return (xmlNodeListGetString(NULL, attr->children, 1));
+                break;
+            }
         case XML_COMMENT_NODE:
         case XML_PI_NODE:
-	    if (cur->content != NULL)
-	        return(xmlStrdup(cur->content));
-	    return(NULL);
-        case XML_ENTITY_REF_NODE:
-	    /*
-	     * Locate the entity, and get it's content
-	     * @@@
-	     */
-            return(NULL);
+            if (cur->content != NULL)
+                return (xmlStrdup(cur->content));
+            return (NULL);
+        case XML_ENTITY_REF_NODE:{
+                xmlEntityPtr ent;
+                xmlNodePtr tmp;
+                xmlBufferPtr buffer;
+                xmlChar *ret;
+
+                /* lookup entity declaration */
+                ent = xmlGetDocEntity(cur->doc, cur->name);
+                if (ent == NULL)
+                    return (NULL);
+
+                buffer = xmlBufferCreate();
+                if (buffer == NULL)
+                    return (NULL);
+
+                /* an entity content can be any "well balanced chunk",
+                 * i.e. the result of the content [43] production:
+                 * http://www.w3.org/TR/REC-xml#NT-content
+                 * -> we iterate through child nodes and recursively call
+                 * xmlNodeGetContent() which handles all possible node types */
+                tmp = ent->children;
+                while (tmp) {
+                    xmlChar *cont = xmlNodeGetContent(tmp);
+
+                    if (cont) {
+                        xmlBufferCat(buffer, (const xmlChar *) cont);
+                        xmlFree(cont);
+                    }
+                    tmp = tmp->next;
+                }
+
+                ret = buffer->content;
+                buffer->content = NULL;
+                xmlBufferFree(buffer);
+                return (ret);
+            }
         case XML_ENTITY_NODE:
         case XML_DOCUMENT_NODE:
         case XML_HTML_DOCUMENT_NODE:
         case XML_DOCUMENT_TYPE_NODE:
         case XML_NOTATION_NODE:
         case XML_DTD_NODE:
-	case XML_XINCLUDE_START:
-	case XML_XINCLUDE_END:
+        case XML_XINCLUDE_START:
+        case XML_XINCLUDE_END:
 #ifdef LIBXML_DOCB_ENABLED
-	case XML_DOCB_DOCUMENT_NODE:
+        case XML_DOCB_DOCUMENT_NODE:
 #endif
-	    return(NULL);
-	case XML_NAMESPACE_DECL:
-	    return(xmlStrdup(((xmlNsPtr)cur)->href));
+            return (NULL);
+        case XML_NAMESPACE_DECL:
+            return (xmlStrdup(((xmlNsPtr) cur)->href));
         case XML_ELEMENT_DECL:
-	    /* TODO !!! */
-	    return(NULL);
+            /* TODO !!! */
+            return (NULL);
         case XML_ATTRIBUTE_DECL:
-	    /* TODO !!! */
-	    return(NULL);
+            /* TODO !!! */
+            return (NULL);
         case XML_ENTITY_DECL:
-	    /* TODO !!! */
-	    return(NULL);
+            /* TODO !!! */
+            return (NULL);
         case XML_CDATA_SECTION_NODE:
         case XML_TEXT_NODE:
-	    if (cur->content != NULL)
-	        return(xmlStrdup(cur->content));
-            return(NULL);
+            if (cur->content != NULL)
+                return (xmlStrdup(cur->content));
+            return (NULL);
     }
-    return(NULL);
+    return (NULL);
 }
- 
 /**
  * xmlNodeSetContent:
  * @cur:  the node being modified
diff --git a/xmlregexp.c b/xmlregexp.c
index 31657ea..5d63c90 100644
--- a/xmlregexp.c
+++ b/xmlregexp.c
@@ -109,6 +109,8 @@
     XML_REGEXP_QUANT_OPT,
     XML_REGEXP_QUANT_MULT,
     XML_REGEXP_QUANT_PLUS,
+    XML_REGEXP_QUANT_ONCEONLY,
+    XML_REGEXP_QUANT_ALL,
     XML_REGEXP_QUANT_RANGE
 } xmlRegQuantType;
 
@@ -279,6 +281,8 @@
 
 };
 
+#define REGEXP_ALL_COUNTER 0x123456
+
 static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
 
 /************************************************************************
@@ -630,6 +634,10 @@
 	    fprintf(output, "+ "); break;
 	case XML_REGEXP_QUANT_RANGE:
 	    fprintf(output, "range "); break;
+	case XML_REGEXP_QUANT_ONCEONLY:
+	    fprintf(output, "onceonly "); break;
+	case XML_REGEXP_QUANT_ALL:
+	    fprintf(output, "all "); break;
     }
 }
 static void
@@ -943,6 +951,24 @@
 }
 
 /**
+ * xmlFAGenerateAllTransition:
+ * ctxt:  a regexp parser context
+ * from:  the from state
+ * to:  the target state or NULL for building a new one
+ *
+ */
+static void
+xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
+			   xmlRegStatePtr from, xmlRegStatePtr to) {
+    if (to == NULL) {
+	to = xmlRegNewState(ctxt);
+	xmlRegStatePush(ctxt, to);
+	ctxt->state = to;
+    }
+    xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
+}
+
+/**
  * xmlFAGenerateEpsilonTransition:
  * ctxt:  a regexp parser context
  * from:  the from state
@@ -3424,6 +3450,69 @@
 }
 
 /**
+ * xmlAutomataNewOnceTrans:
+ * @am: an automata
+ * @from: the starting point of the transition
+ * @to: the target point of the transition or NULL
+ * @token: the input string associated to that transition
+ * @min:  the minimum successive occurrences of token
+ * @max:  the maximum successive occurrences of token
+ * @data:  data associated to the transition
+ *
+ * If @to is NULL, this first creates a new target state in the automata and
+ * then adds a transition from the @from state to the target state activated
+ * by a succession of inputs of value @token whose number is between @min and
+ * @max; moreover that transition can only be crossed once.
+ *
+ * Returns the target state or NULL in case of error
+ */
+xmlAutomataStatePtr
+xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
+			 xmlAutomataStatePtr to, const xmlChar *token,
+			 int min, int max, void *data) {
+    xmlRegAtomPtr atom;
+    int counter;
+
+    if ((am == NULL) || (from == NULL) || (token == NULL))
+	return(NULL);
+    if (min < 1)
+	return(NULL);
+    if ((max < min) || (max < 1))
+	return(NULL);
+    atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
+    if (atom == NULL)
+	return(NULL);
+    atom->valuep = xmlStrdup(token);
+    atom->data = data;
+    atom->quant = XML_REGEXP_QUANT_ONCEONLY;
+    if (min == 0)
+	atom->min = 1;
+    else
+	atom->min = min;
+    atom->max = max;
+    /*
+     * associate a counter to the transition.
+     */
+    counter = xmlRegGetCounter(am);
+    am->counters[counter].min = 1;
+    am->counters[counter].max = 1;
+
+    /* xmlFAGenerateTransitions(am, from, to, atom); */
+    if (to == NULL) {
+	to = xmlRegNewState(am);
+	xmlRegStatePush(am, to);
+    }
+    xmlRegStateAddTrans(am, from, atom, to, counter, -1);
+    xmlRegAtomPush(am, atom);
+    am->state = to;
+    if (to == NULL)
+	to = am->state;
+    if (to == NULL)
+	return(NULL);
+    return(to);
+}
+
+/**
  * xmlAutomataNewState:
  * @am: an automata
  *
@@ -3466,6 +3555,30 @@
 }
 
 /**
+ * xmlAutomataNewAllTrans:
+ * @am: an automata
+ * @from: the starting point of the transition
+ * @to: the target point of the transition or NULL
+ *
+ * If @to is NULL, this first creates a new target state in the automata
+ * and then adds an ALL transition from the @from state to the
+ * target state. That transition is an epsilon transition allowed only when
+ * all transitions from the @from node have been activated.
+ *
+ * Returns the target state or NULL in case of error
+ */
+xmlAutomataStatePtr
+xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
+		      xmlAutomataStatePtr to) {
+    if ((am == NULL) || (from == NULL))
+	return(NULL);
+    xmlFAGenerateAllTransition(am, from, to);
+    if (to == NULL)
+	return(am->state);
+    return(to);
+}
+
+/**
  * xmlAutomataNewCounter:
  * @am: an automata
  * @min:  the minimal value on the counter
diff --git a/xmlschemas.c b/xmlschemas.c
index 8cb9400..1926063 100644
--- a/xmlschemas.c
+++ b/xmlschemas.c
@@ -2119,7 +2119,7 @@
     if (type == NULL)
         return (NULL);
     type->node = node;
-    type->type = XML_SCHEMA_TYPE_SEQUENCE;
+    type->type = XML_SCHEMA_TYPE_ALL;
     type->id = xmlGetProp(node, BAD_CAST "id");
     type->minOccurs = xmlGetMinOccurs(ctxt, node);
     type->maxOccurs = xmlGetMaxOccurs(ctxt, node);
@@ -3037,6 +3037,26 @@
 	    break;
 	}
 	case XML_SCHEMA_TYPE_ALL: {
+	    xmlAutomataStatePtr end;
+	    xmlAutomataStatePtr start;
+	    xmlSchemaTypePtr subtypes;
+	    xmlSchemaElementPtr elem = (xmlSchemaElementPtr) type;
+
+	    subtypes = type->subtypes;
+	    if (subtypes == NULL)
+		break;
+	    start = ctxt->state;
+	    while (subtypes != NULL) {
+		ctxt->state = start;
+		elem = (xmlSchemaElementPtr) subtypes;
+
+		/* TODO : handle the namespace too */
+		xmlAutomataNewOnceTrans(ctxt->am, ctxt->state, ctxt->state,
+			        elem->name, elem->minOccurs, elem->maxOccurs,
+				subtypes);
+		subtypes = subtypes->next;
+	    }
+	    ctxt->state = xmlAutomataNewAllTrans(ctxt->am, ctxt->state, NULL);
 	    TODO
 	    break;
 	}