Starting work precompiling parts of RelaxNG schemas. Not plugged onto validity checking yet.

* relaxng.c xmlregexp.c include/libxml/xmlautomata.h
  include/libxml/xmlregexp.h: Starting work on precompiling
  parts of RelaxNG schemas. Not plugged into validity checking
  yet, just the regexp building part. Needed to extend some
  of the automata and regexp APIs.
Daniel
diff --git a/relaxng.c b/relaxng.c
index 6217904..4deb647 100644
--- a/relaxng.c
+++ b/relaxng.c
@@ -130,12 +130,14 @@
     XML_RELAXNG_START		/* Used to keep track of starts on grammars */
 } xmlRelaxNGType;
 
-#define IS_NULLABLE		1
-#define IS_NOT_NULLABLE		2
-#define IS_INDETERMINIST	4
-#define IS_MIXED		8
-#define IS_TRIABLE		16
-#define IS_PROCESSED		32
+#define IS_NULLABLE		(1 << 0)
+#define IS_NOT_NULLABLE		(1 << 1)
+#define IS_INDETERMINIST	(1 << 2)
+#define IS_MIXED		(1 << 3)
+#define IS_TRIABLE		(1 << 4)
+#define IS_PROCESSED		(1 << 5)
+#define IS_COMPILABLE		(1 << 6)
+#define IS_NOT_COMPILABLE	(1 << 7)
 
 struct _xmlRelaxNGDefine {
     xmlRelaxNGType type;	/* the type of definition */
@@ -152,6 +154,7 @@
     xmlRelaxNGDefinePtr nextHash;/* next define in defs/refs hash tables */
     short           depth;       /* used for the cycle detection */
     short           dflags;      /* define related flags */
+    xmlRegexpPtr    contModel;	 /* a compiled content model if available */
 };
 
 /**
@@ -229,6 +232,10 @@
     xmlRelaxNGIncludePtr *incTab;     /* array of incs */
 
     int                   idref;      /* requires idref checking */
+
+    /* used to compile content models */
+    xmlAutomataPtr        am;         /* the automata */
+    xmlAutomataStatePtr   state;      /* used to build the automata */
 };
 
 #define FLAGS_IGNORABLE		1
@@ -806,6 +813,8 @@
 	xmlFree(define->ns);
     if (define->value != NULL)
 	xmlFree(define->value);
+    if (define->contModel != NULL)
+        xmlRegFreeRegexp(define->contModel);
     xmlFree(define);
 }
 
@@ -1806,7 +1815,6 @@
 #define VALID_ERR2P(a, b) xmlRelaxNGAddValidError(ctxt, a, b, NULL, 1);
 #define VALID_ERR3P(a, b, c) xmlRelaxNGAddValidError(ctxt, a, b, c, 1);
 
-#ifdef DEBUG
 static const char *
 xmlRelaxNGDefName(xmlRelaxNGDefinePtr def) {
     if (def == NULL)
@@ -1837,7 +1845,6 @@
     }
     return("unknown");
 }
-#endif
 
 /**
  * xmlRelaxNGGetErrorString:
@@ -2658,6 +2665,9 @@
  * 									*
  ************************************************************************/
 
+static int xmlRelaxNGTryCompile(xmlRelaxNGParserCtxtPtr ctxt,
+                                xmlRelaxNGDefinePtr def);
+
 /**
  * xmlRelaxNGIsCompileable:
  * @define:  the definition to check
@@ -2668,25 +2678,48 @@
  */
 static int
 xmlRelaxNGIsCompileable(xmlRelaxNGDefinePtr def) {
+    int ret = -1;
+
     if (def == NULL) {
 	return(-1);
     }
+    if ((def->type != XML_RELAXNG_ELEMENT) &&
+        (def->dflags & IS_COMPILABLE))
+	return(1);
+    if ((def->type != XML_RELAXNG_ELEMENT) &&
+        (def->dflags & IS_NOT_COMPILABLE))
+	return(0);
     switch(def->type) {
         case XML_RELAXNG_REF:
         case XML_RELAXNG_EXTERNALREF:
         case XML_RELAXNG_PARENTREF:
+	    if (def->depth == -20) {
+	        return(1);
+	    } else {
+	        def->depth = -20;
+		ret = xmlRelaxNGIsCompileable(def->content);
+	    }
+	    break;
         case XML_RELAXNG_NOOP:
         case XML_RELAXNG_START:
-	    return(xmlRelaxNGIsCompileable(def->content));
+	    ret = xmlRelaxNGIsCompileable(def->content);
+	    break;
         case XML_RELAXNG_TEXT:
-        case XML_RELAXNG_DATATYPE:
-        case XML_RELAXNG_LIST:
-        case XML_RELAXNG_PARAM:
-        case XML_RELAXNG_VALUE:
-
         case XML_RELAXNG_EMPTY:
+	    ret = 1;
+	    break;
         case XML_RELAXNG_ELEMENT:
-	    return(1);
+	    if (((def->dflags & IS_NOT_COMPILABLE) == 0) &&
+	        ((def->dflags & IS_COMPILABLE) == 0)) {
+		ret = xmlRelaxNGIsCompileable(def->content);
+		if (ret == 0) def->dflags |= IS_NOT_COMPILABLE;
+		if (ret == 1) def->dflags |= IS_COMPILABLE;
+	    }
+	    if ((def->nameClass != NULL) || (def->name == NULL))
+	        return(0);
+	    else
+		return(1);
+	    break;
         case XML_RELAXNG_OPTIONAL:
         case XML_RELAXNG_ZEROORMORE:
         case XML_RELAXNG_ONEORMORE:
@@ -2694,25 +2727,269 @@
         case XML_RELAXNG_GROUP:
         case XML_RELAXNG_DEF: {
 	    xmlRelaxNGDefinePtr list;
-	    int ret;
 
 	    list = def->content;
 	    while (list != NULL) {
 		ret = xmlRelaxNGIsCompileable(list);
 		if (ret != 1)
-		    return(ret);
+		    break;
 		list = list->next;
 	    }
-	    return(1);
+	    break;
         }
         case XML_RELAXNG_EXCEPT:
         case XML_RELAXNG_ATTRIBUTE:
         case XML_RELAXNG_INTERLEAVE:
-	    return(0);
+        case XML_RELAXNG_DATATYPE:
+        case XML_RELAXNG_LIST:
+        case XML_RELAXNG_PARAM:
+        case XML_RELAXNG_VALUE:
+	    ret = 0;
+	    break;
         case XML_RELAXNG_NOT_ALLOWED:
-	    return(-1);
+	    ret = -1;
+	    break;
     }
-    return(-1);
+    if (ret == 0) def->dflags |= IS_NOT_COMPILABLE;
+    if (ret == 1) def->dflags |= IS_COMPILABLE;
+    return(ret);
+}
+
+/**
+ * xmlRelaxNGCompile:
+ * @ctxt:  the RelaxNG parser context
+ * @def:  the definition tree to compile
+ *
+ * Compile the set of definitions. It works recursively down to the
+ * element boundaries, where it tries to compile the content if possible.
+ *
+ * Returns 0 if success and -1 in case of error
+ */
+static int
+xmlRelaxNGCompile(xmlRelaxNGParserCtxtPtr ctxt, xmlRelaxNGDefinePtr def) {
+    int ret = 0;
+    xmlRelaxNGDefinePtr list;
+
+    if ((ctxt == NULL) || (def == NULL)) return(-1);
+
+    switch(def->type) {
+        case XML_RELAXNG_START:
+            if ((xmlRelaxNGIsCompileable(def) == 1) && (def->depth != -25)) {
+		xmlAutomataPtr oldam = ctxt->am;
+		xmlAutomataStatePtr oldstate = ctxt->state;
+
+                def->depth = -25;
+
+		list = def->content;
+		ctxt->am = xmlNewAutomata();
+		if (ctxt->am == NULL)
+		    return(-1);
+		ctxt->state = xmlAutomataGetInitState(ctxt->am);
+		while (list != NULL) {
+		    xmlRelaxNGCompile(ctxt, list);
+		    list = list->next;
+		}
+		xmlAutomataSetFinalState(ctxt->am, ctxt->state);
+		def->contModel = xmlAutomataCompile(ctxt->am);
+		xmlRegexpIsDeterminist(def->contModel);
+
+		xmlFreeAutomata(ctxt->am);
+		ctxt->state = oldstate;
+		ctxt->am = oldam;
+	    }
+	    break;
+        case XML_RELAXNG_ELEMENT:
+	    if ((ctxt->am != NULL) && (def->name != NULL)) {
+		ctxt->state = xmlAutomataNewTransition2(ctxt->am,
+		   ctxt->state, NULL, def->name, def->ns, NULL);
+	    }
+            if ((def->dflags & IS_COMPILABLE) && (def->depth != -25)) {
+		xmlAutomataPtr oldam = ctxt->am;
+		xmlAutomataStatePtr oldstate = ctxt->state;
+
+                def->depth = -25;
+
+		list = def->content;
+		ctxt->am = xmlNewAutomata();
+		if (ctxt->am == NULL)
+		    return(-1);
+		ctxt->state = xmlAutomataGetInitState(ctxt->am);
+		while (list != NULL) {
+		    xmlRelaxNGCompile(ctxt, list);
+		    list = list->next;
+		}
+		xmlAutomataSetFinalState(ctxt->am, ctxt->state);
+		def->contModel = xmlAutomataCompile(ctxt->am);
+		xmlRegexpIsDeterminist(def->contModel);
+
+		xmlFreeAutomata(ctxt->am);
+		ctxt->state = oldstate;
+		ctxt->am = oldam;
+	    } else {
+		xmlAutomataPtr oldam = ctxt->am;
+
+	        /*
+		 * we can't build the content model for this element content
+		 * but it still might be possible to build it for some of its
+		 * children, recurse.
+		 */
+	        ret = xmlRelaxNGTryCompile(ctxt, def);
+		ctxt->am = oldam;
+	    }
+	    break;
+        case XML_RELAXNG_REF:
+        case XML_RELAXNG_EXTERNALREF:
+        case XML_RELAXNG_PARENTREF:
+        case XML_RELAXNG_NOOP:
+	    ret = xmlRelaxNGCompile(ctxt, def->content);
+	    break;
+        case XML_RELAXNG_OPTIONAL: {
+	    xmlAutomataStatePtr oldstate = ctxt->state;
+	    
+	    xmlRelaxNGCompile(ctxt, def->content);
+	    xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state);
+	    break;
+	}
+        case XML_RELAXNG_ZEROORMORE: {
+	    xmlAutomataStatePtr oldstate;
+
+	    ctxt->state = xmlAutomataNewEpsilon(ctxt->am, ctxt->state, NULL);
+	    oldstate = ctxt->state;
+	    xmlRelaxNGCompile(ctxt, def->content);
+	    xmlAutomataNewEpsilon(ctxt->am, ctxt->state, oldstate);
+	    ctxt->state = xmlAutomataNewEpsilon(ctxt->am, oldstate, NULL);
+	    break;
+	}
+        case XML_RELAXNG_ONEORMORE: {
+	    xmlAutomataStatePtr oldstate;
+
+	    xmlRelaxNGCompile(ctxt, def->content);
+	    oldstate = ctxt->state;
+	    xmlRelaxNGCompile(ctxt, def->content);
+	    xmlAutomataNewEpsilon(ctxt->am, ctxt->state, oldstate);
+	    ctxt->state = xmlAutomataNewEpsilon(ctxt->am, oldstate, NULL);
+	    break;
+	}
+        case XML_RELAXNG_CHOICE: {
+	    xmlAutomataStatePtr target = NULL;
+	    xmlAutomataStatePtr oldstate = ctxt->state;
+	    
+	    list = def->content;
+	    while (list != NULL) {
+	        ctxt->state = oldstate;
+		xmlRelaxNGCompile(ctxt, list);
+		if (target == NULL)
+		    target = ctxt->state;
+		else {
+		    xmlAutomataNewEpsilon(ctxt->am, ctxt->state, target);
+		}
+		list = list->next;
+	    }
+
+	    break;
+	}
+        case XML_RELAXNG_GROUP:
+        case XML_RELAXNG_DEF:
+	    list = def->content;
+	    while (list != NULL) {
+		xmlRelaxNGCompile(ctxt, list);
+		list = list->next;
+	    }
+	    break;
+        case XML_RELAXNG_TEXT: {
+	    xmlAutomataStatePtr oldstate;
+
+	    ctxt->state = xmlAutomataNewEpsilon(ctxt->am, ctxt->state, NULL);
+	    oldstate = ctxt->state;
+	    xmlRelaxNGCompile(ctxt, def->content);
+	    xmlAutomataNewTransition(ctxt->am, ctxt->state, ctxt->state,
+	                             BAD_CAST "#text", NULL);
+	    ctxt->state = xmlAutomataNewEpsilon(ctxt->am, oldstate, NULL);
+	    break;
+	}
+        case XML_RELAXNG_EMPTY:
+	    break;
+        case XML_RELAXNG_EXCEPT:
+        case XML_RELAXNG_ATTRIBUTE:
+        case XML_RELAXNG_INTERLEAVE:
+        case XML_RELAXNG_NOT_ALLOWED:
+        case XML_RELAXNG_DATATYPE:
+        case XML_RELAXNG_LIST:
+        case XML_RELAXNG_PARAM:
+        case XML_RELAXNG_VALUE:
+	    TODO /* This should not happen and generate an internal error */
+	    printf("trying to compile %s\n", xmlRelaxNGDefName(def));
+
+	    break;
+    }
+    return(ret);
+}
+
+/**
+ * xmlRelaxNGTryCompile:
+ * @ctxt:  the RelaxNG parser context
+ * @def:  the definition tree to compile
+ *
+ * Try to compile the set of definitions. It works recursively,
+ * possibly ignoring parts which cannot be compiled.
+ *
+ * Returns 0 if success and -1 in case of error
+ */
+static int
+xmlRelaxNGTryCompile(xmlRelaxNGParserCtxtPtr ctxt, xmlRelaxNGDefinePtr def) {
+    int ret = 0;
+    xmlRelaxNGDefinePtr list;
+
+    if ((ctxt == NULL) || (def == NULL)) return(-1);
+
+    if ((def->type == XML_RELAXNG_START) ||
+        (def->type == XML_RELAXNG_ELEMENT)) {
+	ret = xmlRelaxNGIsCompileable(def);
+	if ((def->dflags & IS_COMPILABLE) && (def->depth != -25)) {
+	    ctxt->am = NULL;
+	    ret = xmlRelaxNGCompile(ctxt, def);
+	    return(ret);
+	}
+    }
+    switch(def->type) {
+        case XML_RELAXNG_REF:
+        case XML_RELAXNG_EXTERNALREF:
+        case XML_RELAXNG_PARENTREF:
+        case XML_RELAXNG_NOOP:
+        case XML_RELAXNG_START:
+	    ret = xmlRelaxNGTryCompile(ctxt, def->content);
+	    break;
+        case XML_RELAXNG_TEXT:
+        case XML_RELAXNG_DATATYPE:
+        case XML_RELAXNG_LIST:
+        case XML_RELAXNG_PARAM:
+        case XML_RELAXNG_VALUE:
+        case XML_RELAXNG_EMPTY:
+        case XML_RELAXNG_ELEMENT:
+	    ret = 0;
+	    break;
+        case XML_RELAXNG_OPTIONAL:
+        case XML_RELAXNG_ZEROORMORE:
+        case XML_RELAXNG_ONEORMORE:
+        case XML_RELAXNG_CHOICE:
+        case XML_RELAXNG_GROUP:
+        case XML_RELAXNG_DEF:
+	    list = def->content;
+	    while (list != NULL) {
+		ret = xmlRelaxNGTryCompile(ctxt, list);
+		if (ret != 0)
+		    break;
+		list = list->next;
+	    }
+	    break;
+        case XML_RELAXNG_EXCEPT:
+        case XML_RELAXNG_ATTRIBUTE:
+        case XML_RELAXNG_INTERLEAVE:
+        case XML_RELAXNG_NOT_ALLOWED:
+	    ret = 0;
+	    break;
+    }
+    return(ret);
 }
 
 /************************************************************************
@@ -6788,6 +7065,12 @@
     }
 
     /*
+     * try to compile (parts of) the schemas
+     */
+    if (ctxt->grammar != NULL)
+	xmlRelaxNGTryCompile(ctxt, ctxt->grammar->start);
+
+    /*
      * Transfer the pointer for cleanup at the schema level.
      */
     ret->doc = doc;