extended xmlRegExecErrInfo() and xmlRegExecNextValues() to return error transition strings

* include/libxml/xmlregexp.h xmlregexp.c: extended xmlRegExecErrInfo()
  and xmlRegExecNextValues() to return error transition strings too,
  and added sink state detection and handling.
Daniel
diff --git a/xmlregexp.c b/xmlregexp.c
index 099f68c..2b1919e 100644
--- a/xmlregexp.c
+++ b/xmlregexp.c
@@ -139,7 +139,8 @@
 typedef enum {
     XML_REGEXP_START_STATE = 1,
     XML_REGEXP_FINAL_STATE,
-    XML_REGEXP_TRANS_STATE
+    XML_REGEXP_TRANS_STATE,
+    XML_REGEXP_SINK_STATE
 } xmlRegStateType;
 
 typedef enum {
@@ -207,7 +208,6 @@
     xmlRegMarkedType mark;
     xmlRegMarkedType reached;
     int no;
-
     int maxTrans;
     int nbTrans;
     xmlRegTrans *trans;
@@ -1596,11 +1596,16 @@
     /*
      * build the completed transitions bypassing the epsilons
      * Use a marking algorithm to avoid loops
+     * mark sink states too.
      */
     for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
 	state = ctxt->states[statenr];
 	if (state == NULL)
 	    continue;
+	if ((state->nbTrans == 0) &&
+	    (state->type != XML_REGEXP_FINAL_STATE)) {
+	    state->type = XML_REGEXP_SINK_STATE;
+	}
 	for (transnr = 0;transnr < state->nbTrans;transnr++) {
 	    if ((state->trans[transnr].atom == NULL) &&
 		(state->trans[transnr].to >= 0)) {
@@ -1677,6 +1682,7 @@
 		}
 	    }
 	}
+
 	/*
 	 * find the next accessible state not explored
 	 */
@@ -2698,6 +2704,10 @@
 		printf("entering state %d\n", target);
 #endif
 		if (comp->compact[target * (comp->nbstrings + 1)] ==
+		    XML_REGEXP_SINK_STATE)
+		    goto error;
+
+		if (comp->compact[target * (comp->nbstrings + 1)] ==
 		    XML_REGEXP_FINAL_STATE)
 		    return(1);
 		return(0);
@@ -2711,6 +2721,7 @@
 #ifdef DEBUG_PUSH
     printf("failed to find a transition for %s on state %d\n", value, state);
 #endif
+error:
     if (exec->errString != NULL)
         xmlFree(exec->errString);
     exec->errString = xmlStrdup(value);
@@ -2975,6 +2986,20 @@
 #ifdef DEBUG_PUSH
 		printf("entering state %d\n", trans->to);
 #endif
+                if ((exec->comp->states[trans->to] != NULL) &&
+		    (exec->comp->states[trans->to]->type ==
+		     XML_REGEXP_SINK_STATE)) {
+		    /*
+		     * entering a sink state, save the current state as error
+		     * state.
+		     */
+		    if (exec->errString != NULL)
+			xmlFree(exec->errString);
+		    exec->errString = xmlStrdup(value);
+		    exec->errState = exec->state;
+		    memcpy(exec->errCounts, exec->counts,
+			   exec->comp->nbCounters * sizeof(int));
+		}
 		exec->state = exec->comp->states[trans->to];
 		exec->transno = 0;
 		if (trans->atom != NULL) {
@@ -3010,10 +3035,11 @@
 	if ((exec->transno != 0) || (exec->state->nbTrans == 0)) {
 rollback:
             /*
-	     * if we didn't yet rollback on the current input store
-	     * the current state as the error state.
+	     * if we didn't yet rollback on the current input
+	     * store the current state as the error state.
 	     */
-	    if (progress) {
+	    if ((progress) && (exec->state != NULL) &&
+	        (exec->state->type != XML_REGEXP_SINK_STATE)) {
 	        progress = 0;
 		if (exec->errString != NULL)
 		    xmlFree(exec->errString);
@@ -3113,6 +3139,7 @@
  * @exec: a regexp execution context
  * @err: error extraction or normal one
  * @nbval: pointer to the number of accepted values IN/OUT
+ * @nbneg: return number of negative transitions
  * @values: pointer to the array of acceptable values
  * @terminal: return value if this was a terminal state
  *
@@ -3123,14 +3150,18 @@
  */
 static int
 xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
-                    int *nbval, xmlChar **values, int *terminal) {
+                    int *nbval, int *nbneg,
+		    xmlChar **values, int *terminal) {
     int maxval;
+    int nb = 0;
 
-    if ((exec == NULL) || (nbval == NULL) || (values == NULL) || (*nbval <= 0))
+    if ((exec == NULL) || (nbval == NULL) || (nbneg == NULL) || 
+        (values == NULL) || (*nbval <= 0))
         return(-1);
 
     maxval = *nbval;
     *nbval = 0;
+    *nbneg = 0;
     if ((exec->comp != NULL) && (exec->comp->compact != NULL)) {
         xmlRegexpPtr comp;
 	int target, i, state;
@@ -3150,13 +3181,24 @@
 	    else
 		*terminal = 0;
 	}
-	for (i = 0;(i < comp->nbstrings) && (*nbval < maxval);i++) {
+	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
 	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
-	    if ((target > 0) && (target <= comp->nbstates)) {
-	        values[*nbval] = comp->stringMap[i];
+	    if ((target > 0) && (target <= comp->nbstates) &&
+	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] !=
+		 XML_REGEXP_SINK_STATE)) {
+	        values[nb++] = comp->stringMap[i];
 		(*nbval)++;
 	    }
 	}
+	for (i = 0;(i < comp->nbstrings) && (nb < maxval);i++) {
+	    target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
+	    if ((target > 0) && (target <= comp->nbstates) &&
+	        (comp->compact[(target - 1) * (comp->nbstrings + 1)] ==
+		 XML_REGEXP_SINK_STATE)) {
+	        values[nb++] = comp->stringMap[i];
+		(*nbneg)++;
+	    }
+	}
     } else {
         int transno;
 	xmlRegTransPtr trans;
@@ -3178,7 +3220,7 @@
 	    state = exec->state;
 	}
 	for (transno = 0;
-	     (transno < state->nbTrans) && (*nbval < maxval);
+	     (transno < state->nbTrans) && (nb < maxval);
 	     transno++) {
 	    trans = &state->trans[transno];
 	    if (trans->to < 0)
@@ -3187,8 +3229,10 @@
 	    if ((atom == NULL) || (atom->valuep == NULL))
 		continue;
 	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
+	        /* this should not be reached but ... */
 	        TODO;
 	    } else if (trans->count == REGEXP_ALL_COUNTER) {
+	        /* this should not be reached but ... */
 	        TODO;
 	    } else if (trans->counter >= 0) {
 		xmlRegCounterPtr counter;
@@ -3200,12 +3244,40 @@
 		    count = exec->counts[trans->counter];
 		counter = &exec->comp->counters[trans->counter];
 		if (count < counter->max) {
-		    values[*nbval] = (xmlChar *) atom->valuep;
+		    values[nb++] = (xmlChar *) atom->valuep;
 		    (*nbval)++;
 		}
 	    } else {
-		values[*nbval] = (xmlChar *) atom->valuep;
-		(*nbval)++;
+                if ((exec->comp->states[trans->to] != NULL) &&
+		    (exec->comp->states[trans->to]->type !=
+		     XML_REGEXP_SINK_STATE)) {
+		    values[nb++] = (xmlChar *) atom->valuep;
+		    (*nbval)++;
+		}
+	    } 
+	}
+	for (transno = 0;
+	     (transno < state->nbTrans) && (nb < maxval);
+	     transno++) {
+	    trans = &state->trans[transno];
+	    if (trans->to < 0)
+		continue;
+	    atom = trans->atom;
+	    if ((atom == NULL) || (atom->valuep == NULL))
+		continue;
+	    if (trans->count == REGEXP_ALL_LAX_COUNTER) {
+	        continue;
+	    } else if (trans->count == REGEXP_ALL_COUNTER) {
+	        continue;
+	    } else if (trans->counter >= 0) {
+	        continue;
+	    } else {
+                if ((exec->comp->states[trans->to] != NULL) &&
+		    (exec->comp->states[trans->to]->type ==
+		     XML_REGEXP_SINK_STATE)) {
+		    values[nb++] = (xmlChar *) atom->valuep;
+		    (*nbneg)++;
+		}
 	    } 
 	}
     }
@@ -3216,6 +3288,7 @@
  * xmlRegExecNextValues:
  * @exec: a regexp execution context
  * @nbval: pointer to the number of accepted values IN/OUT
+ * @nbneg: return number of negative transitions
  * @values: pointer to the array of acceptable values
  * @terminal: return value if this was a terminal state
  *
@@ -3229,9 +3302,9 @@
  * Returns: 0 in case of success or -1 in case of error.
  */
 int
-xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, xmlChar **values,
-                     int *terminal) {
-    return(xmlRegExecGetValues(exec, 0, nbval, values, terminal));
+xmlRegExecNextValues(xmlRegExecCtxtPtr exec, int *nbval, int *nbneg,
+                     xmlChar **values, int *terminal) {
+    return(xmlRegExecGetValues(exec, 0, nbval, nbneg, values, terminal));
 }
 
 /**
@@ -3239,6 +3312,7 @@
  * @exec: a regexp execution context generating an error
  * @string: return value for the error string
  * @nbval: pointer to the number of accepted values IN/OUT
+ * @nbneg: return number of negative transitions
  * @values: pointer to the array of acceptable values
  * @terminal: return value if this was a terminal state
  *
@@ -3254,7 +3328,7 @@
  */
 int
 xmlRegExecErrInfo(xmlRegExecCtxtPtr exec, const xmlChar **string,
-                  int *nbval, xmlChar **values, int *terminal) {
+                  int *nbval, int *nbneg, xmlChar **values, int *terminal) {
     if (exec == NULL)
         return(-1);
     if (string != NULL) {
@@ -3263,7 +3337,7 @@
 	else
 	    *string = NULL;
     }
-    return(xmlRegExecGetValues(exec, 1, nbval, values, terminal));
+    return(xmlRegExecGetValues(exec, 1, nbval, nbneg, values, terminal));
 }
 
 #ifdef DEBUG_ERR
@@ -3271,8 +3345,9 @@
     const xmlChar *string;
     const xmlChar *values[5];
     int nb = 5;
+    int nbneg;
     int terminal;
-    xmlRegExecErrInfo(exec, &string, &nb, &values[0], &terminal);
+    xmlRegExecErrInfo(exec, &string, &nb, &nbneg, &values[0], &terminal);
 }
 #endif