Fixed a nasty bug in the regexp automata build, reported by Ashwin and Bjorn Reese
* xmlregexp.c: found and fixed a nasty bug in the regexp automata build,
reported by Ashwin and Bjorn Reese
Daniel
svn path=/trunk/; revision=3705
diff --git a/xmlregexp.c b/xmlregexp.c
index 52e484c..389453b 100644
--- a/xmlregexp.c
+++ b/xmlregexp.c
@@ -1532,6 +1532,8 @@
static int
xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
xmlRegStatePtr to, xmlRegAtomPtr atom) {
+ xmlRegStatePtr end;
+
if (atom == NULL) {
ERROR("genrate transition: atom == NULL");
return(-1);
@@ -1689,12 +1691,31 @@
else {
return(-1);
}
+ }
+ end = to;
+ if ((atom->quant == XML_REGEXP_QUANT_MULT) ||
+ (atom->quant == XML_REGEXP_QUANT_PLUS)) {
+ /*
+ * Do not pollute the target state by adding transitions from
+ * it as it is likely to be the shared target of multiple branches.
+ * So isolate with an epsilon transition.
+ */
+ xmlRegStatePtr tmp;
+
+ tmp = xmlRegNewState(ctxt);
+ if (tmp != NULL)
+ xmlRegStatePush(ctxt, tmp);
+ else {
+ return(-1);
+ }
+ xmlFAGenerateEpsilonTransition(ctxt, tmp, to);
+ to = tmp;
}
if (xmlRegAtomPush(ctxt, atom) < 0) {
return(-1);
}
xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1);
- ctxt->state = to;
+ ctxt->state = end;
switch (atom->quant) {
case XML_REGEXP_QUANT_OPT:
atom->quant = XML_REGEXP_QUANT_ONCE;