added a streaming pattern detector for a subset of XPath, should help
* pattern.c include/libxml/pattern.h xmllint.c: added a
streaming pattern detector for a subset of XPath, should
help Kasimier for identity constraints
* python/generator.py: applied Stephane Bidoul patch to find
paths without breaking.
Daniel
diff --git a/pattern.c b/pattern.c
index 74985a8..47b4f22 100644
--- a/pattern.c
+++ b/pattern.c
@@ -25,9 +25,42 @@
#ifdef LIBXML_PATTERN_ENABLED
+#define DEBUG_STREAMING
+
#define ERROR(a, b, c, d)
#define ERROR5(a, b, c, d, e)
+#define XML_STREAM_STEP_DESC 1
+#define XML_STREAM_STEP_FINAL 2
+#define XML_STREAM_STEP_ROOT 4
+
+typedef struct _xmlStreamStep xmlStreamStep;
+typedef xmlStreamStep *xmlStreamStepPtr;
+struct _xmlStreamStep {
+ int flags; /* properties of that step */
+ const xmlChar *name; /* first string value if NULL accept all */
+ const xmlChar *ns; /* second string value */
+};
+
+typedef struct _xmlStreamComp xmlStreamComp;
+typedef xmlStreamComp *xmlStreamCompPtr;
+struct _xmlStreamComp {
+ xmlDict *dict; /* the dictionnary if any */
+ int nbStep; /* number of steps in the automata */
+ int maxStep; /* allocated number of steps */
+ xmlStreamStepPtr steps; /* the array of steps */
+};
+
+struct _xmlStreamCtxt {
+ xmlStreamCompPtr comp; /* the compiled stream */
+ int nbState; /* number of state in the automata */
+ int maxState; /* allocated number of state */
+ int level; /* how deep are we ? */
+ int *states; /* the array of step indexes */
+};
+
+static void xmlFreeStreamComp(xmlStreamCompPtr comp);
+
/*
* Types are private:
*/
@@ -55,12 +88,14 @@
struct _xmlPattern {
void *data; /* the associated template */
- struct _xmlPattern *next; /* siblings */
+ xmlDictPtr dict; /* the optional dictionnary */
+ struct _xmlPattern *next; /* siblings */
const xmlChar *pattern; /* the pattern */
int nbStep;
int maxStep;
xmlStepOpPtr steps; /* ops for computation */
+ xmlStreamCompPtr stream; /* the streaming data if any */
};
typedef struct _xmlPatParserContext xmlPatParserContext;
@@ -124,18 +159,25 @@
if (comp == NULL)
return;
+ if (comp->stream != NULL)
+ xmlFreeStreamComp(comp->stream);
if (comp->pattern != NULL)
xmlFree((xmlChar *)comp->pattern);
if (comp->steps != NULL) {
- for (i = 0;i < comp->nbStep;i++) {
- op = &comp->steps[i];
- if (op->value != NULL)
- xmlFree((xmlChar *) op->value);
- if (op->value2 != NULL)
- xmlFree((xmlChar *) op->value2);
+ if (comp->dict == NULL) {
+ for (i = 0;i < comp->nbStep;i++) {
+ op = &comp->steps[i];
+ if (op->value != NULL)
+ xmlFree((xmlChar *) op->value);
+ if (op->value2 != NULL)
+ xmlFree((xmlChar *) op->value2);
+ }
}
xmlFree(comp->steps);
}
+ if (comp->dict != NULL)
+ xmlDictFree(comp->dict);
+
memset(comp, -1, sizeof(xmlPattern));
xmlFree(comp);
}
@@ -161,7 +203,8 @@
* xmlNewPatParserContext:
* @pattern: the pattern context
* @dict: the inherited dictionnary or NULL
- * @namespaces: the prefix definitions, array of [URI, prefix] or NULL
+ * @namespaces: the prefix definitions, array of [URI, prefix] terminated
+ * with [NULL, NULL] or NULL if no namespace is used
*
* Create a new XML pattern parser context
*
@@ -347,6 +390,9 @@
case XML_OP_END:
return(1);
case XML_OP_ROOT:
+ if (node->type == XML_NAMESPACE_DECL)
+ return(0);
+ node = node->parent;
if ((node->type == XML_DOCUMENT_NODE) ||
#ifdef LIBXML_DOCB_ENABLED
(node->type == XML_DOCB_DOCUMENT_NODE) ||
@@ -883,6 +929,10 @@
if (CUR == '@') {
TODO
} else {
+ if (CUR == '/') {
+ PUSH(XML_OP_ROOT, NULL, NULL);
+ NEXT;
+ }
xmlCompileStepPattern(ctxt);
SKIP_BLANKS;
while (CUR == '/') {
@@ -905,6 +955,465 @@
error:
return;
}
+
+/************************************************************************
+ * *
+ * The streaming code *
+ * *
+ ************************************************************************/
+
+#ifdef DEBUG_STREAMING
+static void
+xmlDebugStreamComp(xmlStreamCompPtr stream) {
+ int i;
+
+ if (stream == NULL) {
+ printf("Stream: NULL\n");
+ return;
+ }
+ printf("Stream: %d steps\n", stream->nbStep);
+ for (i = 0;i < stream->nbStep;i++) {
+ if (stream->steps[i].ns != NULL) {
+ printf("{%s}", stream->steps[i].ns);
+ }
+ if (stream->steps[i].name == NULL) {
+ printf("* ");
+ } else {
+ printf("%s ", stream->steps[i].name);
+ }
+ if (stream->steps[i].flags & XML_STREAM_STEP_ROOT)
+ printf("root ");
+ if (stream->steps[i].flags & XML_STREAM_STEP_DESC)
+ printf("// ");
+ if (stream->steps[i].flags & XML_STREAM_STEP_FINAL)
+ printf("final ");
+ printf("\n");
+ }
+}
+static void
+xmlDebugStreamCtxt(xmlStreamCtxtPtr ctxt, int match) {
+ int i;
+
+ if (ctxt == NULL) {
+ printf("Stream: NULL\n");
+ return;
+ }
+ printf("Stream: level %d, %d states: ", ctxt->level, ctxt->nbState);
+ if (match)
+ printf("matches\n");
+ else
+ printf("\n");
+ for (i = 0;i < ctxt->nbState;i++) {
+ if (ctxt->states[2 * i] < 0)
+ printf(" %d: free\n", i);
+ else {
+ printf(" %d: step %d, level %d", i, ctxt->states[2 * i],
+ ctxt->states[(2 * i) + 1]);
+ if (ctxt->comp->steps[ctxt->states[2 * i]].flags &
+ XML_STREAM_STEP_DESC)
+ printf(" //\n");
+ else
+ printf("\n");
+ }
+ printf("\n");
+ }
+}
+#endif
+/**
+ * xmlNewStreamComp:
+ * @size: the number of expected steps
+ *
+ * build a new compiled pattern for streaming
+ *
+ * Returns the new structure or NULL in case of error.
+ */
+static xmlStreamCompPtr
+xmlNewStreamComp(int size) {
+ xmlStreamCompPtr cur;
+
+ if (size < 4)
+ size = 4;
+
+ cur = (xmlStreamCompPtr) xmlMalloc(sizeof(xmlStreamComp));
+ if (cur == NULL) {
+ ERROR(NULL, NULL, NULL,
+ "xmlNewStreamComp: malloc failed\n");
+ return(NULL);
+ }
+ memset(cur, 0, sizeof(xmlStreamComp));
+ cur->steps = (xmlStreamStepPtr) xmlMalloc(size * sizeof(xmlStreamStep));
+ if (cur->steps == NULL) {
+ xmlFree(cur);
+ ERROR(NULL, NULL, NULL,
+ "xmlNewStreamComp: malloc failed\n");
+ return(NULL);
+ }
+ cur->nbStep = 0;
+ cur->maxStep = size;
+ return(cur);
+}
+
+/**
+ * xmlFreeStreamComp:
+ * @comp: the compiled pattern for streaming
+ *
+ * Free the compiled pattern for streaming
+ */
+static void
+xmlFreeStreamComp(xmlStreamCompPtr comp) {
+ if (comp != NULL) {
+ if (comp->steps != NULL)
+ xmlFree(comp->steps);
+ if (comp->dict != NULL)
+ xmlDictFree(comp->dict);
+ xmlFree(comp);
+ }
+}
+
+/**
+ * xmlStreamCompAddStep:
+ * @comp: the compiled pattern for streaming
+ * @name: the first string, the name, or NULL for *
+ * @ns: the second step, the namespace name
+ * @flags: the flags for that step
+ *
+ * Add a new step to the compiled pattern
+ *
+ * Returns -1 in case of error or the step index if successful
+ */
+static int
+xmlStreamCompAddStep(xmlStreamCompPtr comp, const xmlChar *name,
+ const xmlChar *ns, int flags) {
+ xmlStreamStepPtr cur;
+
+ if (comp->nbStep >= comp->maxStep) {
+ cur = (xmlStreamStepPtr) xmlRealloc(comp->steps,
+ comp->maxStep * 2 * sizeof(xmlStreamStep));
+ if (cur == NULL) {
+ ERROR(NULL, NULL, NULL,
+ "xmlNewStreamComp: malloc failed\n");
+ return(-1);
+ }
+ comp->steps = cur;
+ comp->maxStep *= 2;
+ }
+ cur = &comp->steps[comp->nbStep++];
+ cur->flags = flags;
+ cur->name = name;
+ cur->ns = ns;
+ return(comp->nbStep - 1);
+}
+
+/**
+ * xmlStreamCompile:
+ * @comp: the precompiled pattern
+ *
+ * Tries to stream compile a pattern
+ *
+ * Returns -1 in case of failure and 0 in case of success.
+ */
+static int
+xmlStreamCompile(xmlPatternPtr comp) {
+ xmlStreamCompPtr stream;
+ int i, s = 0, root = 0, desc = 0;
+
+ if ((comp == NULL) || (comp->steps == NULL))
+ return(-1);
+ stream = xmlNewStreamComp((comp->nbStep / 2) + 1);
+ if (stream == NULL)
+ return(-1);
+ if (comp->dict != NULL) {
+ stream->dict = comp->dict;
+ xmlDictReference(stream->dict);
+ }
+ for (i = 0;i < comp->nbStep;i++) {
+ switch (comp->steps[i].op) {
+ case XML_OP_END:
+ break;
+ case XML_OP_ROOT:
+ if (i != 0)
+ goto error;
+ root = 1;
+ break;
+ case XML_OP_CHILD:
+ case XML_OP_ATTR:
+ case XML_OP_NS:
+ goto error;
+ case XML_OP_ELEM:
+ s = xmlStreamCompAddStep(stream, comp->steps[i].value,
+ comp->steps[i].value2, desc);
+ desc = 0;
+ if (s < 0)
+ goto error;
+ break;
+ case XML_OP_ALL:
+ s = xmlStreamCompAddStep(stream, NULL, NULL, desc);
+ desc = 0;
+ if (s < 0)
+ goto error;
+ break;
+ case XML_OP_PARENT:
+ break;
+ case XML_OP_ANCESTOR:
+ desc = XML_STREAM_STEP_DESC;
+ break;
+ }
+ }
+ stream->steps[s].flags |= XML_STREAM_STEP_FINAL;
+ if (root)
+ stream->steps[0].flags |= XML_STREAM_STEP_ROOT;
+#ifdef DEBUG_STREAMING
+ xmlDebugStreamComp(stream);
+#endif
+ comp->stream = stream;
+ return(0);
+error:
+ xmlFreeStreamComp(stream);
+ return(0);
+}
+
+/**
+ * xmlNewStreamCtxt:
+ * @size: the number of expected states
+ *
+ * build a new stream context
+ *
+ * Returns the new structure or NULL in case of error.
+ */
+static xmlStreamCtxtPtr
+xmlNewStreamCtxt(xmlStreamCompPtr stream) {
+ xmlStreamCtxtPtr cur;
+
+ cur = (xmlStreamCtxtPtr) xmlMalloc(sizeof(xmlStreamCtxt));
+ if (cur == NULL) {
+ ERROR(NULL, NULL, NULL,
+ "xmlNewStreamCtxt: malloc failed\n");
+ return(NULL);
+ }
+ memset(cur, 0, sizeof(xmlStreamCtxt));
+ cur->states = (int *) xmlMalloc(4 * 2 * sizeof(int));
+ if (cur->states == NULL) {
+ xmlFree(cur);
+ ERROR(NULL, NULL, NULL,
+ "xmlNewStreamCtxt: malloc failed\n");
+ return(NULL);
+ }
+ cur->nbState = 0;
+ cur->maxState = 4;
+ cur->level = 0;
+ cur->comp = stream;
+ return(cur);
+}
+
+/**
+ * xmlFreeStreamCtxt:
+ * @stream: the stream context
+ *
+ * Free the stream context
+ */
+void
+xmlFreeStreamCtxt(xmlStreamCtxtPtr stream) {
+ if (stream != NULL) {
+ if (stream->states != NULL)
+ xmlFree(stream->states);
+ xmlFree(stream);
+ }
+}
+
+/**
+ * xmlStreamCtxtAddState:
+ * @comp: the stream context
+ * @idx: the step index for that streaming state
+ *
+ * Add a new state to the stream context
+ *
+ * Returns -1 in case of error or the state index if successful
+ */
+static int
+xmlStreamCtxtAddState(xmlStreamCtxtPtr comp, int idx, int level) {
+ int i;
+ for (i = 0;i < comp->nbState;i++) {
+ if (comp->states[2 * i] < 0) {
+ comp->states[2 * i] = idx;
+ comp->states[2 * i + 1] = level;
+ return(i);
+ }
+ }
+ if (comp->nbState >= comp->maxState) {
+ int *cur;
+
+ cur = (int *) xmlRealloc(comp->states,
+ comp->maxState * 4 * sizeof(int));
+ if (cur == NULL) {
+ ERROR(NULL, NULL, NULL,
+ "xmlNewStreamCtxt: malloc failed\n");
+ return(-1);
+ }
+ comp->states = cur;
+ comp->maxState *= 2;
+ }
+ comp->states[2 * comp->nbState] = idx;
+ comp->states[2 * comp->nbState++ + 1] = level;
+ return(comp->nbState - 1);
+}
+
+/**
+ * xmlStreamPush:
+ * @stream: the stream context
+ * @name: the current name
+ * @ns: the namespace name
+ *
+ * push new data onto the stream. NOTE: if the call xmlPatterncompile()
+ * indicated a dictionnary, then strings for name and ns will be expected
+ * to come from the dictionary.
+ * Both @name and @ns being NULL means the / i.e. the root of the document.
+ * This can also act as a reset.
+ *
+ * Returns: -1 in case of error, 1 if the current state in the stream is a
+ * match and 0 otherwise.
+ */
+int
+xmlStreamPush(xmlStreamCtxtPtr stream,
+ const xmlChar *name, const xmlChar *ns) {
+ int ret = 0, tmp, i, m, match, step;
+ xmlStreamCompPtr comp;
+
+ if ((stream == NULL) || (stream->nbState < 0))
+ return(-1);
+ comp = stream->comp;
+ if ((name == NULL) && (ns == NULL)) {
+ stream->nbState = 0;
+ stream->level = 0;
+ if (comp->steps[0].flags & XML_STREAM_STEP_ROOT) {
+ tmp = xmlStreamCtxtAddState(stream, 0, 0);
+ if (tmp < 0)
+ return(-1);
+ if (comp->steps[tmp].flags & XML_STREAM_STEP_FINAL)
+ return(1);
+ }
+ return(0);
+ }
+ /*
+ * Check evolution of existing states
+ */
+ m = stream->nbState;
+ for (i = 0;i < m;i++) {
+ match = 0;
+ step = stream->states[2 * i];
+ /* dead states */
+ if (step < 0) continue;
+ /* skip new states just added */
+ if (stream->states[(2 * i) + 1] > stream->level) continue;
+
+ /* discard old states */
+ /* something needed about old level discarded */
+
+ if (comp->dict) {
+ if (comp->steps[step].name == NULL) {
+ if (comp->steps[step].ns == NULL)
+ match = 1;
+ else
+ match = (comp->steps[step].ns == ns);
+ } else {
+ match = ((comp->steps[step].name == name) &&
+ (comp->steps[step].ns == ns));
+ }
+ } else {
+ if (comp->steps[step].name == NULL) {
+ if (comp->steps[step].ns == NULL)
+ match = 1;
+ else
+ match = xmlStrEqual(comp->steps[step].ns, ns);
+ } else {
+ match = ((xmlStrEqual(comp->steps[step].name, name)) &&
+ (xmlStrEqual(comp->steps[step].ns, ns)));
+ }
+ }
+ if (match) {
+ if (comp->steps[step].flags & XML_STREAM_STEP_DESC) {
+ if (comp->steps[step].flags & XML_STREAM_STEP_FINAL) {
+ ret = 1;
+ } else {
+ /* descending match create a new state */
+ xmlStreamCtxtAddState(stream, step + 1, stream->level + 1);
+ }
+ } else {
+ if (comp->steps[step].flags & XML_STREAM_STEP_FINAL) {
+ ret = 1;
+ stream->states[2 * i] = -1;
+ } else {
+ stream->states[2 * i] = step + 1;
+ stream->states[2 * i + 1] = stream->level + 1;
+ }
+ }
+ } else if (!(comp->steps[step].flags & XML_STREAM_STEP_DESC)) {
+ /* didn't match, discard */
+ printf("discard %d\n", i);
+ stream->states[2 * i] = -1;
+ }
+ }
+
+ /*
+ * Check creating a new state.
+ */
+ stream->level++;
+ if (!(comp->steps[0].flags & XML_STREAM_STEP_ROOT)) {
+ match = 0;
+ if (comp->dict) {
+ if (comp->steps[0].name == NULL) {
+ if (comp->steps[0].ns == NULL)
+ match = 1;
+ else
+ match = (comp->steps[0].ns == ns);
+ } else {
+ match = ((comp->steps[0].name == name) &&
+ (comp->steps[0].ns == ns));
+ }
+ } else {
+ if (comp->steps[0].name == NULL) {
+ if (comp->steps[0].ns == NULL)
+ match = 1;
+ else
+ match = xmlStrEqual(comp->steps[0].ns, ns);
+ } else {
+ match = ((xmlStrEqual(comp->steps[0].name, name)) &&
+ (xmlStrEqual(comp->steps[0].ns, ns)));
+ }
+ }
+ if (match) {
+ if (comp->steps[0].flags & XML_STREAM_STEP_FINAL)
+ ret = 1;
+ else
+ xmlStreamCtxtAddState(stream, 1, stream->level);
+ }
+ }
+
+#ifdef DEBUG_STREAMING
+ xmlDebugStreamCtxt(stream, ret);
+#endif
+ return(ret);
+}
+
+/**
+ * xmlStreamPop:
+ * @stream: the stream context
+ *
+ * push one level from the stream.
+ *
+ * Returns: -1 in case of error, 0 otherwise.
+ */
+int
+xmlStreamPop(xmlStreamCtxtPtr stream) {
+ if (stream == NULL)
+ return(-1);
+ stream->level--;
+ if (stream->level < 0)
+ return(-1);
+
+ /* something is certainly needed here */
+ return(0);
+}
+
/************************************************************************
* *
* The public interfaces *
@@ -938,6 +1447,7 @@
xmlCompilePathPattern(ctxt);
xmlFreePatParserContext(ctxt);
+ xmlStreamCompile(ret);
if (xmlReversePattern(ret) < 0)
goto error;
return(ret);
@@ -964,4 +1474,21 @@
return(xmlPatMatch(comp, node));
}
+/**
+ * xmlPatternGetStreamCtxt:
+ * @comp: the precompiled pattern
+ *
+ * Get a streaming context for that pattern
+ * Use xmlFreeStreamCtxt to free the context.
+ *
+ * Returns a pointer to the context or NULL in case of failure
+ */
+xmlStreamCtxtPtr
+xmlPatternGetStreamCtxt(xmlPatternPtr comp)
+{
+ if ((comp == NULL) || (comp->stream == NULL))
+ return(NULL);
+ return(xmlNewStreamCtxt(comp->stream));
+}
+
#endif /* LIBXML_PATTERN_ENABLED */