Geez, this one was painful ! I still need to handle entities references
for the validation step but I have a clean way to add this without touching
the algorithm:
- valid.[ch] tree.h: worked *hard* to get non-determinist content
validation without using an ugly NFA -> DFA algo in the source.
Made a specific algorithm easier to maintain, using a single
stack and without recursion.
- Makefile.am test/VCM/*.xml: added more tests to "make Validtests"
- hash.c: made the growing routine static
- tree.h parser.c: added the parent information to an
xmlElementContent node.
Daniel
diff --git a/ChangeLog b/ChangeLog
index 839965f..9d557c0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+Fri Apr 20 14:52:44 CEST 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
+
+ * valid.[ch] tree.h: worked *hard* to get non-determinist content
+ validation without using an ugly NFA -> DFA algo in the source.
+ Made a specific algorithm easier to maintain, using a single
+ stack and without recursion.
+ * Makefile.am test/VCM/*.xml: added more tests to "make Validtests"
+ * hash.c: made the growing routine static
+ * tree.h parser.c: added the parent information to an
+ xmlElementContent node.
+
Wed Apr 18 23:33:11 CEST 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
* SAX.c parser.c xpath.c: generating IDs when not validating
diff --git a/Makefile.am b/Makefile.am
index e0fb485..85ee7b5 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -296,6 +296,7 @@
fi ; fi ; done ; fi ; done)
XIncludetests : xmllint
+ @(rm -f .memdump ; touch .memdump)
@echo "##"
@echo "## XInclude regression tests"
@echo "##"
@@ -351,6 +352,17 @@
Validtests : xmllint
+ @(rm -f .memdump ; touch .memdump)
+ @echo "##"
+ @echo "## Valid documents regression tests"
+ @echo "##"
+ @(for i in $(srcdir)/test/VCM/* ; do \
+ name=`basename $$i`; \
+ if [ ! -d $$i ] ; then \
+ echo Testing $$name ; \
+ $(top_builddir)/xmllint --valid --noout $$i ; \
+ grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\
+ fi ; done ; exit 0)
@echo "##"
@echo "## Validity checking regression tests"
@echo "##"
@@ -363,11 +375,12 @@
else \
echo Testing $$name ; \
$(top_builddir)/xmllint --noout --valid $$i 2> result.$$name ; \
+ grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\
diff $(srcdir)/result/VC/$$name result.$$name ; \
rm result.$$name ; \
fi ; fi ; done)
@echo "##"
- @echo "## Valid documents regression tests"
+ @echo "## General documents valid regression tests"
@echo "##"
@(for i in $(srcdir)/test/valid/* ; do \
name=`basename $$i`; \
@@ -378,6 +391,7 @@
else \
echo Testing $$name ; \
$(top_builddir)/xmllint --valid $$i > result.$$name 2>error.$$name ; \
+ grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\
diff $(srcdir)/result/valid/$$name result.$$name ; \
diff $(srcdir)/result/valid/$$name.err error.$$name ; \
rm result.$$name error.$$name ; \
diff --git a/hash.c b/hash.c
index ec15745..f8865e8 100644
--- a/hash.c
+++ b/hash.c
@@ -125,7 +125,7 @@
*
* Returns 0 in case of success, -1 in case of failure
*/
-int
+static int
xmlHashGrow(xmlHashTablePtr table, int size) {
unsigned long key;
int oldsize, i;
diff --git a/include/libxml/tree.h b/include/libxml/tree.h
index ce86711..08321a2 100644
--- a/include/libxml/tree.h
+++ b/include/libxml/tree.h
@@ -169,6 +169,7 @@
const xmlChar *name; /* Element name */
struct _xmlElementContent *c1; /* first child */
struct _xmlElementContent *c2; /* second child */
+ struct _xmlElementContent *parent; /* parent */
};
typedef enum {
diff --git a/include/libxml/valid.h b/include/libxml/valid.h
index 2df3c58..c048ba0 100644
--- a/include/libxml/valid.h
+++ b/include/libxml/valid.h
@@ -17,6 +17,12 @@
extern "C" {
#endif
+/*
+ * Validation state added for non-determinist content model
+ */
+typedef struct _xmlValidState xmlValidState;
+typedef xmlValidState *xmlValidStatePtr;
+
/**
* an xmlValidCtxt is used for error reporting when validating
*/
@@ -40,6 +46,12 @@
int finishDtd; /* finished validating the Dtd ? */
xmlDocPtr doc; /* the document */
int valid; /* temporary validity check result */
+
+ /* state state used for non-determinist content validation */
+ xmlValidState *vstate; /* current state */
+ int vstateNr; /* Depth of the validation stack */
+ int vstateMax; /* Max depth of the validation stack */
+ xmlValidState *vstateTab; /* array of validation states */
};
/*
diff --git a/parser.c b/parser.c
index a784f71..da921b8 100644
--- a/parser.c
+++ b/parser.c
@@ -3926,12 +3926,18 @@
ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
if (ret == NULL) return(NULL);
ret->c1 = cur;
+ if (cur != NULL)
+ cur->parent = ret;
cur = ret;
} else {
n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
if (n == NULL) return(NULL);
n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
+ if (n->c1 != NULL)
+ n->c1->parent = n;
cur->c2 = n;
+ if (n != NULL)
+ n->parent = cur;
cur = n;
xmlFree(elem);
}
@@ -3954,6 +3960,8 @@
if (elem != NULL) {
cur->c2 = xmlNewElementContent(elem,
XML_ELEMENT_CONTENT_ELEMENT);
+ if (cur->c2 != NULL)
+ cur->c2->parent = cur;
xmlFree(elem);
}
ret->ocur = XML_ELEMENT_CONTENT_MULT;
@@ -4098,10 +4106,16 @@
}
if (last == NULL) {
op->c1 = ret;
+ if (ret != NULL)
+ ret->parent = op;
ret = cur = op;
} else {
cur->c2 = op;
+ if (op != NULL)
+ op->parent = cur;
op->c1 = last;
+ if (last != NULL)
+ last->parent = op;
cur =op;
last = NULL;
}
@@ -4143,10 +4157,16 @@
}
if (last == NULL) {
op->c1 = ret;
+ if (ret != NULL)
+ ret->parent = op;
ret = cur = op;
} else {
cur->c2 = op;
+ if (op != NULL)
+ op->parent = cur;
op->c1 = last;
+ if (last != NULL)
+ last->parent = op;
cur =op;
last = NULL;
}
@@ -4213,6 +4233,8 @@
}
if ((cur != NULL) && (last != NULL)) {
cur->c2 = last;
+ if (last != NULL)
+ last->parent = cur;
}
ctxt->entity = ctxt->input;
NEXT;
@@ -8883,7 +8905,6 @@
xmlParserCtxtPtr ctxt;
xmlDocPtr newDoc;
xmlSAXHandlerPtr oldsax = NULL;
- int oldexternal = ctxt->external;
int ret = 0;
if (depth > 40) {
diff --git a/test/VCM/21.xml b/test/VCM/21.xml
new file mode 100644
index 0000000..78c8713
--- /dev/null
+++ b/test/VCM/21.xml
@@ -0,0 +1,8 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a, (b | c)*, d*)? >
+<!ELEMENT a EMPTY>
+<!ELEMENT b EMPTY>
+<!ELEMENT c EMPTY>
+<!ELEMENT d EMPTY>
+]>
+<doc><a/><d/></doc>
diff --git a/test/VCM/v1.xml b/test/VCM/v1.xml
new file mode 100644
index 0000000..8a7f679
--- /dev/null
+++ b/test/VCM/v1.xml
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc EMPTY>
+]>
+<doc/>
diff --git a/test/VCM/v10.xml b/test/VCM/v10.xml
new file mode 100644
index 0000000..f293a42
--- /dev/null
+++ b/test/VCM/v10.xml
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a)* >
+<!ELEMENT a EMPTY>
+]>
+<doc></doc>
diff --git a/test/VCM/v11.xml b/test/VCM/v11.xml
new file mode 100644
index 0000000..c133523
--- /dev/null
+++ b/test/VCM/v11.xml
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a)* >
+<!ELEMENT a EMPTY>
+]>
+<doc><a/></doc>
diff --git a/test/VCM/v12.xml b/test/VCM/v12.xml
new file mode 100644
index 0000000..3eed3d6
--- /dev/null
+++ b/test/VCM/v12.xml
@@ -0,0 +1,9 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a)* >
+<!ELEMENT a EMPTY>
+]>
+<doc>
+<a/>
+<a/>
+<a/>
+</doc>
diff --git a/test/VCM/v13.xml b/test/VCM/v13.xml
new file mode 100644
index 0000000..75f4352
--- /dev/null
+++ b/test/VCM/v13.xml
@@ -0,0 +1,7 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a)+ >
+<!ELEMENT a EMPTY>
+]>
+<doc>
+<a/>
+</doc>
diff --git a/test/VCM/v14.xml b/test/VCM/v14.xml
new file mode 100644
index 0000000..fa70f9f
--- /dev/null
+++ b/test/VCM/v14.xml
@@ -0,0 +1,9 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a)+ >
+<!ELEMENT a EMPTY>
+]>
+<doc>
+<a/>
+<a/>
+<a/>
+</doc>
diff --git a/test/VCM/v15.xml b/test/VCM/v15.xml
new file mode 100644
index 0000000..878e3e3
--- /dev/null
+++ b/test/VCM/v15.xml
@@ -0,0 +1,9 @@
+<!DOCTYPE doc [
+<!ELEMENT doc ((a | b | c)*) >
+<!ELEMENT a EMPTY>
+<!ELEMENT b EMPTY>
+<!ELEMENT c EMPTY>
+]>
+<doc>
+<b/>
+</doc>
diff --git a/test/VCM/v16.xml b/test/VCM/v16.xml
new file mode 100644
index 0000000..e676347
--- /dev/null
+++ b/test/VCM/v16.xml
@@ -0,0 +1,8 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a | b)>
+<!ELEMENT a EMPTY>
+<!ELEMENT b EMPTY>
+]>
+<doc>
+<b/>
+</doc>
diff --git a/test/VCM/v17.xml b/test/VCM/v17.xml
new file mode 100644
index 0000000..bc9c8c2
--- /dev/null
+++ b/test/VCM/v17.xml
@@ -0,0 +1,7 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a? | b?) >
+<!ELEMENT a EMPTY>
+<!ELEMENT b EMPTY>
+<!ELEMENT c EMPTY>
+]>
+<doc></doc>
diff --git a/test/VCM/v18.xml b/test/VCM/v18.xml
new file mode 100644
index 0000000..fccc440
--- /dev/null
+++ b/test/VCM/v18.xml
@@ -0,0 +1,7 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a? | b?) >
+<!ELEMENT a EMPTY>
+<!ELEMENT b EMPTY>
+<!ELEMENT c EMPTY>
+]>
+<doc><b/></doc>
diff --git a/test/VCM/v19.xml b/test/VCM/v19.xml
new file mode 100644
index 0000000..17aacff
--- /dev/null
+++ b/test/VCM/v19.xml
@@ -0,0 +1,7 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a? | b+) >
+<!ELEMENT a EMPTY>
+<!ELEMENT b EMPTY>
+<!ELEMENT c EMPTY>
+]>
+<doc><b/></doc>
diff --git a/test/VCM/v2.xml b/test/VCM/v2.xml
new file mode 100644
index 0000000..35c63af
--- /dev/null
+++ b/test/VCM/v2.xml
@@ -0,0 +1,4 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>text</doc>
diff --git a/test/VCM/v20.xml b/test/VCM/v20.xml
new file mode 100644
index 0000000..a337efa
--- /dev/null
+++ b/test/VCM/v20.xml
@@ -0,0 +1,10 @@
+<!DOCTYPE doc [
+<!ELEMENT doc ((a | b)*, a, b) >
+<!ELEMENT a EMPTY>
+<!ELEMENT b EMPTY>
+<!ELEMENT c EMPTY>
+]>
+<doc>
+<a/>
+<b/>
+</doc>
diff --git a/test/VCM/v3.xml b/test/VCM/v3.xml
new file mode 100644
index 0000000..f255589
--- /dev/null
+++ b/test/VCM/v3.xml
@@ -0,0 +1,8 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (#PCDATA)>
+]>
+<doc>
+<?pi value?>
+text
+<!-- and comments -->
+</doc>
diff --git a/test/VCM/v4.xml b/test/VCM/v4.xml
new file mode 100644
index 0000000..e96afe2
--- /dev/null
+++ b/test/VCM/v4.xml
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a)>
+<!ELEMENT a EMPTY>
+]>
+<doc><a/></doc>
diff --git a/test/VCM/v5.xml b/test/VCM/v5.xml
new file mode 100644
index 0000000..a7ff5ba
--- /dev/null
+++ b/test/VCM/v5.xml
@@ -0,0 +1,7 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a)>
+<!ELEMENT a EMPTY>
+]>
+<doc>
+<a/>
+</doc>
diff --git a/test/VCM/v6.xml b/test/VCM/v6.xml
new file mode 100644
index 0000000..93fa4b8
--- /dev/null
+++ b/test/VCM/v6.xml
@@ -0,0 +1,9 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a, b)>
+<!ELEMENT a EMPTY>
+<!ELEMENT b EMPTY>
+]>
+<doc>
+<a/>
+<b/>
+</doc>
diff --git a/test/VCM/v7.xml b/test/VCM/v7.xml
new file mode 100644
index 0000000..240c480
--- /dev/null
+++ b/test/VCM/v7.xml
@@ -0,0 +1,8 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a | b)>
+<!ELEMENT a EMPTY>
+<!ELEMENT b EMPTY>
+]>
+<doc>
+<a/>
+</doc>
diff --git a/test/VCM/v8.xml b/test/VCM/v8.xml
new file mode 100644
index 0000000..421a1df
--- /dev/null
+++ b/test/VCM/v8.xml
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a)? >
+<!ELEMENT a EMPTY>
+]>
+<doc></doc>
diff --git a/test/VCM/v9.xml b/test/VCM/v9.xml
new file mode 100644
index 0000000..1c639ec
--- /dev/null
+++ b/test/VCM/v9.xml
@@ -0,0 +1,5 @@
+<!DOCTYPE doc [
+<!ELEMENT doc (a)? >
+<!ELEMENT a EMPTY>
+]>
+<doc><a/></doc>
diff --git a/tree.h b/tree.h
index ce86711..08321a2 100644
--- a/tree.h
+++ b/tree.h
@@ -169,6 +169,7 @@
const xmlChar *name; /* Element name */
struct _xmlElementContent *c1; /* first child */
struct _xmlElementContent *c2; /* second child */
+ struct _xmlElementContent *parent; /* parent */
};
typedef enum {
diff --git a/valid.c b/valid.c
index c3a0739..c36930f 100644
--- a/valid.c
+++ b/valid.c
@@ -28,6 +28,8 @@
#include <libxml/xmlerror.h>
#include <libxml/list.h>
+#define ALLOW_UNDETERMINISTIC_MODELS
+
/*
* Generic function for accessing stacks in the Validity Context
*/
@@ -61,84 +63,220 @@
return(ret); \
} \
+#ifdef ALLOW_UNDETERMINISTIC_MODELS
+
+/*
+ * I will use a home made algorithm less complex and easier to
+ * debug/maintin than a generic NFA -> DFA state based algo. The
+ * only restriction is on the deepness of the tree limited by the
+ * size of the occurs bitfield
+ *
+ * this is the content of a saved state for rollbacks
+ */
+
+#define ROLLBACK_OR 0
+#define ROLLBACK_PARENT 1
+
+struct _xmlValidState {
+ xmlElementContentPtr cont; /* pointer to the content model subtree */
+ xmlNodePtr node; /* pointer to the current node in the list */
+ long occurs;/* bitfield for multiple occurences */
+ unsigned char depth; /* current depth in the overall tree */
+ unsigned char state; /* ROLLBACK_XXX */
+} _xmlValidState;
+
+#define MAX_DEPTH ((sizeof(_xmlValidState.occurs)) * 8)
+#define CONT ctxt->vstate->cont
+#define NODE ctxt->vstate->node
+#define DEPTH ctxt->vstate->depth
+#define OCCURS ctxt->vstate->occurs
+#define STATE ctxt->vstate->state
+
+#define OCCURENCE (ctxt->vstate->occurs & (1 << DEPTH))
+#define PARENT_OCCURENCE (ctxt->vstate->occurs & ((1 << DEPTH) - 1))
+
+#define SET_OCCURENCE ctxt->vstate->occurs |= (1 << DEPTH)
+#define RESET_OCCURENCE ctxt->vstate->occurs &= ((1 << DEPTH) - 1)
+
+static int
+vstateVPush(xmlValidCtxtPtr ctxt, xmlElementContentPtr cont,
+ xmlNodePtr node, unsigned char depth, long occurs,
+ unsigned char state) {
+ if (ctxt->vstateNr >= ctxt->vstateMax) {
+ ctxt->vstateMax *= 2;
+ ctxt->vstateTab = (xmlValidState *) xmlRealloc(ctxt->vstateTab,
+ ctxt->vstateMax * sizeof(ctxt->vstateTab[0]));
+ if (ctxt->vstateTab == NULL) {
+ xmlGenericError(xmlGenericErrorContext,
+ "realloc failed !n");
+ return(0);
+ }
+ }
+ ctxt->vstateTab[ctxt->vstateNr].cont = cont;
+ ctxt->vstateTab[ctxt->vstateNr].node = node;
+ ctxt->vstateTab[ctxt->vstateNr].depth = depth;
+ ctxt->vstateTab[ctxt->vstateNr].occurs = occurs;
+ ctxt->vstateTab[ctxt->vstateNr].state = state;
+ return(ctxt->vstateNr++);
+}
+
+static int
+vstateVPop(xmlValidCtxtPtr ctxt) {
+ if (ctxt->vstateNr <= 1) return(-1);
+ ctxt->vstateNr--;
+ ctxt->vstate = &ctxt->vstateTab[0];
+ ctxt->vstate->cont = ctxt->vstateTab[ctxt->vstateNr].cont;
+ ctxt->vstate->node = ctxt->vstateTab[ctxt->vstateNr].node;
+ ctxt->vstate->depth = ctxt->vstateTab[ctxt->vstateNr].depth;
+ ctxt->vstate->occurs = ctxt->vstateTab[ctxt->vstateNr].occurs;
+ ctxt->vstate->state = ctxt->vstateTab[ctxt->vstateNr].state;
+ return(ctxt->vstateNr);
+}
+
+
+#endif /* ALLOW_UNDETERMINISTIC_MODELS */
+
PUSH_AND_POP(static, xmlNodePtr, node)
/* #define DEBUG_VALID_ALGO */
#ifdef DEBUG_VALID_ALGO
-void xmlValidPrintNodeList(xmlNodePtr cur) {
+static void
+xmlValidPrintNode(xmlNodePtr cur) {
+ if (cur == NULL) {
+ xmlGenericError(xmlGenericErrorContext, "null");
+ return;
+ }
+ switch (cur->type) {
+ case XML_ELEMENT_NODE:
+ xmlGenericError(xmlGenericErrorContext, "%s ", cur->name);
+ break;
+ case XML_TEXT_NODE:
+ xmlGenericError(xmlGenericErrorContext, "text ");
+ break;
+ case XML_CDATA_SECTION_NODE:
+ xmlGenericError(xmlGenericErrorContext, "cdata ");
+ break;
+ case XML_ENTITY_REF_NODE:
+ xmlGenericError(xmlGenericErrorContext, "&%s; ", cur->name);
+ break;
+ case XML_PI_NODE:
+ xmlGenericError(xmlGenericErrorContext, "pi(%s) ", cur->name);
+ break;
+ case XML_COMMENT_NODE:
+ xmlGenericError(xmlGenericErrorContext, "comment ");
+ break;
+ case XML_ATTRIBUTE_NODE:
+ xmlGenericError(xmlGenericErrorContext, "?attr? ");
+ break;
+ case XML_ENTITY_NODE:
+ xmlGenericError(xmlGenericErrorContext, "?ent? ");
+ break;
+ case XML_DOCUMENT_NODE:
+ xmlGenericError(xmlGenericErrorContext, "?doc? ");
+ break;
+ case XML_DOCUMENT_TYPE_NODE:
+ xmlGenericError(xmlGenericErrorContext, "?doctype? ");
+ break;
+ case XML_DOCUMENT_FRAG_NODE:
+ xmlGenericError(xmlGenericErrorContext, "?frag? ");
+ break;
+ case XML_NOTATION_NODE:
+ xmlGenericError(xmlGenericErrorContext, "?nota? ");
+ break;
+ case XML_HTML_DOCUMENT_NODE:
+ xmlGenericError(xmlGenericErrorContext, "?html? ");
+ break;
+ case XML_DTD_NODE:
+ xmlGenericError(xmlGenericErrorContext, "?dtd? ");
+ break;
+ case XML_ELEMENT_DECL:
+ xmlGenericError(xmlGenericErrorContext, "?edecl? ");
+ break;
+ case XML_ATTRIBUTE_DECL:
+ xmlGenericError(xmlGenericErrorContext, "?adecl? ");
+ break;
+ case XML_ENTITY_DECL:
+ xmlGenericError(xmlGenericErrorContext, "?entdecl? ");
+ break;
+ case XML_NAMESPACE_DECL:
+ xmlGenericError(xmlGenericErrorContext, "?nsdecl? ");
+ break;
+ case XML_XINCLUDE_START:
+ xmlGenericError(xmlGenericErrorContext, "incstart ");
+ break;
+ case XML_XINCLUDE_END:
+ xmlGenericError(xmlGenericErrorContext, "incend ");
+ break;
+ }
+}
+
+static void
+xmlValidPrintNodeList(xmlNodePtr cur) {
if (cur == NULL)
xmlGenericError(xmlGenericErrorContext, "null ");
while (cur != NULL) {
- switch (cur->type) {
- case XML_ELEMENT_NODE:
- xmlGenericError(xmlGenericErrorContext, "%s ", cur->name);
- break;
- case XML_TEXT_NODE:
- xmlGenericError(xmlGenericErrorContext, "text ");
- break;
- case XML_CDATA_SECTION_NODE:
- xmlGenericError(xmlGenericErrorContext, "cdata ");
- break;
- case XML_ENTITY_REF_NODE:
- xmlGenericError(xmlGenericErrorContext, "&%s; ", cur->name);
- break;
- case XML_PI_NODE:
- xmlGenericError(xmlGenericErrorContext, "pi(%s) ", cur->name);
- break;
- case XML_COMMENT_NODE:
- xmlGenericError(xmlGenericErrorContext, "comment ");
- break;
- case XML_ATTRIBUTE_NODE:
- xmlGenericError(xmlGenericErrorContext, "?attr? ");
- break;
- case XML_ENTITY_NODE:
- xmlGenericError(xmlGenericErrorContext, "?ent? ");
- break;
- case XML_DOCUMENT_NODE:
- xmlGenericError(xmlGenericErrorContext, "?doc? ");
- break;
- case XML_DOCUMENT_TYPE_NODE:
- xmlGenericError(xmlGenericErrorContext, "?doctype? ");
- break;
- case XML_DOCUMENT_FRAG_NODE:
- xmlGenericError(xmlGenericErrorContext, "?frag? ");
- break;
- case XML_NOTATION_NODE:
- xmlGenericError(xmlGenericErrorContext, "?nota? ");
- break;
- case XML_HTML_DOCUMENT_NODE:
- xmlGenericError(xmlGenericErrorContext, "?html? ");
- break;
- case XML_DTD_NODE:
- xmlGenericError(xmlGenericErrorContext, "?dtd? ");
- break;
- case XML_ELEMENT_DECL:
- xmlGenericError(xmlGenericErrorContext, "?edecl? ");
- break;
- case XML_ATTRIBUTE_DECL:
- xmlGenericError(xmlGenericErrorContext, "?adecl? ");
- break;
- case XML_ENTITY_DECL:
- xmlGenericError(xmlGenericErrorContext, "?entdecl? ");
- break;
- }
+ xmlValidPrintNode(cur);
cur = cur->next;
}
}
-void xmlValidDebug(xmlNodePtr cur, xmlElementContentPtr cont) {
+static void
+xmlValidDebug(xmlNodePtr cur, xmlElementContentPtr cont) {
char expr[1000];
expr[0] = 0;
xmlGenericError(xmlGenericErrorContext, "valid: ");
xmlValidPrintNodeList(cur);
xmlGenericError(xmlGenericErrorContext, "against ");
- xmlSprintfElementContent(expr, cont, 0);
+ xmlSprintfElementContent(expr, cont, 1);
xmlGenericError(xmlGenericErrorContext, "%s\n", expr);
}
+static void
+xmlValidDebugState(xmlValidStatePtr state) {
+ xmlGenericError(xmlGenericErrorContext, "(");
+ if (state->cont == NULL)
+ xmlGenericError(xmlGenericErrorContext, "null,");
+ else
+ switch (state->cont->type) {
+ case XML_ELEMENT_CONTENT_PCDATA:
+ xmlGenericError(xmlGenericErrorContext, "pcdata,");
+ break;
+ case XML_ELEMENT_CONTENT_ELEMENT:
+ xmlGenericError(xmlGenericErrorContext, "%s,",
+ state->cont->name);
+ break;
+ case XML_ELEMENT_CONTENT_SEQ:
+ xmlGenericError(xmlGenericErrorContext, "seq,");
+ break;
+ case XML_ELEMENT_CONTENT_OR:
+ xmlGenericError(xmlGenericErrorContext, "or,");
+ break;
+ }
+ xmlValidPrintNode(state->node);
+ xmlGenericError(xmlGenericErrorContext, ",%d,%X,%d)",
+ state->depth, state->occurs, state->state);
+}
+
+static void
+xmlValidStateDebug(xmlValidCtxtPtr ctxt) {
+ int i, j;
+
+ xmlGenericError(xmlGenericErrorContext, "state: ");
+ xmlValidDebugState(ctxt->vstate);
+ xmlGenericError(xmlGenericErrorContext, " stack: %d ",
+ ctxt->vstateNr - 1);
+ for (i = 0, j = ctxt->vstateNr - 1;(i < 3) && (j > 0);i++,j--)
+ xmlValidDebugState(&ctxt->vstateTab[j]);
+ xmlGenericError(xmlGenericErrorContext, "\n");
+}
+
+/*****
#define DEBUG_VALID_STATE(n,c) xmlValidDebug(n,c);
+ *****/
+
+#define DEBUG_VALID_STATE(n,c) xmlValidStateDebug(ctxt);
#define DEBUG_VALID_MSG(m) \
xmlGenericError(xmlGenericErrorContext, "%s\n", m);
@@ -271,7 +409,7 @@
ret->name = xmlStrdup(name);
else
ret->name = NULL;
- ret->c1 = ret->c2 = NULL;
+ ret->c1 = ret->c2 = ret->parent = NULL;
return(ret);
}
@@ -296,7 +434,11 @@
}
ret->ocur = cur->ocur;
if (cur->c1 != NULL) ret->c1 = xmlCopyElementContent(cur->c1);
+ if (ret->c1 != NULL)
+ ret->c1->parent = ret;
if (cur->c2 != NULL) ret->c2 = xmlCopyElementContent(cur->c2);
+ if (ret->c2 != NULL)
+ ret->c2->parent = ret;
return(ret);
}
@@ -3086,6 +3228,8 @@
return(ret);
}
+#ifndef ALLOW_UNDETERMINISTIC_MODELS
+
/* Find the next XML_ELEMENT_NODE, subject to the content constraints.
* Return -1 if we found something unexpected, or 1 otherwise.
*/
@@ -3346,6 +3490,386 @@
return(xmlValidateFindNextElement(ctxt, child, cont));
}
+#else /* ALLOW_UNDETERMINISTIC_MODELS */
+
+/**
+ * xmlValidateSkipIgnorable:
+ * @ctxt: the validation context
+ * @child: the child list
+ *
+ * Skip ignorable elements w.r.t. the validation process
+ *
+ * returns the first element to consider for validation of the content model
+ */
+
+static xmlNodePtr
+xmlValidateSkipIgnorable(xmlValidCtxtPtr ctxt, xmlNodePtr child) {
+ while (child != NULL) {
+ switch (child->type) {
+ /*
+ * If there is an entity declared and it's not empty
+ * Push the current node on the stack and process with the
+ * entity content.
+ */
+ case XML_ENTITY_REF_NODE:
+ if ((child->children != NULL) &&
+ (child->children->children != NULL)) {
+ nodeVPush(ctxt, child);
+ child = child->children->children;
+ continue;
+ }
+ break;
+ /* These things are ignored (skipped) during validation. */
+ case XML_PI_NODE:
+ case XML_COMMENT_NODE:
+ case XML_XINCLUDE_START:
+ case XML_XINCLUDE_END:
+ child = child->next;
+ break;
+ case XML_TEXT_NODE:
+ if (xmlIsBlankNode(child))
+ child = child->next;
+ else
+ return(child);
+ break;
+ /* keep current node */
+ default:
+ return(child);
+ }
+ }
+ return(child);
+}
+
+/**
+ * xmlValidateElementType:
+ * @ctxt: the validation context
+ *
+ * Try to validate the content model of an element internal function
+ *
+ * returns 1 if valid or 0 and -1 if PCDATA stuff is found,
+ * also update child and content values in-situ.
+ */
+
+static int
+xmlValidateElementType(xmlValidCtxtPtr ctxt) {
+ int ret = -1;
+ int consumed = 1;
+
+ NODE = xmlValidateSkipIgnorable(ctxt, NODE);
+ if ((NODE == NULL) && (CONT == NULL))
+ return(1);
+ if ((NODE == NULL) &&
+ ((CONT->ocur == XML_ELEMENT_CONTENT_MULT) ||
+ (CONT->ocur == XML_ELEMENT_CONTENT_OPT))) {
+ return(1);
+ }
+ if (CONT == NULL) return(-1);
+
+ /*
+ * We arrive here when more states need to be examined
+ */
+cont:
+
+ /*
+ * We just recovered from a rollback generated by a possible
+ * epsilon transition, go directly to the analysis phase
+ */
+ if (STATE == ROLLBACK_PARENT) {
+ DEBUG_VALID_MSG("restaured parent branch");
+ DEBUG_VALID_STATE(NODE, CONT)
+ ret = 1;
+ consumed = 0;
+ goto analyze;
+ }
+
+ DEBUG_VALID_STATE(NODE, CONT)
+ /*
+ * we may have to save a backup state here. This is the equivalent
+ * of handling epsilon transition in NFAs.
+ */
+ if ((consumed) && (CONT != NULL) &&
+ (CONT->parent != NULL) &&
+ ((CONT->ocur == XML_ELEMENT_CONTENT_MULT) ||
+ (CONT->ocur == XML_ELEMENT_CONTENT_OPT))) {
+ DEBUG_VALID_MSG("saving parent branch");
+ vstateVPush(ctxt, CONT, NODE, DEPTH, OCCURS, ROLLBACK_PARENT);
+ }
+
+
+ /*
+ * Check first if the content matches
+ */
+ switch (CONT->type) {
+ case XML_ELEMENT_CONTENT_PCDATA:
+ if (NODE == NULL) {
+ DEBUG_VALID_MSG("pcdata failed no node");
+ ret = 0;
+ break;
+ }
+ if (NODE->type == XML_TEXT_NODE) {
+ DEBUG_VALID_MSG("pcdata found, skip to next");
+ /*
+ * go to next element in the content model
+ * skipping ignorable elems
+ */
+ do {
+ NODE = NODE->next;
+ NODE = xmlValidateSkipIgnorable(ctxt, NODE);
+ } while ((NODE != NULL) &&
+ ((NODE->type != XML_ELEMENT_NODE) &&
+ (NODE->type != XML_TEXT_NODE)));
+ consumed = 1;
+ ret = 1;
+ break;
+ } else {
+ DEBUG_VALID_MSG("pcdata failed");
+ ret = 0;
+ break;
+ }
+ break;
+ case XML_ELEMENT_CONTENT_ELEMENT:
+ if (NODE == NULL) {
+ DEBUG_VALID_MSG("element failed no node");
+ ret = 0;
+ break;
+ }
+ ret = (xmlStrEqual(NODE->name, CONT->name));
+ if (ret == 1) {
+ DEBUG_VALID_MSG("element found, skip to next");
+ /*
+ * go to next element in the content model
+ * skipping ignorable elems
+ */
+ do {
+ NODE = NODE->next;
+ NODE = xmlValidateSkipIgnorable(ctxt, NODE);
+ } while ((NODE != NULL) &&
+ ((NODE->type != XML_ELEMENT_NODE) &&
+ (NODE->type != XML_TEXT_NODE)));
+ consumed = 1;
+ } else {
+ DEBUG_VALID_MSG("element failed");
+ ret = 0;
+ break;
+ }
+ break;
+ case XML_ELEMENT_CONTENT_OR:
+ /*
+ * save the second branch 'or' branch
+ */
+ DEBUG_VALID_MSG("saving 'or' branch");
+ vstateVPush(ctxt, CONT->c2, NODE, DEPTH + 1, OCCURS, ROLLBACK_OR);
+
+ DEPTH++;
+ CONT = CONT->c1;
+ goto cont;
+ case XML_ELEMENT_CONTENT_SEQ:
+ DEPTH++;
+ CONT = CONT->c1;
+ goto cont;
+ }
+
+ /*
+ * At this point handle going up in the tree
+ */
+ if (ret == -1) {
+ DEBUG_VALID_MSG("error found returning");
+ return(ret);
+ }
+analyze:
+ while (CONT != NULL) {
+ /*
+ * First do the analysis depending on the occurence model at
+ * this level.
+ */
+ if (ret == 0) {
+ switch (CONT->ocur) {
+ case XML_ELEMENT_CONTENT_ONCE:
+ DEBUG_VALID_MSG("Once branch failed, rollback");
+ if (vstateVPop(ctxt) < 0 ) {
+ DEBUG_VALID_MSG("exhaustion, failed");
+ return(0);
+ }
+ consumed = 0;
+ goto cont;
+ case XML_ELEMENT_CONTENT_PLUS:
+ if (OCCURENCE == 0) {
+ DEBUG_VALID_MSG("Plus branch failed, rollback");
+ if (vstateVPop(ctxt) < 0 ) {
+ DEBUG_VALID_MSG("exhaustion, failed");
+ return(0);
+ }
+ consumed = 0;
+ goto cont;
+ }
+ DEBUG_VALID_MSG("Plus branch found");
+ ret = 1;
+ break;
+ case XML_ELEMENT_CONTENT_MULT:
+#ifdef DEBUG_VALID_ALGO
+ if (OCCURENCE == 0) {
+ DEBUG_VALID_MSG("Mult branch failed");
+ } else {
+ DEBUG_VALID_MSG("Mult branch found");
+ }
+#endif
+ ret = 1;
+ break;
+ case XML_ELEMENT_CONTENT_OPT:
+ DEBUG_VALID_MSG("Option branch failed");
+ ret = 1;
+ break;
+ }
+ } else {
+ switch (CONT->ocur) {
+ case XML_ELEMENT_CONTENT_OPT:
+ DEBUG_VALID_MSG("Option branch succeeded");
+ ret = 1;
+ break;
+ case XML_ELEMENT_CONTENT_ONCE:
+ DEBUG_VALID_MSG("Once branch succeeded");
+ ret = 1;
+ break;
+ case XML_ELEMENT_CONTENT_PLUS:
+ if (STATE == ROLLBACK_PARENT) {
+ DEBUG_VALID_MSG("Plus branch rollback");
+ ret = 1;
+ break;
+ }
+ if (NODE == NULL) {
+ DEBUG_VALID_MSG("Plus branch exhausted");
+ ret = 1;
+ break;
+ }
+ DEBUG_VALID_MSG("Plus branch succeeded, continuing");
+ SET_OCCURENCE;
+ goto cont;
+ case XML_ELEMENT_CONTENT_MULT:
+ if (STATE == ROLLBACK_PARENT) {
+ DEBUG_VALID_MSG("Mult branch rollback");
+ ret = 1;
+ break;
+ }
+ if (NODE == NULL) {
+ DEBUG_VALID_MSG("Mult branch exhausted");
+ ret = 1;
+ break;
+ }
+ DEBUG_VALID_MSG("Mult branch succeeded, continuing");
+ SET_OCCURENCE;
+ goto cont;
+ }
+ }
+ STATE = 0;
+
+ /*
+ * Then act accordingly at the parent level
+ */
+ RESET_OCCURENCE;
+ if (CONT->parent == NULL)
+ break;
+
+ switch (CONT->parent->type) {
+ case XML_ELEMENT_CONTENT_PCDATA:
+ DEBUG_VALID_MSG("Error: parent pcdata");
+ return(-1);
+ case XML_ELEMENT_CONTENT_ELEMENT:
+ DEBUG_VALID_MSG("Error: parent element");
+ return(-1);
+ case XML_ELEMENT_CONTENT_OR:
+ if (ret == 1) {
+ DEBUG_VALID_MSG("Or succeeded");
+ CONT = CONT->parent;
+ DEPTH--;
+ } else {
+ DEBUG_VALID_MSG("Or failed");
+ CONT = CONT->parent;
+ DEPTH--;
+ }
+ break;
+ case XML_ELEMENT_CONTENT_SEQ:
+ if (ret == 0) {
+ DEBUG_VALID_MSG("Sequence failed");
+ CONT = CONT->parent;
+ DEPTH--;
+ } else if (CONT == CONT->parent->c1) {
+ DEBUG_VALID_MSG("Sequence testing 2nd branch");
+ CONT = CONT->parent->c2;
+ goto cont;
+ } else {
+ DEBUG_VALID_MSG("Sequence succeeded");
+ CONT = CONT->parent;
+ DEPTH--;
+ }
+ }
+ }
+ if (NODE != NULL) {
+ DEBUG_VALID_MSG("Failed, remaining input, rollback");
+ if (vstateVPop(ctxt) < 0 ) {
+ DEBUG_VALID_MSG("exhaustion, failed");
+ return(0);
+ }
+ consumed = 0;
+ goto cont;
+ }
+ if (ret == 0) {
+ DEBUG_VALID_MSG("Failure, rollback");
+ if (vstateVPop(ctxt) < 0 ) {
+ DEBUG_VALID_MSG("exhaustion, failed");
+ return(0);
+ }
+ consumed = 0;
+ goto cont;
+ }
+ return(1);
+}
+
+/**
+ * xmlValidateElementContent:
+ * @ctxt: the validation context
+ * @child: the child list
+ * @cont: pointer to the content declaration
+ *
+ * Try to validate the content model of an element
+ *
+ * returns 1 if valid or 0 if not and -1 in case of error
+ */
+
+static int
+xmlValidateElementContent(xmlValidCtxtPtr ctxt, xmlNodePtr child,
+ xmlElementContentPtr cont) {
+ int ret;
+
+ /*
+ * TODO: handle the fact that entities references
+ * may appear at this level.
+ */
+ ctxt->vstateMax = 8;
+ ctxt->vstateTab = (xmlValidState *) xmlMalloc(
+ ctxt->vstateMax * sizeof(ctxt->vstateTab[0]));
+ if (ctxt->vstateTab == NULL) {
+ xmlGenericError(xmlGenericErrorContext,
+ "malloc failed !n");
+ return(0);
+ }
+ /*
+ * The first entry in the stack is reserved to the current state
+ */
+ ctxt->vstate = &ctxt->vstateTab[0];
+ ctxt->vstateNr = 1;
+ CONT = cont;
+ NODE = child;
+ DEPTH = 0;
+ OCCURS = 0;
+ STATE = 0;
+ ret = xmlValidateElementType(ctxt);
+ ctxt->vstateMax = 0;
+ xmlFree(ctxt->vstateTab);
+
+ return(ret);
+}
+#endif /* ALLOW_UNDETERMINISTIC_MODELS */
+
/**
* xmlSprintfElementChilds:
* @buf: an output buffer
@@ -3612,6 +4136,23 @@
case XML_ELEMENT_TYPE_ELEMENT:
child = elem->children;
cont = elemDecl->content;
+#ifdef ALLOW_UNDETERMINISTIC_MODELS
+ ret = xmlValidateElementContent(ctxt, child, cont);
+ if (ret != 1) {
+ char expr[1000];
+ char list[2000];
+
+ expr[0] = 0;
+ xmlSprintfElementContent(expr, cont, 1);
+ list[0] = 0;
+ xmlSprintfElementChilds(list, elem, 1);
+
+ VERROR(ctxt->userData,
+ "Element %s content doesn't follow the Dtd\nExpecting %s, got %s\n",
+ elem->name, expr, list);
+ ret = 0;
+ }
+#else
ret = xmlValidateElementTypeElement(ctxt, &child, cont);
while ((child != NULL) && (child->type == XML_TEXT_NODE) &&
(xmlIsBlankNode(child))) {
@@ -3633,6 +4174,7 @@
ret = 0;
}
break;
+#endif
}
/* [ VC: Required Attribute ] */
diff --git a/valid.h b/valid.h
index 2df3c58..c048ba0 100644
--- a/valid.h
+++ b/valid.h
@@ -17,6 +17,12 @@
extern "C" {
#endif
+/*
+ * Validation state added for non-determinist content model
+ */
+typedef struct _xmlValidState xmlValidState;
+typedef xmlValidState *xmlValidStatePtr;
+
/**
* an xmlValidCtxt is used for error reporting when validating
*/
@@ -40,6 +46,12 @@
int finishDtd; /* finished validating the Dtd ? */
xmlDocPtr doc; /* the document */
int valid; /* temporary validity check result */
+
+ /* state state used for non-determinist content validation */
+ xmlValidState *vstate; /* current state */
+ int vstateNr; /* Depth of the validation stack */
+ int vstateMax; /* Max depth of the validation stack */
+ xmlValidState *vstateTab; /* array of validation states */
};
/*