another entity processing update from Markus Henke Daniel
* tree.c: another entity processing update from Markus Henke
Daniel
diff --git a/ChangeLog b/ChangeLog
index 87789fc..2569fb5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+Fri Apr 19 18:26:04 CEST 2002 Daniel Veillard <daniel@veillard.com>
+
+ * tree.c: another entity processing update from Markus Henke
+
Fri Apr 19 17:14:24 CEST 2002 Bjorn Reese <breese@users.sourceforge.net>
* trionan.c: fixed crash on OSF/1
diff --git a/include/libxml/xmlautomata.h b/include/libxml/xmlautomata.h
index 853acc4..c7d5b05 100644
--- a/include/libxml/xmlautomata.h
+++ b/include/libxml/xmlautomata.h
@@ -60,6 +60,16 @@
int min,
int max,
void *data);
+xmlAutomataStatePtr xmlAutomataNewOnceTrans (xmlAutomataPtr am,
+ xmlAutomataStatePtr from,
+ xmlAutomataStatePtr to,
+ const xmlChar *token,
+ int min,
+ int max,
+ void *data);
+xmlAutomataStatePtr xmlAutomataNewAllTrans (xmlAutomataPtr am,
+ xmlAutomataStatePtr from,
+ xmlAutomataStatePtr to);
xmlAutomataStatePtr xmlAutomataNewEpsilon (xmlAutomataPtr am,
xmlAutomataStatePtr from,
xmlAutomataStatePtr to);
diff --git a/tree.c b/tree.c
index 086cb16..e5a4958 100644
--- a/tree.c
+++ b/tree.c
@@ -902,56 +902,73 @@
* Returns a pointer to the string copy, the caller must free it.
*/
xmlChar *
-xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine) {
+xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine)
+{
xmlNodePtr node = list;
xmlChar *ret = NULL;
xmlEntityPtr ent;
- if (list == NULL) return(NULL);
+ if (list == NULL)
+ return (NULL);
while (node != NULL) {
if ((node->type == XML_TEXT_NODE) ||
- (node->type == XML_CDATA_SECTION_NODE)) {
- if (inLine) {
- ret = xmlStrcat(ret, node->content);
- } else {
- xmlChar *buffer;
-
- buffer = xmlEncodeEntitiesReentrant(doc, node->content);
- if (buffer != NULL) {
- ret = xmlStrcat(ret, buffer);
- xmlFree(buffer);
- }
- }
- } else if (node->type == XML_ENTITY_REF_NODE) {
- if (inLine) {
- ent = xmlGetDocEntity(doc, node->name);
- if (ent != NULL)
- ret = xmlStrcat(ret, ent->content);
- else {
- ret = xmlStrcat(ret, node->content);
- }
+ (node->type == XML_CDATA_SECTION_NODE)) {
+ if (inLine) {
+ ret = xmlStrcat(ret, node->content);
} else {
- xmlChar buf[2];
- buf[0] = '&'; buf[1] = 0;
- ret = xmlStrncat(ret, buf, 1);
- ret = xmlStrcat(ret, node->name);
- buf[0] = ';'; buf[1] = 0;
- ret = xmlStrncat(ret, buf, 1);
- }
- }
-#if 0
- else {
- xmlGenericError(xmlGenericErrorContext,
- "xmlGetNodeListString : invalid node type %d\n",
- node->type);
- }
-#endif
- node = node->next;
- }
- return(ret);
-}
+ xmlChar *buffer;
+ buffer = xmlEncodeEntitiesReentrant(doc, node->content);
+ if (buffer != NULL) {
+ ret = xmlStrcat(ret, buffer);
+ xmlFree(buffer);
+ }
+ }
+ } else if (node->type == XML_ENTITY_REF_NODE) {
+ if (inLine) {
+ ent = xmlGetDocEntity(doc, node->name);
+ if (ent != NULL) {
+ xmlChar *buffer;
+
+ /* an entity content can be any "well balanced chunk",
+ * i.e. the result of the content [43] production:
+ * http://www.w3.org/TR/REC-xml#NT-content.
+ * So it can contain text, CDATA section or nested
+ * entity reference nodes (among others).
+ * -> we recursively call xmlNodeListGetString()
+ * which handles these types */
+ buffer = xmlNodeListGetString(doc, ent->children, 1);
+ if (buffer != NULL) {
+ ret = xmlStrcat(ret, buffer);
+ xmlFree(buffer);
+ }
+ } else {
+ ret = xmlStrcat(ret, node->content);
+ }
+ } else {
+ xmlChar buf[2];
+
+ buf[0] = '&';
+ buf[1] = 0;
+ ret = xmlStrncat(ret, buf, 1);
+ ret = xmlStrcat(ret, node->name);
+ buf[0] = ';';
+ buf[1] = 0;
+ ret = xmlStrncat(ret, buf, 1);
+ }
+ }
+#if 0
+ else {
+ xmlGenericError(xmlGenericErrorContext,
+ "xmlGetNodeListString : invalid node type %d\n",
+ node->type);
+ }
+#endif
+ node = node->next;
+ }
+ return (ret);
+}
/**
* xmlNodeListGetRawString:
* @doc: the document
@@ -965,54 +982,73 @@
* Returns a pointer to the string copy, the caller must free it.
*/
xmlChar *
-xmlNodeListGetRawString(xmlDocPtr doc, xmlNodePtr list, int inLine) {
+xmlNodeListGetRawString(xmlDocPtr doc, xmlNodePtr list, int inLine)
+{
xmlNodePtr node = list;
xmlChar *ret = NULL;
xmlEntityPtr ent;
- if (list == NULL) return(NULL);
+ if (list == NULL)
+ return (NULL);
while (node != NULL) {
if ((node->type == XML_TEXT_NODE) ||
- (node->type == XML_CDATA_SECTION_NODE)) {
- if (inLine) {
- ret = xmlStrcat(ret, node->content);
- } else {
- xmlChar *buffer;
-
- buffer = xmlEncodeSpecialChars(doc, node->content);
- if (buffer != NULL) {
- ret = xmlStrcat(ret, buffer);
- xmlFree(buffer);
- }
- }
- } else if (node->type == XML_ENTITY_REF_NODE) {
- if (inLine) {
- ent = xmlGetDocEntity(doc, node->name);
- if (ent != NULL)
- ret = xmlStrcat(ret, ent->content);
- else {
- ret = xmlStrcat(ret, node->content);
- }
+ (node->type == XML_CDATA_SECTION_NODE)) {
+ if (inLine) {
+ ret = xmlStrcat(ret, node->content);
} else {
- xmlChar buf[2];
- buf[0] = '&'; buf[1] = 0;
- ret = xmlStrncat(ret, buf, 1);
- ret = xmlStrcat(ret, node->name);
- buf[0] = ';'; buf[1] = 0;
- ret = xmlStrncat(ret, buf, 1);
- }
- }
+ xmlChar *buffer;
+
+ buffer = xmlEncodeSpecialChars(doc, node->content);
+ if (buffer != NULL) {
+ ret = xmlStrcat(ret, buffer);
+ xmlFree(buffer);
+ }
+ }
+ } else if (node->type == XML_ENTITY_REF_NODE) {
+ if (inLine) {
+ ent = xmlGetDocEntity(doc, node->name);
+ if (ent != NULL) {
+ xmlChar *buffer;
+
+ /* an entity content can be any "well balanced chunk",
+ * i.e. the result of the content [43] production:
+ * http://www.w3.org/TR/REC-xml#NT-content.
+ * So it can contain text, CDATA section or nested
+ * entity reference nodes (among others).
+ * -> we recursively call xmlNodeListGetRawString()
+ * which handles these types */
+ buffer =
+ xmlNodeListGetRawString(doc, ent->children, 1);
+ if (buffer != NULL) {
+ ret = xmlStrcat(ret, buffer);
+ xmlFree(buffer);
+ }
+ } else {
+ ret = xmlStrcat(ret, node->content);
+ }
+ } else {
+ xmlChar buf[2];
+
+ buf[0] = '&';
+ buf[1] = 0;
+ ret = xmlStrncat(ret, buf, 1);
+ ret = xmlStrcat(ret, node->name);
+ buf[0] = ';';
+ buf[1] = 0;
+ ret = xmlStrncat(ret, buf, 1);
+ }
+ }
#if 0
- else {
- xmlGenericError(xmlGenericErrorContext,
- "xmlGetNodeListString : invalid node type %d\n",
- node->type);
- }
+ else {
+ xmlGenericError(xmlGenericErrorContext,
+ "xmlGetNodeListString : invalid node type %d\n",
+ node->type);
+ }
#endif
- node = node->next;
+ node = node->next;
}
- return(ret);
+ return (ret);
}
/**
@@ -3763,122 +3799,159 @@
* It's up to the caller to free the memory.
*/
xmlChar *
-xmlNodeGetContent(xmlNodePtr cur) {
- if (cur == NULL) return(NULL);
+xmlNodeGetContent(xmlNodePtr cur)
+{
+ if (cur == NULL)
+ return (NULL);
switch (cur->type) {
case XML_DOCUMENT_FRAG_NODE:
- case XML_ELEMENT_NODE: {
- xmlNodePtr tmp = cur;
- xmlBufferPtr buffer;
- xmlChar *ret;
+ case XML_ELEMENT_NODE:{
+ xmlNodePtr tmp = cur;
+ xmlBufferPtr buffer;
+ xmlChar *ret;
- buffer = xmlBufferCreate();
- if (buffer == NULL)
- return(NULL);
- while (tmp != NULL) {
- switch (tmp->type) {
- case XML_CDATA_SECTION_NODE:
- case XML_TEXT_NODE:
- if (tmp->content != NULL)
- xmlBufferCat(buffer, tmp->content);
- break;
- case XML_ENTITY_REF_NODE: {
- xmlEntityPtr ent;
+ buffer = xmlBufferCreate();
+ if (buffer == NULL)
+ return (NULL);
+ while (tmp != NULL) {
+ switch (tmp->type) {
+ case XML_CDATA_SECTION_NODE:
+ case XML_TEXT_NODE:
+ if (tmp->content != NULL)
+ xmlBufferCat(buffer, tmp->content);
+ break;
+ case XML_ENTITY_REF_NODE:{
+ /* recursive substitution of entity references */
+ xmlChar *cont = xmlNodeGetContent(tmp);
- ent = xmlGetDocEntity(cur->doc, tmp->name);
- if (ent != NULL)
- xmlBufferCat(buffer, ent->content);
- }
- default:
- break;
- }
- /*
- * Skip to next node
- */
- if (tmp->children != NULL) {
- if (tmp->children->type != XML_ENTITY_DECL) {
- tmp = tmp->children;
- continue;
- }
- }
- if (tmp == cur)
- break;
+ if (cont) {
+ xmlBufferCat(buffer,
+ (const xmlChar *) cont);
+ xmlFree(cont);
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ /*
+ * Skip to next node
+ */
+ if (tmp->children != NULL) {
+ if (tmp->children->type != XML_ENTITY_DECL) {
+ tmp = tmp->children;
+ continue;
+ }
+ }
+ if (tmp == cur)
+ break;
- if (tmp->next != NULL) {
- tmp = tmp->next;
- continue;
- }
-
- do {
- tmp = tmp->parent;
- if (tmp == NULL)
- break;
- if (tmp == cur) {
- tmp = NULL;
- break;
- }
- if (tmp->next != NULL) {
- tmp = tmp->next;
- break;
- }
- } while (tmp != NULL);
- }
- ret = buffer->content;
- buffer->content = NULL;
- xmlBufferFree(buffer);
- return(ret);
- }
- case XML_ATTRIBUTE_NODE: {
- xmlAttrPtr attr = (xmlAttrPtr) cur;
- if (attr->parent != NULL)
- return(xmlNodeListGetString(attr->parent->doc, attr->children, 1));
- else
- return(xmlNodeListGetString(NULL, attr->children, 1));
- break;
- }
+ if (tmp->next != NULL) {
+ tmp = tmp->next;
+ continue;
+ }
+
+ do {
+ tmp = tmp->parent;
+ if (tmp == NULL)
+ break;
+ if (tmp == cur) {
+ tmp = NULL;
+ break;
+ }
+ if (tmp->next != NULL) {
+ tmp = tmp->next;
+ break;
+ }
+ } while (tmp != NULL);
+ }
+ ret = buffer->content;
+ buffer->content = NULL;
+ xmlBufferFree(buffer);
+ return (ret);
+ }
+ case XML_ATTRIBUTE_NODE:{
+ xmlAttrPtr attr = (xmlAttrPtr) cur;
+
+ if (attr->parent != NULL)
+ return (xmlNodeListGetString
+ (attr->parent->doc, attr->children, 1));
+ else
+ return (xmlNodeListGetString(NULL, attr->children, 1));
+ break;
+ }
case XML_COMMENT_NODE:
case XML_PI_NODE:
- if (cur->content != NULL)
- return(xmlStrdup(cur->content));
- return(NULL);
- case XML_ENTITY_REF_NODE:
- /*
- * Locate the entity, and get it's content
- * @@@
- */
- return(NULL);
+ if (cur->content != NULL)
+ return (xmlStrdup(cur->content));
+ return (NULL);
+ case XML_ENTITY_REF_NODE:{
+ xmlEntityPtr ent;
+ xmlNodePtr tmp;
+ xmlBufferPtr buffer;
+ xmlChar *ret;
+
+ /* lookup entity declaration */
+ ent = xmlGetDocEntity(cur->doc, cur->name);
+ if (ent == NULL)
+ return (NULL);
+
+ buffer = xmlBufferCreate();
+ if (buffer == NULL)
+ return (NULL);
+
+ /* an entity content can be any "well balanced chunk",
+ * i.e. the result of the content [43] production:
+ * http://www.w3.org/TR/REC-xml#NT-content
+ * -> we iterate through child nodes and recursively call
+ * xmlNodeGetContent() which handles all possible node types */
+ tmp = ent->children;
+ while (tmp) {
+ xmlChar *cont = xmlNodeGetContent(tmp);
+
+ if (cont) {
+ xmlBufferCat(buffer, (const xmlChar *) cont);
+ xmlFree(cont);
+ }
+ tmp = tmp->next;
+ }
+
+ ret = buffer->content;
+ buffer->content = NULL;
+ xmlBufferFree(buffer);
+ return (ret);
+ }
case XML_ENTITY_NODE:
case XML_DOCUMENT_NODE:
case XML_HTML_DOCUMENT_NODE:
case XML_DOCUMENT_TYPE_NODE:
case XML_NOTATION_NODE:
case XML_DTD_NODE:
- case XML_XINCLUDE_START:
- case XML_XINCLUDE_END:
+ case XML_XINCLUDE_START:
+ case XML_XINCLUDE_END:
#ifdef LIBXML_DOCB_ENABLED
- case XML_DOCB_DOCUMENT_NODE:
+ case XML_DOCB_DOCUMENT_NODE:
#endif
- return(NULL);
- case XML_NAMESPACE_DECL:
- return(xmlStrdup(((xmlNsPtr)cur)->href));
+ return (NULL);
+ case XML_NAMESPACE_DECL:
+ return (xmlStrdup(((xmlNsPtr) cur)->href));
case XML_ELEMENT_DECL:
- /* TODO !!! */
- return(NULL);
+ /* TODO !!! */
+ return (NULL);
case XML_ATTRIBUTE_DECL:
- /* TODO !!! */
- return(NULL);
+ /* TODO !!! */
+ return (NULL);
case XML_ENTITY_DECL:
- /* TODO !!! */
- return(NULL);
+ /* TODO !!! */
+ return (NULL);
case XML_CDATA_SECTION_NODE:
case XML_TEXT_NODE:
- if (cur->content != NULL)
- return(xmlStrdup(cur->content));
- return(NULL);
+ if (cur->content != NULL)
+ return (xmlStrdup(cur->content));
+ return (NULL);
}
- return(NULL);
+ return (NULL);
}
-
/**
* xmlNodeSetContent:
* @cur: the node being modified
diff --git a/xmlregexp.c b/xmlregexp.c
index 31657ea..5d63c90 100644
--- a/xmlregexp.c
+++ b/xmlregexp.c
@@ -109,6 +109,8 @@
XML_REGEXP_QUANT_OPT,
XML_REGEXP_QUANT_MULT,
XML_REGEXP_QUANT_PLUS,
+ XML_REGEXP_QUANT_ONCEONLY,
+ XML_REGEXP_QUANT_ALL,
XML_REGEXP_QUANT_RANGE
} xmlRegQuantType;
@@ -279,6 +281,8 @@
};
+#define REGEXP_ALL_COUNTER 0x123456
+
static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
/************************************************************************
@@ -630,6 +634,10 @@
fprintf(output, "+ "); break;
case XML_REGEXP_QUANT_RANGE:
fprintf(output, "range "); break;
+ case XML_REGEXP_QUANT_ONCEONLY:
+ fprintf(output, "onceonly "); break;
+ case XML_REGEXP_QUANT_ALL:
+ fprintf(output, "all "); break;
}
}
static void
@@ -943,6 +951,24 @@
}
/**
+ * xmlFAGenerateAllTransition:
+ * ctxt: a regexp parser context
+ * from: the from state
+ * to: the target state or NULL for building a new one
+ *
+ */
+static void
+xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
+ xmlRegStatePtr from, xmlRegStatePtr to) {
+ if (to == NULL) {
+ to = xmlRegNewState(ctxt);
+ xmlRegStatePush(ctxt, to);
+ ctxt->state = to;
+ }
+ xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER);
+}
+
+/**
* xmlFAGenerateEpsilonTransition:
* ctxt: a regexp parser context
* from: the from state
@@ -3424,6 +3450,69 @@
}
/**
+ * xmlAutomataNewOnceTrans:
+ * @am: an automata
+ * @from: the starting point of the transition
+ * @to: the target point of the transition or NULL
+ * @token: the input string associated to that transition
+ * @min: the minimum successive occurrences of token
+ * @max: the maximum successive occurrences of token
+ *
+ * If @to is NULL, this first creates a new target state in the automata
+ * and then adds a transition from the @from state to the target state
+ * activated by a succession of input of value @token and whose number
+ * is between @min and @max; moreover, that transition can only be crossed
+ * once.
+ *
+ * Returns the target state or NULL in case of error
+ */
+xmlAutomataStatePtr
+xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
+ xmlAutomataStatePtr to, const xmlChar *token,
+ int min, int max, void *data) {
+ xmlRegAtomPtr atom;
+ int counter;
+
+ if ((am == NULL) || (from == NULL) || (token == NULL))
+ return(NULL);
+ if (min < 1)
+ return(NULL);
+ if ((max < min) || (max < 1))
+ return(NULL);
+ atom = xmlRegNewAtom(am, XML_REGEXP_STRING);
+ if (atom == NULL)
+ return(NULL);
+ atom->valuep = xmlStrdup(token);
+ atom->data = data;
+ atom->quant = XML_REGEXP_QUANT_ONCEONLY;
+ if (min == 0)
+ atom->min = 1;
+ else
+ atom->min = min;
+ atom->max = max;
+ /*
+ * associate a counter to the transition.
+ */
+ counter = xmlRegGetCounter(am);
+ am->counters[counter].min = 1;
+ am->counters[counter].max = 1;
+
+ /* xmlFAGenerateTransitions(am, from, to, atom); */
+ if (to == NULL) {
+ to = xmlRegNewState(am);
+ xmlRegStatePush(am, to);
+ }
+ xmlRegStateAddTrans(am, from, atom, to, counter, -1);
+ xmlRegAtomPush(am, atom);
+ am->state = to;
+ if (to == NULL)
+ to = am->state;
+ if (to == NULL)
+ return(NULL);
+ return(to);
+}
+
+/**
* xmlAutomataNewState:
* @am: an automata
*
@@ -3466,6 +3555,30 @@
}
/**
+ * xmlAutomataNewAllTrans:
+ * @am: an automata
+ * @from: the starting point of the transition
+ * @to: the target point of the transition or NULL
+ *
+ * If @to is NULL, this first creates a new target state in the automata
+ * and then adds an ALL transition from the @from state to the
+ * target state. That transition is an epsilon transition allowed only when
+ * all transitions from the @from node have been activated.
+ *
+ * Returns the target state or NULL in case of error
+ */
+xmlAutomataStatePtr
+xmlAutomataNewAllTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
+ xmlAutomataStatePtr to) {
+ if ((am == NULL) || (from == NULL))
+ return(NULL);
+ xmlFAGenerateAllTransition(am, from, to);
+ if (to == NULL)
+ return(am->state);
+ return(to);
+}
+
+/**
* xmlAutomataNewCounter:
* @am: an automata
* @min: the minimal value on the counter
diff --git a/xmlschemas.c b/xmlschemas.c
index 8cb9400..1926063 100644
--- a/xmlschemas.c
+++ b/xmlschemas.c
@@ -2119,7 +2119,7 @@
if (type == NULL)
return (NULL);
type->node = node;
- type->type = XML_SCHEMA_TYPE_SEQUENCE;
+ type->type = XML_SCHEMA_TYPE_ALL;
type->id = xmlGetProp(node, BAD_CAST "id");
type->minOccurs = xmlGetMinOccurs(ctxt, node);
type->maxOccurs = xmlGetMaxOccurs(ctxt, node);
@@ -3037,6 +3037,26 @@
break;
}
case XML_SCHEMA_TYPE_ALL: {
+ xmlAutomataStatePtr end;
+ xmlAutomataStatePtr start;
+ xmlSchemaTypePtr subtypes;
+ xmlSchemaElementPtr elem = (xmlSchemaElementPtr) type;
+
+ subtypes = type->subtypes;
+ if (subtypes == NULL)
+ break;
+ start = ctxt->state;
+ while (subtypes != NULL) {
+ ctxt->state = start;
+ elem = (xmlSchemaElementPtr) subtypes;
+
+ /* TODO : handle the namespace too */
+ xmlAutomataNewOnceTrans(ctxt->am, ctxt->state, ctxt->state,
+ elem->name, elem->minOccurs, elem->maxOccurs,
+ subtypes);
+ subtypes = subtypes->next;
+ }
+ ctxt->state = xmlAutomataNewAllTrans(ctxt->am, ctxt->state, NULL);
TODO
break;
}