- Lots of improvements, too long to list here
- Push mode for the XML parser (HTML to come)
- XML shell-like interface for debugging
- improvements on XPath and validation
Daniel
diff --git a/ChangeLog b/ChangeLog
index 04d7a0f..50790ba 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+Tue Dec 28 18:44:22 CET 1999 Daniel Veillard <Daniel.Veillard@w3.org>
+
+ * parser.[ch] parserInternals.h: Push parser for XML,
+ seems to work fine now
+ * tester.c debugXML.[ch]: Added an XML shell debug facility and
+ --push for push testing
+ * xpath.[ch] : cleaned up for Shell usage, added missing APIs
+ * testSAX.c: added --push
+ * HTMLtree.[ch] tree.[ch]: new functions for dumping parts of the
+ subtree
+ * xmlIO.[ch] : enriched API + fixes for push mode
+ * entities.[ch]: added the entity content length to the struct.
+ * xmlmemory.[ch]: new API to show the last entries for the shell
+ * valid.c: added required attribute testing
+ * SAX.c: the cdata callback now merge contiguous fragments
+ * HTMLparser.c: cleanup of some macros
+
Wed Dec 22 12:20:53 CET 1999 Daniel Veillard <Daniel.Veillard@w3.org>
* parser.c: fix for PIs name starting with xml
diff --git a/HTMLparser.c b/HTMLparser.c
index 7276e7c..e3a1f72 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -57,8 +57,8 @@
* Generic function for accessing stacks in the Parser Context
*/
-#define PUSH_AND_POP(type, name) \
-int html##name##Push(htmlParserCtxtPtr ctxt, type value) { \
+#define PUSH_AND_POP(scope, type, name) \
+scope int html##name##Push(htmlParserCtxtPtr ctxt, type value) { \
if (ctxt->name##Nr >= ctxt->name##Max) { \
ctxt->name##Max *= 2; \
ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
@@ -72,7 +72,7 @@
ctxt->name = value; \
return(ctxt->name##Nr++); \
} \
-type html##name##Pop(htmlParserCtxtPtr ctxt) { \
+scope type html##name##Pop(htmlParserCtxtPtr ctxt) { \
type ret; \
if (ctxt->name##Nr < 0) return(0); \
ctxt->name##Nr--; \
@@ -86,8 +86,8 @@
return(ret); \
} \
-PUSH_AND_POP(xmlNodePtr, node)
-PUSH_AND_POP(xmlChar*, name)
+PUSH_AND_POP(extern, xmlNodePtr, node)
+PUSH_AND_POP(extern, xmlChar*, name)
/*
* Macros for accessing the content. Those should be used only by the parser,
@@ -2626,11 +2626,11 @@
}
-/********************************************************************************
- * *
- * Parser contexts handling *
- * *
- ********************************************************************************/
+/************************************************************************
+ * *
+ * Parser contexts handling *
+ * *
+ ************************************************************************/
/**
* xmlInitParserCtxt:
@@ -2665,6 +2665,7 @@
ctxt->version = NULL;
ctxt->encoding = NULL;
ctxt->standalone = -1;
+ ctxt->instate = XML_PARSER_START;
/* Allocate the Node stack */
ctxt->nodeTab = (htmlNodePtr *) xmlMalloc(10 * sizeof(htmlNodePtr));
@@ -2691,6 +2692,7 @@
ctxt->record_info = 0;
ctxt->validate = 0;
ctxt->nbChars = 0;
+ ctxt->checkIndex = 0;
xmlInitNodeInfoSeq(&ctxt->node_seq);
}
diff --git a/HTMLtree.c b/HTMLtree.c
index c84daea..e6142ae 100644
--- a/HTMLtree.c
+++ b/HTMLtree.c
@@ -28,6 +28,9 @@
#include "entities.h"
#include "valid.h"
+static void
+htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
+
/**
* htmlDtdDump:
* @buf: the HTML buffer output
@@ -108,7 +111,7 @@
}
-static void
+void
htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
/**
* htmlNodeListDump:
@@ -138,7 +141,7 @@
*
* Dump an HTML node, recursive behaviour,children are printed too.
*/
-static void
+void
htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
htmlElemDescPtr info;
@@ -149,6 +152,10 @@
/*
* Special cases.
*/
+ if (cur->type == XML_HTML_DOCUMENT_NODE) {
+ htmlDocContentDump(buf, (xmlDocPtr) cur);
+ return;
+ }
if (cur->type == HTML_TEXT_NODE) {
if (cur->content != NULL) {
xmlChar *buffer;
diff --git a/SAX.c b/SAX.c
index be5a0ab..19e0da3 100644
--- a/SAX.c
+++ b/SAX.c
@@ -1101,14 +1101,22 @@
cdataBlock(void *ctx, const xmlChar *value, int len)
{
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
- xmlNodePtr ret;
+ xmlNodePtr ret, lastChild;
#ifdef DEBUG_SAX
fprintf(stderr, "SAX.pcdata(%.10s, %d)\n", value, len);
#endif
- ret = xmlNewCDataBlock(ctxt->myDoc, value, len);
- xmlAddChild(ctxt->node, ret);
- /* !!!!! merges */
+ lastChild = xmlGetLastChild(ctxt->node);
+#ifdef DEBUG_SAX_TREE
+ fprintf(stderr, "add chars to %s \n", ctxt->node->name);
+#endif
+ if ((lastChild != NULL) &&
+ (lastChild->type == XML_CDATA_SECTION_NODE)) {
+ xmlTextConcat(lastChild, value, len);
+ } else {
+ ret = xmlNewCDataBlock(ctxt->myDoc, value, len);
+ xmlAddChild(ctxt->node, ret);
+ }
}
/*
diff --git a/debugXML.c b/debugXML.c
index 2344bb2..99972ae 100644
--- a/debugXML.c
+++ b/debugXML.c
@@ -13,9 +13,15 @@
#include "config.h"
#endif
#include <stdio.h>
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#include "xmlmemory.h"
#include "tree.h"
#include "parser.h"
#include "debugXML.h"
+#include "HTMLtree.h"
+#include "HTMLparser.h"
#define IS_BLANK(c) \
(((c) == '\n') || ((c) == '\r') || ((c) == '\t') || ((c) == ' '))
@@ -43,7 +49,7 @@
if (ns->prefix != NULL)
fprintf(output, "namespace %s href=", ns->prefix);
else
- fprintf(output, "default namespace href=", ns->prefix);
+ fprintf(output, "default namespace href=");
xmlDebugDumpString(output, ns->href);
fprintf(output, "\n");
@@ -217,7 +223,7 @@
}
-void xmlDebugDumpDocument(FILE *output, xmlDocPtr doc) {
+void xmlDebugDumpDocumentHead(FILE *output, xmlDocPtr doc) {
if (output == NULL) output = stdout;
if (doc == NULL) {
fprintf(output, "DOCUMENT == NULL !\n");
@@ -286,10 +292,21 @@
fprintf(output, "standalone=true\n");
if (doc->oldNs != NULL)
xmlDebugDumpNamespaceList(output, doc->oldNs, 0);
- if (doc->root != NULL)
- xmlDebugDumpNodeList(output, doc->root, 1);
}
+void xmlDebugDumpDocument(FILE *output, xmlDocPtr doc) {
+ if (output == NULL) output = stdout;
+ if (doc == NULL) {
+ fprintf(output, "DOCUMENT == NULL !\n");
+ return;
+ }
+ xmlDebugDumpDocumentHead(output, doc);
+ if (((doc->type == XML_DOCUMENT_NODE) ||
+ (doc->type == XML_HTML_DOCUMENT_NODE)) &&
+ (doc->root != NULL))
+ xmlDebugDumpNodeList(output, doc->root, 1);
+}
+
void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) {
int i;
xmlEntityPtr cur;
@@ -422,3 +439,807 @@
} else
fprintf(output, "No entities in external subset\n");
}
+
+static int xmlLsCountNode(xmlNodePtr node) {
+ int ret = 0;
+ xmlNodePtr list = NULL;
+
+ switch (node->type) {
+ case XML_ELEMENT_NODE:
+ list = node->childs;
+ break;
+ case XML_DOCUMENT_NODE:
+ case XML_HTML_DOCUMENT_NODE:
+ list = ((xmlDocPtr) node)->root;
+ break;
+ case XML_ATTRIBUTE_NODE:
+ list = ((xmlAttrPtr) node)->val;
+ break;
+ case XML_TEXT_NODE:
+ case XML_CDATA_SECTION_NODE:
+ case XML_PI_NODE:
+ case XML_COMMENT_NODE:
+ if (node->content != NULL) {
+#ifndef XML_USE_BUFFER_CONTENT
+ ret = xmlStrlen(node->content);
+#else
+ ret = xmlBufferLength(node->content);
+#endif
+ }
+ break;
+ case XML_ENTITY_REF_NODE:
+ case XML_DOCUMENT_TYPE_NODE:
+ case XML_ENTITY_NODE:
+ case XML_DOCUMENT_FRAG_NODE:
+ case XML_NOTATION_NODE:
+ ret = 1;
+ break;
+ }
+ for (;list != NULL;ret++)
+ list = list->next;
+ return(ret);
+}
+
+/**
+ * xmlLsOneNode:
+ * @output: the FILE * receiving the listing
+ * @node: the node to describe
+ *
+ * Prints one "ls"-style line for @node: type flag, attribute and namespace flags, size and name.
+ */
+void xmlLsOneNode(FILE *output, xmlNodePtr node) {
+    switch (node->type) {
+        case XML_ELEMENT_NODE:
+            fprintf(output, "-");
+            break;
+        case XML_ATTRIBUTE_NODE:
+            fprintf(output, "a");
+            break;
+        case XML_TEXT_NODE:
+            fprintf(output, "t");
+            break;
+        case XML_CDATA_SECTION_NODE:
+            fprintf(output, "C"); /* upper case: "c" is already the comment flag */
+            break;
+        case XML_ENTITY_REF_NODE:
+            fprintf(output, "e");
+            break;
+        case XML_ENTITY_NODE:
+            fprintf(output, "E");
+            break;
+        case XML_PI_NODE:
+            fprintf(output, "p");
+            break;
+        case XML_COMMENT_NODE:
+            fprintf(output, "c");
+            break;
+        case XML_DOCUMENT_NODE:
+            fprintf(output, "d");
+            break;
+        case XML_HTML_DOCUMENT_NODE:
+            fprintf(output, "h");
+            break;
+        case XML_DOCUMENT_TYPE_NODE:
+            fprintf(output, "T");
+            break;
+        case XML_DOCUMENT_FRAG_NODE:
+            fprintf(output, "F");
+            break;
+        case XML_NOTATION_NODE:
+            fprintf(output, "N");
+            break;
+        default:
+            fprintf(output, "?");
+    }
+    if (node->properties != NULL)
+        fprintf(output, "a");
+    else
+        fprintf(output, "-");
+    if (node->nsDef != NULL)
+        fprintf(output, "n");
+    else
+        fprintf(output, "-");
+    fprintf(output, " %8d ", xmlLsCountNode(node)); /* child count or content length */
+
+    switch (node->type) {
+        case XML_ELEMENT_NODE:
+            if (node->name != NULL)
+                fprintf(output, "%s", node->name);
+            break;
+        case XML_ATTRIBUTE_NODE:
+            if (node->name != NULL)
+                fprintf(output, "%s", node->name);
+            break;
+        case XML_TEXT_NODE:
+            if (node->content != NULL) {
+#ifndef XML_USE_BUFFER_CONTENT
+                xmlDebugDumpString(output, node->content);
+#else
+                xmlDebugDumpString(output, xmlBufferContent(node->content));
+#endif
+            }
+            break;
+        case XML_ENTITY_REF_NODE:
+            if (node->name != NULL)
+                fprintf(output, "%s", node->name);
+            break;
+        case XML_ENTITY_NODE:
+            if (node->name != NULL)
+                fprintf(output, "%s", node->name);
+            break;
+        case XML_PI_NODE:
+            if (node->name != NULL)
+                fprintf(output, "%s", node->name);
+            break;
+        case XML_CDATA_SECTION_NODE:
+        case XML_COMMENT_NODE:
+        case XML_DOCUMENT_NODE:
+        case XML_HTML_DOCUMENT_NODE:
+        case XML_DOCUMENT_TYPE_NODE:
+        case XML_DOCUMENT_FRAG_NODE:
+        case XML_NOTATION_NODE:
+            break; /* nothing is printed after the size for these node types */
+        default:
+            if (node->name != NULL)
+                fprintf(output, "%s", node->name);
+    }
+    fprintf(output, "\n");
+}
+
+/****************************************************************
+ * *
+ * The XML shell related functions *
+ * *
+ ****************************************************************/
+
+/*
+ * TODO: Improvement/cleanups for the XML shell
+ * - allow to shell out an editor on a subpart
+ * - cleanup function registrations (with help) and calling
+ * - provide registration routines
+ */
+
+/**
+ * xmlShellList:
+ * @ctxt: the shell context
+ * @arg: unused
+ * @node: a node
+ * @node2: unused
+ *
+ * Implements the XML shell function "ls"
+ * Does an Unix like listing of the given node (like a directory)
+ *
+ * Returns 0
+ */
+int
+xmlShellList(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr node,
+ xmlNodePtr node2) {
+ xmlNodePtr cur;
+
+ if ((node->type == XML_DOCUMENT_NODE) ||
+ (node->type == XML_HTML_DOCUMENT_NODE)) {
+ cur = ((xmlDocPtr) node)->root;
+ } else if (node->childs != NULL) {
+ cur = node->childs;
+ } else {
+ xmlLsOneNode(stdout, node);
+ return(0);
+ }
+ while (cur != NULL) {
+ xmlLsOneNode(stdout, cur);
+ cur = cur->next;
+ }
+ return(0);
+}
+
+/**
+ * xmlShellDir:
+ * @ctxt: the shell context
+ * @arg: unused
+ * @node: a node
+ * @node2: unused
+ *
+ * Implements the XML shell function "dir"
+ * dumps information about the node (namespace, attributes, content).
+ *
+ * Returns 0
+ */
+int
+xmlShellDir(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr node,
+ xmlNodePtr node2) {
+ if ((node->type == XML_DOCUMENT_NODE) ||
+ (node->type == XML_HTML_DOCUMENT_NODE)) {
+ xmlDebugDumpDocumentHead(stdout, (xmlDocPtr) node);
+ } else if (node->type == XML_ATTRIBUTE_NODE) {
+ xmlDebugDumpAttr(stdout, (xmlAttrPtr) node, 0);
+ } else {
+ xmlDebugDumpOneNode(stdout, node, 0);
+ }
+ return(0);
+}
+
+/**
+ * xmlShellCat:
+ * @ctxt: the shell context
+ * @arg: unused
+ * @node: a node
+ * @node2: unused
+ *
+ * Implements the XML shell function "cat"
+ * dumps the serialization node content (XML or HTML).
+ *
+ * Returns 0
+ */
+int
+xmlShellCat(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr node,
+ xmlNodePtr node2) {
+ xmlElemDump(stdout, ctxt->doc, node);
+ printf("\n");
+ return(0);
+}
+
+/**
+ * xmlShellLoad:
+ * @ctxt: the shell context
+ * @filename: the file name
+ * @node: unused
+ * @node2: unused
+ *
+ * Implements the XML shell function "load"
+ * loads a new document specified by the filename
+ *
+ * Returns 0 or -1 if loading failed
+ */
+int
+xmlShellLoad(xmlShellCtxtPtr ctxt, char *filename, xmlNodePtr node,
+ xmlNodePtr node2) {
+ xmlDocPtr doc;
+ int html = 0;
+
+ if (ctxt->doc != NULL)
+ html = (ctxt->doc->type == XML_HTML_DOCUMENT_NODE);
+
+ if (html) {
+ doc = htmlParseFile(filename, NULL);
+ } else {
+ doc = xmlParseFile(filename);
+ }
+ if (doc != NULL) {
+ if (ctxt->loaded == 1) {
+ xmlFreeDoc(ctxt->doc);
+ }
+ ctxt->loaded = 1;
+ xmlXPathFreeContext(ctxt->pctxt);
+ xmlFree(ctxt->filename);
+ ctxt->doc = doc;
+ ctxt->node = (xmlNodePtr) doc;
+ ctxt->pctxt = xmlXPathNewContext(doc);
+ ctxt->filename = (char *) xmlStrdup((xmlChar *) filename);
+ } else
+ return(-1);
+ return(0);
+}
+
+/**
+ * xmlShellWrite:
+ * @ctxt: the shell context
+ * @filename: the file name
+ * @node: a node in the tree
+ * @node2: unused
+ *
+ * Implements the XML shell function "write"
+ * Write the current node to the filename, it saves the serialization
+ * of the subtree under the @node specified
+ *
+ * Returns 0 or -1 in case of error
+ */
+int
+xmlShellWrite(xmlShellCtxtPtr ctxt, char *filename, xmlNodePtr node,
+              xmlNodePtr node2) {
+    if (node == NULL)
+        return(-1);
+    if ((filename == NULL) || (filename[0] == 0)) {
+        fprintf(stderr, "Write command requires a filename argument\n");
+        return(-1);
+    }
+#ifdef W_OK
+    if (access((char *) filename, W_OK)) {
+        fprintf(stderr, "Cannot write to %s\n", filename);
+        return(-1);
+    }
+#endif
+    switch(node->type) {
+        case XML_DOCUMENT_NODE:
+            if (xmlSaveFile((char *) filename, ctxt->doc) < 0) { /* was "< -1", which never matched a -1 failure */
+                fprintf(stderr, "Failed to write to %s\n", filename);
+                return(-1);
+            }
+            break;
+        case XML_HTML_DOCUMENT_NODE:
+            if (htmlSaveFile((char *) filename, ctxt->doc) < 0) {
+                fprintf(stderr, "Failed to write to %s\n", filename);
+                return(-1);
+            }
+            break;
+        default: {
+            FILE *f;
+
+            f = fopen((char *) filename, "w");
+            if (f == NULL) {
+                fprintf(stderr, "Failed to write to %s\n", filename);
+                return(-1);
+            }
+            xmlElemDump(f, ctxt->doc, node); /* any other node: dump just that subtree */
+            fclose(f);
+        }
+    }
+    return(0);
+}
+
+/**
+ * xmlShellSave:
+ * @ctxt: the shell context
+ * @filename: the file name (optional)
+ * @node: unused
+ * @node2: unused
+ *
+ * Implements the XML shell function "save"
+ * Write the current document to the filename, or its original name
+ *
+ * Returns 0 or -1 in case of error
+ */
+int
+xmlShellSave(xmlShellCtxtPtr ctxt, char *filename, xmlNodePtr node,
+             xmlNodePtr node2) {
+    if (ctxt->doc == NULL)
+        return(-1);
+    if ((filename == NULL) || (filename[0] == 0)) filename = ctxt->filename;
+    if (filename == NULL) return(-1); /* no name recorded in the context either */
+#ifdef W_OK
+    if (access((char *) filename, W_OK)) {
+        fprintf(stderr, "Cannot save to %s\n", filename);
+        return(-1);
+    }
+#endif
+    switch(ctxt->doc->type) {
+        case XML_DOCUMENT_NODE:
+            if (xmlSaveFile((char *) filename, ctxt->doc) < 0) {
+                fprintf(stderr, "Failed to save to %s\n", filename);
+                return(-1); /* report the failure to the caller, as documented */
+            }
+            break;
+        case XML_HTML_DOCUMENT_NODE:
+            if (htmlSaveFile((char *) filename, ctxt->doc) < 0) {
+                fprintf(stderr, "Failed to save to %s\n", filename);
+                return(-1);
+            }
+            break;
+        default:
+            fprintf(stderr, "To save to subparts of a document use the 'write' command\n");
+            return(-1);
+    }
+    return(0);
+}
+
+/**
+ * xmlShellValidate:
+ * @ctxt: the shell context
+ * @dtd: the DTD URI (optional)
+ * @node: unused
+ * @node2: unused
+ *
+ * Implements the XML shell function "validate"
+ * Validate the document, if a DTD path is provided, then the validation
+ * is done against the given DTD.
+ *
+ * Returns 0 or -1 in case of error
+ */
+int
+xmlShellValidate(xmlShellCtxtPtr ctxt, char *dtd, xmlNodePtr node,
+ xmlNodePtr node2) {
+ xmlValidCtxt vctxt;
+ int res = -1;
+
+ vctxt.userData = stderr;
+ vctxt.error = (xmlValidityErrorFunc) fprintf;
+ vctxt.warning = (xmlValidityWarningFunc) fprintf;
+
+ if ((dtd == NULL) || (dtd[0] == 0)) {
+ res = xmlValidateDocument(&vctxt, ctxt->doc);
+ } else {
+ xmlDtdPtr subset;
+
+ subset = xmlParseDTD(NULL, (xmlChar *) dtd);
+ if (subset != NULL) {
+ res = xmlValidateDtd(&vctxt, ctxt->doc, subset);
+
+ xmlFreeDtd(subset);
+ }
+ }
+ return(res);
+}
+
+/**
+ * xmlShellDu:
+ * @ctxt: the shell context
+ * @arg: unused
+ * @tree: a node defining a subtree
+ * @node2: unused
+ *
+ * Implements the XML shell function "du"
+ * show the structure of the subtree under node @tree
+ * If @tree is NULL, nothing is printed and -1 is returned.
+ *
+ * Returns 0 or -1 in case of error
+ */
+int
+xmlShellDu(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr tree,
+ xmlNodePtr node2) {
+ xmlNodePtr node;
+ int indent = 0,i;
+
+ if (tree == NULL) return(-1);
+ node = tree;
+ while (node != NULL) {
+ if ((node->type == XML_DOCUMENT_NODE) ||
+ (node->type == XML_HTML_DOCUMENT_NODE)) {
+ printf("/\n");
+ } else if (node->type == XML_ELEMENT_NODE) {
+ for (i = 0;i < indent;i++)
+ printf(" ");
+ printf("%s\n", node->name);
+ } else {
+ }
+
+ /*
+ * Browse the full subtree, deep first
+ */
+
+ if ((node->type == XML_DOCUMENT_NODE) ||
+ (node->type == XML_HTML_DOCUMENT_NODE)) {
+ node = ((xmlDocPtr) node)->root;
+ } else if (node->childs != NULL) {
+ /* deep first */
+ node = node->childs;
+ indent++;
+ } else if ((node != tree) && (node->next != NULL)) {
+ /* then siblings */
+ node = node->next;
+ } else if (node != tree) {
+ /* go up to parents->next if needed */
+ while (node != tree) {
+ if (node->parent != NULL) {
+ node = node->parent;
+ indent--;
+ }
+ if ((node != tree) && (node->next != NULL)) {
+ node = node->next;
+ break;
+ }
+ if (node->parent == NULL) {
+ node = NULL;
+ break;
+ }
+ if (node == tree) {
+ node = NULL;
+ break;
+ }
+ }
+ /* exit condition */
+ if (node == tree)
+ node = NULL;
+ } else
+ node = NULL;
+ }
+ return(0);
+}
+
+/**
+ * xmlShellPwd:
+ * @ctxt: the shell context
+ * @buffer: the output buffer, must be non-NULL; no size is passed (the path is staged in a 500-byte local array)
+ * @node: a node
+ * @node2: unused
+ *
+ * Implements the XML shell function "pwd"
+ * Show the full path from the root to the node, appending a positional
+ * index ("[n]") when same-named siblings exist at a given ancestor level.
+ * The output is compatible with XPath commands.
+ *
+ * Returns 0 or -1 in case of error
+ */
+int
+xmlShellPwd(xmlShellCtxtPtr ctxt, char *buffer, xmlNodePtr node,
+ xmlNodePtr node2) {
+ xmlNodePtr cur, tmp, next;
+ char buf[500];
+ char sep;
+ const char *name;
+ int occur = 0;
+
+ buffer[0] = 0;
+ if (node == NULL) return(-1);
+ cur = node;
+ do {
+ name = "";
+ sep= '?';
+ occur = 0;
+ if ((cur->type == XML_DOCUMENT_NODE) ||
+ (cur->type == XML_HTML_DOCUMENT_NODE)) {
+ sep = '/';
+ next = NULL;
+ } else if (cur->type == XML_ELEMENT_NODE) {
+ sep = '/';
+ name = (const char *)cur->name;
+ next = cur->parent;
+
+ /*
+ * Thumbler index computation
+ */
+ tmp = cur->prev;
+ while (tmp != NULL) {
+ if (!xmlStrcmp(cur->name, tmp->name))
+ occur++;
+ tmp = tmp->prev;
+ }
+ if (occur == 0) {
+ tmp = cur->next;
+ while (tmp != NULL) {
+ if (!xmlStrcmp(cur->name, tmp->name))
+ occur++;
+ tmp = tmp->next;
+ }
+ if (occur != 0) occur = 1;
+ } else
+ occur++;
+ } else if (cur->type == XML_ATTRIBUTE_NODE) {
+ sep = '@';
+ name = (const char *) (((xmlAttrPtr) cur)->name);
+ next = ((xmlAttrPtr) cur)->node;
+ } else {
+ next = cur->parent;
+ }
+ if (occur == 0)
+ sprintf(buf, "%c%s%s", sep, name, buffer);
+ else
+ sprintf(buf, "%c%s[%d]%s", sep, name, occur, buffer);
+ strcpy(buffer, buf);
+ cur = next;
+ } while (cur != NULL);
+ return(0);
+}
+
+/**
+ * xmlShell
+ * @doc: the initial document
+ * @filename: the document's file name, used as the default target of "save"
+ * @input: the line reading function
+ * @output: the output FILE*
+ *
+ * Implements the XML shell
+ * This allows loading, validating, viewing, modifying and saving a document
+ * using an environment similar to a UNIX command line.
+ */
+void
+xmlShell(xmlDocPtr doc, char *filename, xmlShellReadlineFunc input,
+         FILE *output) {
+    char prompt[500] = "/ > ";
+    char *cmdline = NULL;
+    int nbargs;
+    char command[100];
+    char arg[400];
+    xmlShellCtxtPtr ctxt;
+    xmlXPathObjectPtr list;
+
+    if (doc == NULL)
+        return;
+    if (filename == NULL)
+        return;
+    if (input == NULL)
+        return;
+    if (output == NULL)
+        return;
+    ctxt = (xmlShellCtxtPtr) xmlMalloc(sizeof(xmlShellCtxt));
+    if (ctxt == NULL)
+        return;
+    ctxt->loaded = 0;
+    ctxt->doc = doc;
+    ctxt->input = input;
+    ctxt->output = output;
+    ctxt->filename = (char *) xmlStrdup((xmlChar *) filename);
+    ctxt->node = (xmlNodePtr) ctxt->doc;
+    ctxt->pctxt = xmlXPathNewContext(ctxt->doc);
+    if (ctxt->pctxt == NULL) {
+        if (ctxt->filename != NULL) xmlFree(ctxt->filename); /* do not leak the copied name */
+        xmlFree(ctxt);
+        return;
+    }
+    while (1) {
+        if (ctxt->node == (xmlNodePtr) ctxt->doc)
+            sprintf(prompt, "%s > ", "/");
+        else if (ctxt->node->name)
+            sprintf(prompt, "%.490s > ", ctxt->node->name); /* bounded: prompt holds 500 bytes */
+        else
+            sprintf(prompt, "? > ");
+
+        cmdline = ctxt->input(prompt);
+        if (cmdline == NULL) break;
+
+        command[0] = 0;
+        arg[0] = 0;
+        nbargs = sscanf(cmdline, "%99s %399s", command, arg); /* widths match command[100] and arg[400] */
+
+        if (command[0] == 0) { free(cmdline); continue; } /* empty line: release it before looping */
+        if (!strcmp(command, "exit"))
+            break;
+        if (!strcmp(command, "quit"))
+            break;
+        if (!strcmp(command, "bye"))
+            break;
+        if (!strcmp(command, "validate")) {
+            xmlShellValidate(ctxt, arg, NULL, NULL);
+        } else if (!strcmp(command, "load")) {
+            xmlShellLoad(ctxt, arg, NULL, NULL);
+        } else if (!strcmp(command, "save")) {
+            xmlShellSave(ctxt, arg, NULL, NULL);
+        } else if (!strcmp(command, "write")) {
+            xmlShellWrite(ctxt, arg, ctxt->node, NULL); /* xmlShellWrite rejects a NULL node */
+        } else if (!strcmp(command, "free")) {
+            if (arg[0] == 0) {
+                xmlMemShow(stdout, 0);
+            } else {
+                int len = 0;
+                sscanf(arg, "%d", &len);
+                xmlMemShow(stdout, len);
+            }
+        } else if (!strcmp(command, "pwd")) {
+            char dir[500];
+            if (!xmlShellPwd(ctxt, dir, ctxt->node, NULL))
+                printf("%s\n", dir);
+        } else if (!strcmp(command, "du")) {
+            xmlShellDu(ctxt, NULL, ctxt->node, NULL);
+        } else if ((!strcmp(command, "ls")) ||
+                   (!strcmp(command, "dir"))) {
+            int dir = (!strcmp(command, "dir"));
+            if (arg[0] == 0) {
+                if (dir)
+                    xmlShellDir(ctxt, NULL, ctxt->node, NULL);
+                else
+                    xmlShellList(ctxt, NULL, ctxt->node, NULL);
+            } else {
+                ctxt->pctxt->node = ctxt->node;
+                if (ctxt->pctxt->nodelist != NULL)
+                    xmlXPathFreeNodeSet(ctxt->pctxt->nodelist);
+                ctxt->pctxt->nodelist = xmlXPathNodeSetCreate(ctxt->node);
+                list = xmlXPathEval((xmlChar *) arg, ctxt->pctxt);
+                if (list != NULL) {
+                    switch (list->type) {
+                        case XPATH_UNDEFINED:
+                            fprintf(stderr, "%s: no such node\n", arg);
+                            break;
+                        case XPATH_NODESET: {
+                            int i;
+
+                            for (i = 0;i < list->nodesetval->nodeNr;i++) {
+                                if (dir)
+                                    xmlShellDir(ctxt, NULL,
+                                       list->nodesetval->nodeTab[i], NULL);
+                                else
+                                    xmlShellList(ctxt, NULL,
+                                       list->nodesetval->nodeTab[i], NULL);
+                            }
+                            break;
+                        }
+                        case XPATH_BOOLEAN:
+                            fprintf(stderr, "%s is a Boolean\n", arg);
+                            break;
+                        case XPATH_NUMBER:
+                            fprintf(stderr, "%s is a number\n", arg);
+                            break;
+                        case XPATH_STRING:
+                            fprintf(stderr, "%s is a string\n", arg);
+                            break;
+                    }
+                    xmlXPathFreeNodeSetList(list);
+                } else {
+                    fprintf(stderr, "%s: no such node\n", arg);
+                }
+                if (ctxt->pctxt->nodelist != NULL)
+                    xmlXPathFreeNodeSet(ctxt->pctxt->nodelist);
+                ctxt->pctxt->nodelist = NULL;
+            }
+        } else if (!strcmp(command, "cd")) {
+            if (arg[0] == 0) {
+                ctxt->node = (xmlNodePtr) ctxt->doc;
+            } else {
+                ctxt->pctxt->node = ctxt->node;
+                if (ctxt->pctxt->nodelist != NULL)
+                    xmlXPathFreeNodeSet(ctxt->pctxt->nodelist);
+                ctxt->pctxt->nodelist = xmlXPathNodeSetCreate(ctxt->node);
+                list = xmlXPathEval((xmlChar *) arg, ctxt->pctxt);
+                if (list != NULL) {
+                    switch (list->type) {
+                        case XPATH_UNDEFINED:
+                            fprintf(stderr, "%s: no such node\n", arg);
+                            break;
+                        case XPATH_NODESET:
+                            if (list->nodesetval->nodeNr == 1) {
+                                ctxt->node = list->nodesetval->nodeTab[0];
+                            } else
+                                fprintf(stderr, "%s is a %d Node Set\n",
+                                        arg, list->nodesetval->nodeNr);
+                            break;
+                        case XPATH_BOOLEAN:
+                            fprintf(stderr, "%s is a Boolean\n", arg);
+                            break;
+                        case XPATH_NUMBER:
+                            fprintf(stderr, "%s is a number\n", arg);
+                            break;
+                        case XPATH_STRING:
+                            fprintf(stderr, "%s is a string\n", arg);
+                            break;
+                    }
+                    xmlXPathFreeNodeSetList(list);
+                } else {
+                    fprintf(stderr, "%s: no such node\n", arg);
+                }
+                if (ctxt->pctxt->nodelist != NULL)
+                    xmlXPathFreeNodeSet(ctxt->pctxt->nodelist);
+                ctxt->pctxt->nodelist = NULL;
+            }
+        } else if (!strcmp(command, "cat")) {
+            if (arg[0] == 0) {
+                xmlShellCat(ctxt, NULL, ctxt->node, NULL);
+            } else {
+                ctxt->pctxt->node = ctxt->node;
+                if (ctxt->pctxt->nodelist != NULL)
+                    xmlXPathFreeNodeSet(ctxt->pctxt->nodelist);
+                ctxt->pctxt->nodelist = xmlXPathNodeSetCreate(ctxt->node);
+                list = xmlXPathEval((xmlChar *) arg, ctxt->pctxt);
+                if (list != NULL) {
+                    switch (list->type) {
+                        case XPATH_UNDEFINED:
+                            fprintf(stderr, "%s: no such node\n", arg);
+                            break;
+                        case XPATH_NODESET: {
+                            int i;
+
+                            for (i = 0;i < list->nodesetval->nodeNr;i++) {
+                                if (i > 0) printf(" -------\n");
+                                xmlShellCat(ctxt, NULL,
+                                    list->nodesetval->nodeTab[i], NULL);
+                            }
+                            break;
+                        }
+                        case XPATH_BOOLEAN:
+                            fprintf(stderr, "%s is a Boolean\n", arg);
+                            break;
+                        case XPATH_NUMBER:
+                            fprintf(stderr, "%s is a number\n", arg);
+                            break;
+                        case XPATH_STRING:
+                            fprintf(stderr, "%s is a string\n", arg);
+                            break;
+                    }
+                    xmlXPathFreeNodeSetList(list);
+                } else {
+                    fprintf(stderr, "%s: no such node\n", arg);
+                }
+                if (ctxt->pctxt->nodelist != NULL)
+                    xmlXPathFreeNodeSet(ctxt->pctxt->nodelist);
+                ctxt->pctxt->nodelist = NULL;
+            }
+        } else {
+            fprintf(stderr, "Unknown command %s\n", command);
+        }
+        free(cmdline); /* not xmlFree here ! */
+    }
+    xmlXPathFreeContext(ctxt->pctxt);
+    if (ctxt->loaded)
+        xmlFreeDoc(ctxt->doc);
+    if (ctxt->filename != NULL) xmlFree(ctxt->filename); /* copied at entry or by "load" */
+    xmlFree(ctxt);
+    if (cmdline != NULL)
+        free(cmdline); /* not xmlFree here ! */
+}
+
diff --git a/debugXML.h b/debugXML.h
index 9c77496..8774f0b 100644
--- a/debugXML.h
+++ b/debugXML.h
@@ -7,19 +7,97 @@
#ifndef __DEBUG_XML__
#define __DEBUG_XML__
+#include <stdio.h>
#include "tree.h"
+#include "xpath.h"
#ifdef __cplusplus
extern "C" {
#endif
-extern void xmlDebugDumpString(FILE *output, const xmlChar *str);
-extern void xmlDebugDumpAttr(FILE *output, xmlAttrPtr attr, int depth);
-extern void xmlDebugDumpAttrList(FILE *output, xmlAttrPtr attr, int depth);
-extern void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth);
-extern void xmlDebugDumpNode(FILE *output, xmlNodePtr node, int depth);
-extern void xmlDebugDumpNodeList(FILE *output, xmlNodePtr node, int depth);
-extern void xmlDebugDumpDocument(FILE *output, xmlDocPtr doc);
-extern void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc);
+
+/*
+ * The standard Dump routines
+ */
+void xmlDebugDumpString (FILE *output,
+ const xmlChar *str);
+void xmlDebugDumpAttr (FILE *output,
+ xmlAttrPtr attr,
+ int depth);
+void xmlDebugDumpAttrList (FILE *output,
+ xmlAttrPtr attr,
+ int depth);
+void xmlDebugDumpOneNode (FILE *output,
+ xmlNodePtr node,
+ int depth);
+void xmlDebugDumpNode (FILE *output,
+ xmlNodePtr node,
+ int depth);
+void xmlDebugDumpNodeList (FILE *output,
+ xmlNodePtr node,
+ int depth);
+void xmlDebugDumpDocumentHead(FILE *output,
+ xmlDocPtr doc);
+void xmlDebugDumpDocument (FILE *output,
+ xmlDocPtr doc);
+void xmlDebugDumpEntities (FILE *output,
+ xmlDocPtr doc);
+void xmlLsOneNode (FILE *output,
+ xmlNodePtr node);
+
+/****************************************************************
+ * *
+ * The XML shell related structures and functions *
+ * *
+ ****************************************************************/
+
+/**
+ * xmlShellReadlineFunc:
+ * @prompt: a string prompt
+ *
+ * This is a generic signature for the XML shell input function
+ *
+ * Returns a string which will be freed by the Shell
+ */
+typedef char * (* xmlShellReadlineFunc)(char *prompt);
+
+/*
+ * The shell context itself
+ * TODO: add the defined function tables.
+ */
+typedef struct xmlShellCtxt {
+ char *filename;
+ xmlDocPtr doc;
+ xmlNodePtr node;
+ xmlXPathContextPtr pctxt;
+ int loaded;
+ FILE *output;
+ xmlShellReadlineFunc input;
+} xmlShellCtxt, *xmlShellCtxtPtr;
+
+/**
+ * xmlShellCmd:
+ * @ctxt: a shell context
+ * @arg: a string argument
+ * @node: a first node
+ * @node2: a second node
+ *
+ * This is a generic signature for the XML shell functions
+ *
+ * Returns an int, negative returns indicating errors
+ */
+typedef int (* xmlShellCmd) (xmlShellCtxtPtr ctxt,
+ char *arg,
+ xmlNodePtr node,
+ xmlNodePtr node2);
+
+/*
+ * The Shell interface.
+ */
+void xmlShell (xmlDocPtr doc,
+ char *filename,
+ xmlShellReadlineFunc input,
+ FILE *output);
+
#ifdef __cplusplus
}
#endif
diff --git a/doc/xml.html b/doc/xml.html
index 40d6e0c..8bd7639 100644
--- a/doc/xml.html
+++ b/doc/xml.html
@@ -128,6 +128,9 @@
<ul>
<li>working on HTML and XML links recognition layers, get in touch with me
if you want to test those.</li>
+ <li>a Push interface for the XML parser</li>
+ <li>a shell-like interface to the document tree (try tester --shell :-)</li>
+ <li>lots of bug fixes and improvements added over the Christmas holidays</li>
</ul>
<h3>1.8.2: Dec 21 1999</h3>
@@ -901,6 +904,6 @@
<p><a href="mailto:Daniel.Veillard@w3.org">Daniel Veillard</a></p>
-<p>$Id: xml.html,v 1.16 1997/01/04 02:49:42 veillard Exp $</p>
+<p>$Id: xml.html,v 1.17 1999/12/21 15:35:27 veillard Exp $</p>
</body>
</html>
diff --git a/entities.c b/entities.c
index 2e73684..027acd0 100644
--- a/entities.c
+++ b/entities.c
@@ -114,10 +114,13 @@
cur->SystemID = xmlStrdup(SystemID);
else
cur->SystemID = NULL;
- if (content != NULL)
- cur->content = xmlStrdup(content);
- else
+ if (content != NULL) {
+ cur->length = xmlStrlen(content);
+ cur->content = xmlStrndup(content, cur->length);
+ } else {
+ cur->length = 0;
cur->content = NULL;
+ }
cur->orig = NULL;
table->nb_entities++;
}
diff --git a/entities.h b/entities.h
index 3af38e3..84ad7c1 100644
--- a/entities.h
+++ b/entities.h
@@ -34,6 +34,7 @@
const xmlChar *ExternalID; /* External identifier for PUBLIC Entity */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */
xmlChar *content; /* The entity content or ndata if unparsed */
+ int length; /* the content length */
xmlChar *orig; /* The entity cont without ref substitution */
} xmlEntity;
typedef xmlEntity *xmlEntityPtr;
diff --git a/include/libxml/debugXML.h b/include/libxml/debugXML.h
index 9c77496..8774f0b 100644
--- a/include/libxml/debugXML.h
+++ b/include/libxml/debugXML.h
@@ -7,19 +7,97 @@
#ifndef __DEBUG_XML__
#define __DEBUG_XML__
+#include <stdio.h>
#include "tree.h"
+#include "xpath.h"
#ifdef __cplusplus
extern "C" {
#endif
-extern void xmlDebugDumpString(FILE *output, const xmlChar *str);
-extern void xmlDebugDumpAttr(FILE *output, xmlAttrPtr attr, int depth);
-extern void xmlDebugDumpAttrList(FILE *output, xmlAttrPtr attr, int depth);
-extern void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth);
-extern void xmlDebugDumpNode(FILE *output, xmlNodePtr node, int depth);
-extern void xmlDebugDumpNodeList(FILE *output, xmlNodePtr node, int depth);
-extern void xmlDebugDumpDocument(FILE *output, xmlDocPtr doc);
-extern void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc);
+
+/*
+ * The standard Dump routines
+ */
+void xmlDebugDumpString (FILE *output,
+ const xmlChar *str);
+void xmlDebugDumpAttr (FILE *output,
+ xmlAttrPtr attr,
+ int depth);
+void xmlDebugDumpAttrList (FILE *output,
+ xmlAttrPtr attr,
+ int depth);
+void xmlDebugDumpOneNode (FILE *output,
+ xmlNodePtr node,
+ int depth);
+void xmlDebugDumpNode (FILE *output,
+ xmlNodePtr node,
+ int depth);
+void xmlDebugDumpNodeList (FILE *output,
+ xmlNodePtr node,
+ int depth);
+void xmlDebugDumpDocumentHead(FILE *output,
+ xmlDocPtr doc);
+void xmlDebugDumpDocument (FILE *output,
+ xmlDocPtr doc);
+void xmlDebugDumpEntities (FILE *output,
+ xmlDocPtr doc);
+void xmlLsOneNode (FILE *output,
+ xmlNodePtr node);
+
+/****************************************************************
+ * *
+ * The XML shell related structures and functions *
+ * *
+ ****************************************************************/
+
+/**
+ * xmlShellReadlineFunc:
+ * @prompt: a string prompt
+ *
+ * This is a generic signature for the XML shell input function
+ *
+ * Returns a string which will be freed by the Shell
+ */
+typedef char * (* xmlShellReadlineFunc)(char *prompt);
+
+/*
+ * The shell context itself
+ * TODO: add the defined function tables.
+ */
+typedef struct xmlShellCtxt {
+ char *filename;
+ xmlDocPtr doc;
+ xmlNodePtr node;
+ xmlXPathContextPtr pctxt;
+ int loaded;
+ FILE *output;
+ xmlShellReadlineFunc input;
+} xmlShellCtxt, *xmlShellCtxtPtr;
+
+/**
+ * xmlShellCmd:
+ * @ctxt: a shell context
+ * @arg: a string argument
+ * @node: a first node
+ * @node2: a second node
+ *
+ * This is a generic signature for the XML shell functions
+ *
+ * Returns an int, negative returns indicating errors
+ */
+typedef int (* xmlShellCmd) (xmlShellCtxtPtr ctxt,
+ char *arg,
+ xmlNodePtr node,
+ xmlNodePtr node2);
+
+/*
+ * The Shell interface.
+ */
+void xmlShell (xmlDocPtr doc,
+ char *filename,
+ xmlShellReadlineFunc input,
+ FILE *output);
+
#ifdef __cplusplus
}
#endif
diff --git a/include/libxml/entities.h b/include/libxml/entities.h
index 3af38e3..84ad7c1 100644
--- a/include/libxml/entities.h
+++ b/include/libxml/entities.h
@@ -34,6 +34,7 @@
const xmlChar *ExternalID; /* External identifier for PUBLIC Entity */
const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */
xmlChar *content; /* The entity content or ndata if unparsed */
+ int length; /* the content length */
xmlChar *orig; /* The entity cont without ref substitution */
} xmlEntity;
typedef xmlEntity *xmlEntityPtr;
diff --git a/include/libxml/parser.h b/include/libxml/parser.h
index 8a6443c..196aeb3 100644
--- a/include/libxml/parser.h
+++ b/include/libxml/parser.h
@@ -40,11 +40,12 @@
const char *filename; /* The file analyzed, if any */
const char *directory; /* the directory/base of teh file */
- const xmlChar *base; /* Base of the array to parse */
- const xmlChar *cur; /* Current char being parsed */
+ const xmlChar *base; /* Base of the array to parse */
+ const xmlChar *cur; /* Current char being parsed */
+ int length; /* length if known */
int line; /* Current line */
int col; /* Current column */
- int consumed; /* How many xmlChars were already consumed */
+ int consumed; /* How many xmlChars already consumed */
xmlParserInputDeallocate free; /* function to deallocate the base */
} xmlParserInput;
typedef xmlParserInput *xmlParserInputPtr;
@@ -77,20 +78,25 @@
typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
/**
- * The parser is not a state based parser, but we need to maintain
+ * The parser is not (yet) a state based parser, but we need to maintain
* minimum state informations, especially for entities processing.
*/
typedef enum {
- XML_PARSER_EOF = 0,
- XML_PARSER_PROLOG,
- XML_PARSER_CONTENT,
- XML_PARSER_ENTITY_DECL,
- XML_PARSER_ENTITY_VALUE,
- XML_PARSER_ATTRIBUTE_VALUE,
- XML_PARSER_DTD,
- XML_PARSER_EPILOG,
- XML_PARSER_COMMENT,
- XML_PARSER_CDATA_SECTION
+ XML_PARSER_EOF = -1, /* nothing is to be parsed */
+ XML_PARSER_START = 0, /* nothing has been parsed */
+ XML_PARSER_MISC, /* Misc* before int subset */
+ XML_PARSER_PI, /* Within a processing instruction */
+ XML_PARSER_DTD, /* within some DTD content */
+ XML_PARSER_PROLOG, /* Misc* after internal subset */
+ XML_PARSER_COMMENT, /* within a comment */
+ XML_PARSER_START_TAG, /* within a start tag */
+ XML_PARSER_CONTENT, /* within the content */
+ XML_PARSER_CDATA_SECTION, /* within a CDATA section */
+ XML_PARSER_END_TAG, /* within a closing tag */
+ XML_PARSER_ENTITY_DECL, /* within an entity declaration */
+ XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */
+ XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
+ XML_PARSER_EPILOG /* the Misc* after the last end tag */
} xmlParserInputState;
/**
@@ -151,6 +157,7 @@
xmlChar * *nameTab; /* array of nodes */
long nbChars; /* number of xmlChar processed */
+ long checkIndex; /* used by progressive parsing lookup */
} _xmlParserCtxt;
typedef _xmlParserCtxt xmlParserCtxt;
typedef xmlParserCtxt *xmlParserCtxtPtr;
@@ -347,13 +354,35 @@
xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax,
const xmlChar *ExternalID,
const xmlChar *SystemID);
+/**
+ * SAX initialization routines
+ */
+void xmlDefaultSAXHandlerInit(void);
+void htmlDefaultSAXHandlerInit(void);
+
+/**
+ * Parser contexts handling.
+ */
void xmlInitParserCtxt (xmlParserCtxtPtr ctxt);
void xmlClearParserCtxt (xmlParserCtxtPtr ctxt);
+void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt,
const xmlChar* buffer,
const char* filename);
-void xmlDefaultSAXHandlerInit(void);
-void htmlDefaultSAXHandlerInit(void);
+xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
+
+/**
+ * Interfaces for the Push mode
+ */
+xmlParserCtxtPtr xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,
+ void *user_data,
+ const char *chunk,
+ int size,
+ const char *filename);
+int xmlParseChunk (xmlParserCtxtPtr ctxt,
+ const char *chunk,
+ int size,
+ int terminate);
/**
* Node infos
diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h
index 9da4846..5a7b7ff 100644
--- a/include/libxml/parserInternals.h
+++ b/include/libxml/parserInternals.h
@@ -435,9 +435,10 @@
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
*/
#define IS_CHAR(c) \
- ((((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
- (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) && \
- (((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) <= 0x10FFFF))
+ ((((c) >= 0x20) && ((c) <= 0xD7FF)) || \
+ ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
+ (((c) >= 0xE000) && ((c) <= 0xFFFD)) || \
+ (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
/*
* [85] BaseChar ::= ... long list see REC ...
@@ -595,8 +596,7 @@
xmlChar * xmlParseAttribute (xmlParserCtxtPtr ctxt,
xmlChar **value);
xmlChar * xmlParseStartTag (xmlParserCtxtPtr ctxt);
-void xmlParseEndTag (xmlParserCtxtPtr ctxt,
- xmlChar *tagname);
+void xmlParseEndTag (xmlParserCtxtPtr ctxt);
void xmlParseCDSect (xmlParserCtxtPtr ctxt);
void xmlParseContent (xmlParserCtxtPtr ctxt);
void xmlParseElement (xmlParserCtxtPtr ctxt);
diff --git a/include/libxml/tree.h b/include/libxml/tree.h
index 3a0285b..cce6168 100644
--- a/include/libxml/tree.h
+++ b/include/libxml/tree.h
@@ -526,6 +526,9 @@
int *size);
void xmlDocDump (FILE *f,
xmlDocPtr cur);
+void xmlElemDump (FILE *f,
+ xmlDocPtr cur,
+ xmlNodePtr elem);
int xmlSaveFile (const char *filename,
xmlDocPtr cur);
diff --git a/include/libxml/xmlIO.h b/include/libxml/xmlIO.h
index 2bdba5d..bf43de2 100644
--- a/include/libxml/xmlIO.h
+++ b/include/libxml/xmlIO.h
@@ -38,6 +38,9 @@
*/
xmlParserInputBufferPtr
+ xmlAllocParserInputBuffer (xmlCharEncoding enc);
+
+xmlParserInputBufferPtr
xmlParserInputBufferCreateFilename (const char *filename,
xmlCharEncoding enc);
xmlParserInputBufferPtr
diff --git a/include/libxml/xmlmemory.h b/include/libxml/xmlmemory.h
index 5c1b477..64477a1 100644
--- a/include/libxml/xmlmemory.h
+++ b/include/libxml/xmlmemory.h
@@ -1,5 +1,5 @@
/*
- * memory.h: interface for the memory allocation debug.
+ * xmlmemory.h: interface for the memory allocation debug.
*
* Daniel.Veillard@w3.org
*/
@@ -24,6 +24,7 @@
#define xmlInitMemory()
#define xmlMemoryDump()
#define xmlMemDisplay(x)
+#define xmlMemShow(x, d)
#else /* ! NO_DEBUG_MEMORY */
#include <stdio.h>
@@ -51,6 +52,7 @@
char * xmlMemStrdup (const char *str);
int xmlMemUsed (void);
void xmlMemDisplay (FILE *fp);
+void xmlMemShow (FILE *fp, int nr);
void xmlMemoryDump (void);
int xmlInitMemory (void);
diff --git a/include/libxml/xpath.h b/include/libxml/xpath.h
index 149b0be..84c8305 100644
--- a/include/libxml/xpath.h
+++ b/include/libxml/xpath.h
@@ -205,6 +205,9 @@
void xmlXPathFreeObject (xmlXPathObjectPtr obj);
xmlXPathObjectPtr xmlXPathEvalExpression (const xmlChar *str,
xmlXPathContextPtr ctxt);
+xmlNodeSetPtr xmlXPathNodeSetCreate (xmlNodePtr val);
+void xmlXPathFreeNodeSetList (xmlXPathObjectPtr obj);
+void xmlXPathFreeNodeSet (xmlNodeSetPtr obj);
#ifdef __cplusplus
}
diff --git a/parser.c b/parser.c
index 98656d4..2323e8a 100644
--- a/parser.c
+++ b/parser.c
@@ -43,6 +43,9 @@
#include "xmlIO.h"
#include "xml-error.h"
+#define XML_PARSER_BIG_BUFFER_SIZE 1000
+#define XML_PARSER_BUFFER_SIZE 100
+
const char *xmlParserVersion = LIBXML_VERSION;
/*
@@ -61,6 +64,9 @@
************************************************************************/
/* #define DEBUG_INPUT */
+/* #define DEBUG_STACK */
+/* #define DEBUG_PUSH */
+
#define INPUT_CHUNK 250
/* we need to keep enough input to show errors in context */
@@ -170,7 +176,14 @@
return(0);
}
- ret = xmlParserInputBufferGrow(in->buf, len);
+ if ((in->buf->netIO != NULL) || (in->buf->file != NULL) ||
+#ifdef HAVE_ZLIB_H
+ (in->buf->gzfile != NULL) ||
+#endif
+ (in->buf->fd >= 0))
+ ret = xmlParserInputBufferGrow(in->buf, len);
+ else
+ return(0);
/*
* NOTE : in->base may be a "dandling" i.e. freed pointer in this
@@ -256,8 +269,8 @@
* Generic function for accessing stacks in the Parser Context
*/
-#define PUSH_AND_POP(type, name) \
-extern int name##Push(xmlParserCtxtPtr ctxt, type value) { \
+#define PUSH_AND_POP(scope, type, name) \
+scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
if (ctxt->name##Nr >= ctxt->name##Max) { \
ctxt->name##Max *= 2; \
ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab, \
@@ -271,7 +284,7 @@
ctxt->name = value; \
return(ctxt->name##Nr++); \
} \
-extern type name##Pop(xmlParserCtxtPtr ctxt) { \
+scope type name##Pop(xmlParserCtxtPtr ctxt) { \
type ret; \
if (ctxt->name##Nr <= 0) return(0); \
ctxt->name##Nr--; \
@@ -284,8 +297,9 @@
return(ret); \
} \
-PUSH_AND_POP(xmlParserInputPtr, input)
-PUSH_AND_POP(xmlNodePtr, node)
+PUSH_AND_POP(extern, xmlParserInputPtr, input)
+PUSH_AND_POP(extern, xmlNodePtr, node)
+PUSH_AND_POP(extern, xmlChar*, name)
/*
* Macros for accessing the content. Those should be used only by the parser,
@@ -444,6 +458,7 @@
input->buf = NULL;
input->free = NULL;
input->consumed = 0;
+ input->length = 0;
return(input);
}
@@ -507,6 +522,7 @@
input->filename = (char *) entity->SystemID; /* TODO !!! char <- xmlChar */
input->base = entity->content;
input->cur = entity->content;
+ input->length = entity->length;
return(input);
}
@@ -535,6 +551,7 @@
}
input->base = buffer;
input->cur = buffer;
+ input->length = xmlStrlen(buffer);
return(input);
}
@@ -556,7 +573,7 @@
if (ctxt == NULL) return(NULL);
buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
if (buf == NULL) {
- char name[1024];
+ char name[XML_PARSER_BIG_BUFFER_SIZE];
if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
#ifdef WIN32
@@ -638,7 +655,7 @@
ctxt->hasPErefs = 0;
ctxt->html = 0;
ctxt->external = 0;
- ctxt->instate = XML_PARSER_PROLOG;
+ ctxt->instate = XML_PARSER_START;
ctxt->token = 0;
ctxt->directory = NULL;
@@ -648,6 +665,12 @@
ctxt->nodeMax = 10;
ctxt->node = NULL;
+ /* Allocate the Name stack */
+ ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
+ ctxt->nameNr = 0;
+ ctxt->nameMax = 10;
+ ctxt->name = NULL;
+
if (sax == NULL) ctxt->sax = &xmlDefaultSAXHandler;
else {
ctxt->sax = sax;
@@ -664,6 +687,8 @@
ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
ctxt->record_info = 0;
ctxt->nbChars = 0;
+ ctxt->checkIndex = 0;
+ ctxt->errNo = XML_ERR_OK;
xmlInitNodeInfoSeq(&ctxt->node_seq);
}
@@ -679,13 +704,17 @@
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
{
xmlParserInputPtr input;
+ xmlChar *oldname;
if (ctxt == NULL) return;
while ((input = inputPop(ctxt)) != NULL) {
xmlFreeInputStream(input);
}
-
+ while ((oldname = namePop(ctxt)) != NULL) {
+ xmlFree(oldname);
+ }
+ if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
@@ -970,9 +999,15 @@
GROW;
if ((CUR == '&') && (NXT(1) == '#')) {
switch(ctxt->instate) {
+ case XML_PARSER_ENTITY_DECL:
+ case XML_PARSER_PI:
case XML_PARSER_CDATA_SECTION:
- return;
case XML_PARSER_COMMENT:
+ /* we just ignore it there */
+ return;
+ case XML_PARSER_START_TAG:
+ return;
+ case XML_PARSER_END_TAG:
return;
case XML_PARSER_EOF:
ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
@@ -981,6 +1016,8 @@
ctxt->wellFormed = 0;
return;
case XML_PARSER_PROLOG:
+ case XML_PARSER_START:
+ case XML_PARSER_MISC:
ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
@@ -999,9 +1036,6 @@
"CharRef are forbiden in DTDs!\n");
ctxt->wellFormed = 0;
return;
- case XML_PARSER_ENTITY_DECL:
- /* we just ignore it there */
- return;
case XML_PARSER_ENTITY_VALUE:
/*
* NOTE: in the case of entity values, we don't do the
@@ -1023,8 +1057,13 @@
switch(ctxt->instate) {
case XML_PARSER_CDATA_SECTION:
return;
+ case XML_PARSER_PI:
case XML_PARSER_COMMENT:
return;
+ case XML_PARSER_START_TAG:
+ return;
+ case XML_PARSER_END_TAG:
+ return;
case XML_PARSER_EOF:
ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
@@ -1032,6 +1071,8 @@
ctxt->wellFormed = 0;
return;
case XML_PARSER_PROLOG:
+ case XML_PARSER_START:
+ case XML_PARSER_MISC:
ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
@@ -1195,6 +1236,10 @@
return;
case XML_PARSER_COMMENT:
return;
+ case XML_PARSER_START_TAG:
+ return;
+ case XML_PARSER_END_TAG:
+ return;
case XML_PARSER_EOF:
ctxt->errNo = XML_ERR_PEREF_AT_EOF;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
@@ -1202,6 +1247,8 @@
ctxt->wellFormed = 0;
return;
case XML_PARSER_PROLOG:
+ case XML_PARSER_START:
+ case XML_PARSER_MISC:
ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
@@ -1210,6 +1257,7 @@
case XML_PARSER_ENTITY_DECL:
case XML_PARSER_CONTENT:
case XML_PARSER_ATTRIBUTE_VALUE:
+ case XML_PARSER_PI:
/* we just ignore it there */
return;
case XML_PARSER_EPILOG:
@@ -1355,7 +1403,7 @@
/*
* allocate a translation buffer.
*/
- buffer_size = 1000;
+ buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
if (buffer == NULL) {
perror("xmlDecodeEntities: malloc failed");
@@ -1382,7 +1430,7 @@
current = ent->content;
while (*current != 0) {
*out++ = *current++;
- if (out - buffer > buffer_size - 100) {
+ if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
int index = out - buffer;
growBuffer(buffer);
@@ -1396,7 +1444,7 @@
nbchars += i + 2;
*out++ = '&';
- if (out - buffer > buffer_size - i - 100) {
+ if (out - buffer > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
int index = out - buffer;
growBuffer(buffer);
@@ -1427,7 +1475,7 @@
/* invalid for UTF-8 , use COPY(out); !!!!!! */
*out++ = cur;
nbchars++;
- if (out - buffer > buffer_size - 100) {
+ if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
int index = out - buffer;
growBuffer(buffer);
@@ -1471,7 +1519,7 @@
/*
* allocate a translation buffer.
*/
- buffer_size = 500;
+ buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
if (buffer == NULL) {
perror("xmlDecodeEntities: malloc failed");
@@ -1498,7 +1546,7 @@
current = ent->content;
while (*current != 0) {
*out++ = *current++;
- if (out - buffer > buffer_size - 100) {
+ if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
int index = out - buffer;
growBuffer(buffer);
@@ -1510,7 +1558,7 @@
const xmlChar *cur = ent->name;
*out++ = '&';
- if (out - buffer > buffer_size - i - 100) {
+ if (out - buffer > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
int index = out - buffer;
growBuffer(buffer);
@@ -1526,7 +1574,7 @@
current = ent->content;
while (*current != 0) {
*out++ = *current++;
- if (out - buffer > buffer_size - 100) {
+ if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
int index = out - buffer;
growBuffer(buffer);
@@ -1537,7 +1585,7 @@
} else {
/* invalid for UTF-8 , use COPY(out); !!!!!! */
*out++ = cur;
- if (out - buffer > buffer_size - 100) {
+ if (out - buffer > buffer_size - XML_PARSER_BUFFER_SIZE) {
int index = out - buffer;
growBuffer(buffer);
@@ -2016,9 +2064,6 @@
*
* Is this a sequence of blank chars that one can ignore ?
*
- * TODO: Whether white space are significant has to be checked accordingly
- * to DTD informations if available
- *
* Returns 1 if ignorable 0 otherwise.
*/
@@ -2284,7 +2329,7 @@
xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
xmlChar *buf = NULL;
int len = 0;
- int size = 100;
+ int size = XML_PARSER_BUFFER_SIZE;
xmlChar c;
buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
@@ -2668,7 +2713,7 @@
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
xmlChar *buf = NULL;
int len = 0;
- int size = 100;
+ int size = XML_PARSER_BUFFER_SIZE;
xmlChar c;
xmlChar stop;
xmlChar *ret = NULL;
@@ -2823,7 +2868,7 @@
/*
* allocate a translation buffer.
*/
- buffer_size = 100;
+ buffer_size = XML_PARSER_BUFFER_SIZE;
buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
if (buffer == NULL) {
perror("xmlParseAttValue: malloc failed");
@@ -2836,7 +2881,6 @@
*/
cur = CUR;
while ((cur != limit) && (cur != '<')) {
-
if (cur == 0) break;
if ((cur == '&') && (NXT(1) == '#')) {
int val = xmlParseCharRef(ctxt);
@@ -2925,7 +2969,7 @@
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
xmlChar *buf = NULL;
int len = 0;
- int size = 100;
+ int size = XML_PARSER_BUFFER_SIZE;
xmlChar cur;
xmlChar stop;
@@ -2996,7 +3040,7 @@
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
xmlChar *buf = NULL;
int len = 0;
- int size = 100;
+ int size = XML_PARSER_BUFFER_SIZE;
xmlChar cur;
xmlChar stop;
@@ -3064,16 +3108,11 @@
void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
- xmlChar buf[1000];
+ xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE];
int nbchar = 0;
xmlChar cur;
SHRINK;
- /*
- * !!!!!!!!!!!!
- * NOTE: NXT(0) is used here to avoid breaking on < or &
- * entities substitutions.
- */
cur = CUR;
while ((IS_CHAR(cur)) && (cur != '<') &&
(cur != '&')) {
@@ -3082,14 +3121,13 @@
if (cdata) break;
else {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
+ ctxt->sax->warning(ctxt->userData,
"Sequence ']]>' not allowed in content\n");
ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
- ctxt->wellFormed = 0;
}
}
buf[nbchar++] = CUR;
- if (nbchar == 1000) {
+ if (nbchar == XML_PARSER_BIG_BUFFER_SIZE) {
/*
* Ok the segment is to be consumed as chars.
*/
@@ -3244,10 +3282,11 @@
xmlParseComment(xmlParserCtxtPtr ctxt) {
xmlChar *buf = NULL;
int len = 0;
- int size = 100;
+ int size = XML_PARSER_BUFFER_SIZE;
xmlChar q;
xmlChar r;
xmlChar cur;
+ xmlParserInputState state;
/*
* Check that there is a comment right here.
@@ -3255,12 +3294,14 @@
if ((CUR != '<') || (NXT(1) != '!') ||
(NXT(2) != '-') || (NXT(3) != '-')) return;
+ state = ctxt->instate;
ctxt->instate = XML_PARSER_COMMENT;
SHRINK;
SKIP(4);
buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
if (buf == NULL) {
fprintf(stderr, "malloc of %d byte failed\n", size);
+ ctxt->instate = state;
return;
}
q = CUR;
@@ -3283,6 +3324,7 @@
buf = xmlRealloc(buf, size * sizeof(xmlChar));
if (buf == NULL) {
fprintf(stderr, "realloc of %d byte failed\n", size);
+ ctxt->instate = state;
return;
}
}
@@ -3310,6 +3352,7 @@
ctxt->sax->comment(ctxt->userData, buf);
xmlFree(buf);
}
+ ctxt->instate = state;
}
/**
@@ -3362,11 +3405,14 @@
xmlParsePI(xmlParserCtxtPtr ctxt) {
xmlChar *buf = NULL;
int len = 0;
- int size = 100;
+ int size = XML_PARSER_BUFFER_SIZE;
xmlChar cur;
xmlChar *target;
+ xmlParserInputState state;
if ((CUR == '<') && (NXT(1) == '?')) {
+ state = ctxt->instate;
+ ctxt->instate = XML_PARSER_PI;
/*
* this is a Processing Instruction.
*/
@@ -3382,6 +3428,7 @@
buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
if (buf == NULL) {
fprintf(stderr, "malloc of %d byte failed\n", size);
+ ctxt->instate = state;
return;
}
cur = CUR;
@@ -3401,6 +3448,7 @@
buf = xmlRealloc(buf, size * sizeof(xmlChar));
if (buf == NULL) {
fprintf(stderr, "realloc of %d byte failed\n", size);
+ ctxt->instate = state;
return;
}
}
@@ -3440,6 +3488,7 @@
ctxt->errNo = XML_ERR_PI_NOT_STARTED;
ctxt->wellFormed = 0;
}
+ ctxt->instate = state;
}
}
@@ -3980,13 +4029,13 @@
*
* [ VC: IDREF ]
* Values of type IDREF must match the Name production, and values
- * of type IDREFS must match Names; TODO each IDREF Name must match the value
+ * of type IDREFS must match Names; each IDREF Name must match the value
* of an ID attribute on some element in the XML document; i.e. IDREF
* values must match the value of some ID attribute.
*
* [ VC: Entity Name ]
* Values of type ENTITY must match the Name production, values
- * of type ENTITIES must match Names; TODO each Entity Name must match the
+ * of type ENTITIES must match Names; each Entity Name must match the
* name of an unparsed entity declared in the DTD.
*
* [ VC: Name Token ]
@@ -5565,6 +5614,43 @@
ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
/*
+ * Cleanup
+ */
+ if (URI != NULL) xmlFree(URI);
+ if (ExternalID != NULL) xmlFree(ExternalID);
+ if (name != NULL) xmlFree(name);
+
+ /*
+ * Are there any internal subset declarations ?
+ * they are handled separately in xmlParseInternalSubset()
+ */
+ if (CUR == '[')
+ return;
+
+ /*
+ * We should be at the end of the DOCTYPE declaration.
+ */
+ if (CUR != '>') {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
+ ctxt->wellFormed = 0;
+ ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
+ }
+ NEXT;
+}
+
+/**
+ * xmlParseInternalSubset:
+ * @ctxt: an XML parser context
+ *
+ * parse the internal subset declaration
+ *
+ * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
+ */
+
+void
+xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
+ /*
* Is there any DTD definition ?
*/
if (CUR == '[') {
@@ -5592,7 +5678,7 @@
if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
- "xmlParseDocTypeDecl: error detected in Markup declaration\n");
+ "xmlParseInternalSubset: error detected in Markup declaration\n");
ctxt->wellFormed = 0;
ctxt->errNo = XML_ERR_INTERNAL_ERROR;
break;
@@ -5611,13 +5697,6 @@
ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
}
NEXT;
-
- /*
- * Cleanup
- */
- if (URI != NULL) xmlFree(URI);
- if (ExternalID != NULL) xmlFree(ExternalID);
- if (name != NULL) xmlFree(name);
}
/**
@@ -5830,7 +5909,6 @@
/**
* xmlParseEndTag:
* @ctxt: an XML parser context
- * @tagname: the tag name as parsed in the opening tag.
*
* parse an end of tag
*
@@ -5842,8 +5920,9 @@
*/
void
-xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlChar *tagname) {
+xmlParseEndTag(xmlParserCtxtPtr ctxt) {
xmlChar *name;
+ xmlChar *oldname;
GROW;
if ((CUR != '<') || (NXT(1) != '/')) {
@@ -5876,10 +5955,10 @@
* start-tag.
*
*/
- if (xmlStrcmp(name, tagname)) {
+ if (xmlStrcmp(name, ctxt->name)) {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
- "Opening and ending tag mismatch: %s and %s\n", tagname, name);
+ "Opening and ending tag mismatch: %s and %s\n", ctxt->name, name);
ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
ctxt->wellFormed = 0;
@@ -5893,7 +5972,13 @@
if (name != NULL)
xmlFree(name);
-
+ oldname = namePop(ctxt);
+ if (oldname != NULL) {
+#ifdef DEBUG_STACK
+ fprintf(stderr,"Close: popped %s\n", oldname);
+#endif
+ xmlFree(oldname);
+ }
return;
}
@@ -5915,7 +6000,7 @@
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
xmlChar *buf = NULL;
int len = 0;
- int size = 100;
+ int size = XML_PARSER_BUFFER_SIZE;
xmlChar r, s;
xmlChar cur;
@@ -6113,6 +6198,7 @@
xmlParseElement(xmlParserCtxtPtr ctxt) {
const xmlChar *openTag = CUR_PTR;
xmlChar *name;
+ xmlChar *oldname;
xmlParserNodeInfo node_info;
xmlNodePtr ret;
@@ -6127,6 +6213,7 @@
if (name == NULL) {
return;
}
+ namePush(ctxt, name);
ret = ctxt->node;
/*
@@ -6145,7 +6232,13 @@
SKIP(2);
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
ctxt->sax->endElement(ctxt->userData, name);
- xmlFree(name);
+ oldname = namePop(ctxt);
+ if (oldname != NULL) {
+#ifdef DEBUG_STACK
+ fprintf(stderr,"Close: popped %s\n", oldname);
+#endif
+ xmlFree(oldname);
+ }
return;
}
if (CUR == '>') {
@@ -6162,7 +6255,13 @@
* end of parsing of this node.
*/
nodePop(ctxt);
- xmlFree(name);
+ oldname = namePop(ctxt);
+ if (oldname != NULL) {
+#ifdef DEBUG_STACK
+ fprintf(stderr,"Close: popped %s\n", oldname);
+#endif
+ xmlFree(oldname);
+ }
/*
* Capture end position and add node
@@ -6192,15 +6291,20 @@
* end of parsing of this node.
*/
nodePop(ctxt);
- xmlFree(name);
+ oldname = namePop(ctxt);
+ if (oldname != NULL) {
+#ifdef DEBUG_STACK
+ fprintf(stderr,"Close: popped %s\n", oldname);
+#endif
+ xmlFree(oldname);
+ }
return;
}
/*
* parse the end of tag: '</' should be here.
*/
- xmlParseEndTag(ctxt, name);
- xmlFree(name);
+ xmlParseEndTag(ctxt);
/*
* Capture end position and add node
@@ -6771,6 +6875,10 @@
(NXT(6) == 'Y') && (NXT(7) == 'P') &&
(NXT(8) == 'E')) {
xmlParseDocTypeDecl(ctxt);
+ if (CUR == '[') {
+ ctxt->instate = XML_PARSER_DTD;
+ xmlParseInternalSubset(ctxt);
+ }
ctxt->instate = XML_PARSER_PROLOG;
xmlParseMisc(ctxt);
}
@@ -6779,23 +6887,33 @@
* Time to start parsing the tree itself
*/
GROW;
- ctxt->instate = XML_PARSER_CONTENT;
- xmlParseElement(ctxt);
- ctxt->instate = XML_PARSER_EPILOG;
-
- /*
- * The Misc part at the end
- */
- xmlParseMisc(ctxt);
-
- if (CUR != 0) {
+ if (CUR != '<') {
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
ctxt->sax->error(ctxt->userData,
- "Extra content at the end of the document\n");
+ "Start tag expect, '<' not found\n");
+ ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
ctxt->wellFormed = 0;
- ctxt->errNo = XML_ERR_DOCUMENT_END;
+ ctxt->instate = XML_PARSER_EOF;
+ } else {
+ ctxt->instate = XML_PARSER_CONTENT;
+ xmlParseElement(ctxt);
+ ctxt->instate = XML_PARSER_EPILOG;
+
+
+ /*
+ * The Misc part at the end
+ */
+ xmlParseMisc(ctxt);
+
+ if (CUR != 0) {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "Extra content at the end of the document\n");
+ ctxt->wellFormed = 0;
+ ctxt->errNo = XML_ERR_DOCUMENT_END;
+ }
+ ctxt->instate = XML_PARSER_EOF;
}
- ctxt->instate = XML_PARSER_EOF;
/*
* SAX: end of the document processing.
@@ -6816,18 +6934,74 @@
* xmlParseLookupSequence:
* @ctxt: an XML parser context
* @first: the first char to lookup
- * @next: the next char to lookup
+ * @next: the next char to lookup or zero
+ * @third: the third char to lookup or zero
*
- * Try to find if a sequence (first, next) or just (first) if next
- * is zero is available in the input stream.
- * Since XML-1.0 is an LALR(2) grammar a sequence of 2 char should be
- * enought. If this doesn't prove true this function call may change.
+ * Try to find if a sequence (first, next, third) or just (first next) or
+ * (first) is available in the input stream.
+ * This function has a side effect of (possibly) incrementing ctxt->checkIndex
+ * to avoid rescanning sequences of bytes, it DOES change the state of the
+ * parser, do not use liberally.
*
- * Returns 1 if the full sequence is available, 0 otherwise.
+ * Returns the offset of the sequence from the current parsing point if the
+ * full sequence is available, -1 otherwise.
*/
int
-xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first, xmlChar next) {
- return(0);
+xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
+ xmlChar next, xmlChar third) {
+ int base, len;
+ xmlParserInputPtr in;
+ const xmlChar *buf;
+
+ in = ctxt->input;
+ if (in == NULL) return(-1);
+ base = in->cur - in->base;
+ if (base < 0) return(-1);
+ if (ctxt->checkIndex > base)
+ base = ctxt->checkIndex;
+ if (in->buf == NULL) {
+ buf = in->base;
+ len = in->length;
+ } else {
+ buf = in->buf->buffer->content;
+ len = in->buf->buffer->use;
+ }
+ /* take into account the sequence length */
+ if (third) len -= 2;
+ else if (next) len --;
+ for (;base < len;base++) {
+ if (buf[base] == first) {
+ if (third != 0) {
+ if ((buf[base + 1] != next) ||
+ (buf[base + 2] != third)) continue;
+ } else if (next != 0) {
+ if (buf[base + 1] != next) continue;
+ }
+ ctxt->checkIndex = 0;
+#ifdef DEBUG_PUSH
+ if (next == 0)
+ fprintf(stderr, "PP: lookup '%c' found at %d\n",
+ first, base);
+ else if (third == 0)
+ fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
+ first, next, base);
+ else
+ fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
+ first, next, third, base);
+#endif
+ return(base - (in->cur - in->base));
+ }
+ }
+ ctxt->checkIndex = base;
+#ifdef DEBUG_PUSH
+ if (next == 0)
+ fprintf(stderr, "PP: lookup '%c' failed\n", first);
+ else if (third == 0)
+ fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
+ else
+ fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
+#endif
+ return(-1);
}
/**
@@ -6841,23 +7015,651 @@
int
xmlParseTry(xmlParserCtxtPtr ctxt) {
int ret = 0;
+ xmlParserInputPtr in;
+ int avail;
+ xmlChar cur, next;
+
+#ifdef DEBUG_PUSH
+ switch (ctxt->instate) {
+ case XML_PARSER_EOF:
+ fprintf(stderr, "PP: try EOF\n"); break;
+ case XML_PARSER_START:
+ fprintf(stderr, "PP: try START\n"); break;
+ case XML_PARSER_MISC:
+ fprintf(stderr, "PP: try MISC\n");break;
+ case XML_PARSER_COMMENT:
+ fprintf(stderr, "PP: try COMMENT\n");break;
+ case XML_PARSER_PROLOG:
+ fprintf(stderr, "PP: try PROLOG\n");break;
+ case XML_PARSER_START_TAG:
+ fprintf(stderr, "PP: try START_TAG\n");break;
+ case XML_PARSER_CONTENT:
+ fprintf(stderr, "PP: try CONTENT\n");break;
+ case XML_PARSER_CDATA_SECTION:
+ fprintf(stderr, "PP: try CDATA_SECTION\n");break;
+ case XML_PARSER_END_TAG:
+ fprintf(stderr, "PP: try END_TAG\n");break;
+ case XML_PARSER_ENTITY_DECL:
+ fprintf(stderr, "PP: try ENTITY_DECL\n");break;
+ case XML_PARSER_ENTITY_VALUE:
+ fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
+ case XML_PARSER_ATTRIBUTE_VALUE:
+ fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
+ case XML_PARSER_DTD:
+ fprintf(stderr, "PP: try DTD\n");break;
+ case XML_PARSER_EPILOG:
+ fprintf(stderr, "PP: try EPILOG\n");break;
+ case XML_PARSER_PI:
+ fprintf(stderr, "PP: try PI\n");break;
+ }
+#endif
while (1) {
+ /*
+ * Pop-up of finished entities.
+ */
+ while ((CUR == 0) && (ctxt->inputNr > 1))
+ xmlPopInput(ctxt);
+
+ in = ctxt->input;
+ if (in == NULL) break;
+ if (in->buf == NULL)
+ avail = in->length - (in->cur - in->base);
+ else
+ avail = in->buf->buffer->use - (in->cur - in->base);
+ if (avail < 1)
+ goto done;
switch (ctxt->instate) {
case XML_PARSER_EOF:
- return(0);
+ /*
+ * Document parsing is done !
+ */
+ goto done;
+ case XML_PARSER_START:
+ /*
+ * Very first chars read from the document flow.
+ */
+ cur = in->cur[0];
+ if (IS_BLANK(cur)) {
+ if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
+ ctxt->sax->setDocumentLocator(ctxt->userData,
+ &xmlDefaultSAXLocator);
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "Extra spaces at the beginning of the document are not allowed\n");
+ ctxt->errNo = XML_ERR_DOCUMENT_START;
+ ctxt->wellFormed = 0;
+ SKIP_BLANKS;
+ ret++;
+ if (in->buf == NULL)
+ avail = in->length - (in->cur - in->base);
+ else
+ avail = in->buf->buffer->use - (in->cur - in->base);
+ }
+ if (avail < 2)
+ goto done;
+
+ cur = in->cur[0];
+ next = in->cur[1];
+ if (cur == 0) {
+ if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
+ ctxt->sax->setDocumentLocator(ctxt->userData,
+ &xmlDefaultSAXLocator);
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData, "Document is empty\n");
+ ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
+ ctxt->wellFormed = 0;
+ ctxt->instate = XML_PARSER_EOF;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering EOF\n");
+#endif
+ if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
+ ctxt->sax->endDocument(ctxt->userData);
+ goto done;
+ }
+ if ((cur == '<') && (next == '?')) {
+ /* PI or XML decl */
+ if (avail < 5) return(ret);
+ if (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)
+ return(ret);
+ if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
+ ctxt->sax->setDocumentLocator(ctxt->userData,
+ &xmlDefaultSAXLocator);
+ if ((in->cur[2] == 'x') &&
+ (in->cur[3] == 'm') &&
+ (in->cur[4] == 'l')) {
+ ret += 5;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: Parsing XML Decl\n");
+#endif
+ xmlParseXMLDecl(ctxt);
+ if ((ctxt->sax) && (ctxt->sax->startDocument))
+ ctxt->sax->startDocument(ctxt->userData);
+ ctxt->instate = XML_PARSER_MISC;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering MISC\n");
+#endif
+ } else {
+ ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
+ if ((ctxt->sax) && (ctxt->sax->startDocument))
+ ctxt->sax->startDocument(ctxt->userData);
+ ctxt->instate = XML_PARSER_MISC;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering MISC\n");
+#endif
+ }
+ } else {
+ if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
+ ctxt->sax->setDocumentLocator(ctxt->userData,
+ &xmlDefaultSAXLocator);
+ ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
+ if ((ctxt->sax) && (ctxt->sax->startDocument))
+ ctxt->sax->startDocument(ctxt->userData);
+ ctxt->instate = XML_PARSER_MISC;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering MISC\n");
+#endif
+ }
+ break;
+ case XML_PARSER_MISC:
+ SKIP_BLANKS;
+ if (in->buf == NULL)
+ avail = in->length - (in->cur - in->base);
+ else
+ avail = in->buf->buffer->use - (in->cur - in->base);
+ if (avail < 2)
+ goto done;
+ cur = in->cur[0];
+ next = in->cur[1];
+ if ((cur == '<') && (next == '?')) {
+ if (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)
+ goto done;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: Parsing PI\n");
+#endif
+ xmlParsePI(ctxt);
+ } else if ((cur == '<') && (next == '!') &&
+ (in->cur[2] == '-') && (in->cur[3] == '-')) {
+ if (xmlParseLookupSequence(ctxt, '-', '>', 0) < 0)
+ goto done;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: Parsing Comment\n");
+#endif
+ xmlParseComment(ctxt);
+ ctxt->instate = XML_PARSER_MISC;
+ } else if ((cur == '<') && (next == '!') &&
+ (in->cur[2] == 'D') && (in->cur[3] == 'O') &&
+ (in->cur[4] == 'C') && (in->cur[5] == 'T') &&
+ (in->cur[6] == 'Y') && (in->cur[7] == 'P') &&
+ (in->cur[8] == 'E')) {
+ if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)
+ goto done;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: Parsing internal subset\n");
+#endif
+ xmlParseDocTypeDecl(ctxt);
+ if (CUR == '[') {
+ ctxt->instate = XML_PARSER_DTD;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering DTD\n");
+#endif
+ } else {
+ ctxt->instate = XML_PARSER_PROLOG;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering PROLOG\n");
+#endif
+ }
+ } else if ((cur == '<') && (next == '!') &&
+ (avail < 9)) {
+ goto done;
+ } else {
+ ctxt->instate = XML_PARSER_START_TAG;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering START_TAG\n");
+#endif
+ }
+ break;
case XML_PARSER_PROLOG:
- case XML_PARSER_CONTENT:
- case XML_PARSER_ENTITY_DECL:
- case XML_PARSER_ENTITY_VALUE:
- case XML_PARSER_ATTRIBUTE_VALUE:
- case XML_PARSER_DTD:
+ SKIP_BLANKS;
+ if (in->buf == NULL)
+ avail = in->length - (in->cur - in->base);
+ else
+ avail = in->buf->buffer->use - (in->cur - in->base);
+ if (avail < 2)
+ goto done;
+ cur = in->cur[0];
+ next = in->cur[1];
+ if ((cur == '<') && (next == '?')) {
+ if (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)
+ goto done;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: Parsing PI\n");
+#endif
+ xmlParsePI(ctxt);
+ } else if ((cur == '<') && (next == '!') &&
+ (in->cur[2] == '-') && (in->cur[3] == '-')) {
+ if (xmlParseLookupSequence(ctxt, '-', '>', 0) < 0)
+ goto done;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: Parsing Comment\n");
+#endif
+ xmlParseComment(ctxt);
+ ctxt->instate = XML_PARSER_PROLOG;
+ } else if ((cur == '<') && (next == '!') &&
+ (avail < 4)) {
+ goto done;
+ } else {
+ ctxt->instate = XML_PARSER_START_TAG;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering START_TAG\n");
+#endif
+ }
+ break;
case XML_PARSER_EPILOG:
+ SKIP_BLANKS;
+ if (in->buf == NULL)
+ avail = in->length - (in->cur - in->base);
+ else
+ avail = in->buf->buffer->use - (in->cur - in->base);
+ if (avail < 2)
+ goto done;
+ cur = in->cur[0];
+ next = in->cur[1];
+ if ((cur == '<') && (next == '?')) {
+ if (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)
+ goto done;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: Parsing PI\n");
+#endif
+ xmlParsePI(ctxt);
+ ctxt->instate = XML_PARSER_EPILOG;
+ } else if ((cur == '<') && (next == '!') &&
+ (in->cur[2] == '-') && (in->cur[3] == '-')) {
+ if (xmlParseLookupSequence(ctxt, '-', '>', 0) < 0)
+ goto done;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: Parsing Comment\n");
+#endif
+ xmlParseComment(ctxt);
+ ctxt->instate = XML_PARSER_EPILOG;
+ } else if ((cur == '<') && (next == '!') &&
+ (avail < 4)) {
+ goto done;
+ } else {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "Extra content at the end of the document\n");
+ ctxt->wellFormed = 0;
+ ctxt->errNo = XML_ERR_DOCUMENT_END;
+ ctxt->instate = XML_PARSER_EOF;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering EOF\n");
+#endif
+ if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
+ ctxt->sax->endDocument(ctxt->userData);
+ goto done;
+ }
+ break;
+ case XML_PARSER_START_TAG: {
+ xmlChar *name, *oldname;
+
+ if (avail < 2)
+ goto done;
+ cur = in->cur[0];
+ if (cur != '<') {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "Start tag expect, '<' not found\n");
+ ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
+ ctxt->wellFormed = 0;
+ ctxt->instate = XML_PARSER_EOF;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering EOF\n");
+#endif
+ if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
+ ctxt->sax->endDocument(ctxt->userData);
+ goto done;
+ }
+ if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)
+ goto done;
+ name = xmlParseStartTag(ctxt);
+ if (name == NULL) {
+ ctxt->instate = XML_PARSER_EOF;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering EOF\n");
+#endif
+ if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
+ ctxt->sax->endDocument(ctxt->userData);
+ goto done;
+ }
+ namePush(ctxt, xmlStrdup(name));
+
+ /*
+ * [ VC: Root Element Type ]
+ * The Name in the document type declaration must match
+ * the element type of the root element.
+ */
+ if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
+ ctxt->node && (ctxt->node == ctxt->myDoc->root))
+ ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
+
+ /*
+ * Check for an Empty Element.
+ */
+ if ((CUR == '/') && (NXT(1) == '>')) {
+ SKIP(2);
+ if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
+ ctxt->sax->endElement(ctxt->userData, name);
+ xmlFree(name);
+ oldname = namePop(ctxt);
+ if (oldname != NULL) {
+#ifdef DEBUG_STACK
+ fprintf(stderr,"Close: popped %s\n", oldname);
+#endif
+ xmlFree(oldname);
+ }
+ if (ctxt->name == NULL) {
+ ctxt->instate = XML_PARSER_EPILOG;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering EPILOG\n");
+#endif
+ } else {
+ ctxt->instate = XML_PARSER_CONTENT;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering CONTENT\n");
+#endif
+ }
+ break;
+ }
+ if (CUR == '>') {
+ NEXT;
+ } else {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "Couldn't find end of Start Tag %s\n",
+ name);
+ ctxt->wellFormed = 0;
+ ctxt->errNo = XML_ERR_GT_REQUIRED;
+
+ /*
+ * end of parsing of this node.
+ */
+ nodePop(ctxt);
+ oldname = namePop(ctxt);
+ if (oldname != NULL) {
+#ifdef DEBUG_STACK
+ fprintf(stderr,"Close: popped %s\n", oldname);
+#endif
+ xmlFree(oldname);
+ }
+ }
+ xmlFree(name);
+ ctxt->instate = XML_PARSER_CONTENT;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering CONTENT\n");
+#endif
+ break;
+ }
+ case XML_PARSER_CONTENT:
+ /*
+ * Handle preparsed entities and charRef
+ */
+ if (ctxt->token != 0) {
+ xmlChar cur[2] = { 0 , 0 } ;
+
+ cur[0] = (xmlChar) ctxt->token;
+ if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
+ ctxt->sax->characters(ctxt->userData, cur, 1);
+ ctxt->token = 0;
+ }
+ if (avail < 2)
+ goto done;
+ cur = in->cur[0];
+ next = in->cur[1];
+ if ((cur == '<') && (next == '?')) {
+ if (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)
+ goto done;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: Parsing PI\n");
+#endif
+ xmlParsePI(ctxt);
+ } else if ((cur == '<') && (next == '!') &&
+ (in->cur[2] == '-') && (in->cur[3] == '-')) {
+ if (xmlParseLookupSequence(ctxt, '-', '>', 0) < 0)
+ goto done;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: Parsing Comment\n");
+#endif
+ xmlParseComment(ctxt);
+ ctxt->instate = XML_PARSER_CONTENT;
+ } else if ((cur == '<') && (in->cur[1] == '!') &&
+ (in->cur[2] == '[') && (NXT(3) == 'C') &&
+ (in->cur[4] == 'D') && (NXT(5) == 'A') &&
+ (in->cur[6] == 'T') && (NXT(7) == 'A') &&
+ (in->cur[8] == '[')) {
+ SKIP(9);
+ ctxt->instate = XML_PARSER_CDATA_SECTION;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering CDATA_SECTION\n");
+#endif
+ break;
+ } else if ((cur == '<') && (next == '!') &&
+ (avail < 9)) {
+ goto done;
+ } else if ((cur == '<') && (next == '/')) {
+ ctxt->instate = XML_PARSER_END_TAG;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering END_TAG\n");
+#endif
+ break;
+ } else if (cur == '<') {
+ ctxt->instate = XML_PARSER_START_TAG;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering START_TAG\n");
+#endif
+ break;
+ } else if (cur == '&') {
+ if (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0)
+ goto done;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: Parsing Reference\n");
+#endif
+ /* TODO: check generation of subtrees if noent !!! */
+ xmlParseReference(ctxt);
+ } else {
+ /* TODO Avoid the extra copy, handle directly !!!!!! */
+ /*
+ * Goal of the following test is :
+ * - minimize calls to the SAX 'character' callback
+ * when they are mergeable
+ * - handle a problem for isBlank when we only parse
+ * a sequence of blank chars and the next one is
+ * not available to check against '<' presence.
+ * - tries to homogenize the differences in SAX
+ * callbacks between the push and pull versions
+ * of the parser.
+ */
+ if ((ctxt->inputNr == 1) &&
+ (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
+ if (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0)
+ goto done;
+ }
+ ctxt->checkIndex = 0;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: Parsing char data\n");
+#endif
+ xmlParseCharData(ctxt, 0);
+ }
+ /*
+ * Pop-up of finished entities.
+ */
+ while ((CUR == 0) && (ctxt->inputNr > 1))
+ xmlPopInput(ctxt);
+ break;
+ case XML_PARSER_CDATA_SECTION: {
+ /*
+ * The Push mode needs the SAX callback for
+ * cdataBlock to merge back contiguous callbacks.
+ */
+ int base;
+
+ in = ctxt->input;
+ base = xmlParseLookupSequence(ctxt, ']', ']', '>');
+ if (base < 0) {
+ if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
+ if (ctxt->sax != NULL) {
+ if (ctxt->sax->cdataBlock != NULL)
+ ctxt->sax->cdataBlock(ctxt->userData, in->cur,
+ XML_PARSER_BIG_BUFFER_SIZE);
+ }
+ SKIP(XML_PARSER_BIG_BUFFER_SIZE);
+ ctxt->checkIndex = 0;
+ }
+ goto done;
+ } else {
+ if ((ctxt->sax != NULL) && (base > 0)) {
+ if (ctxt->sax->cdataBlock != NULL)
+ ctxt->sax->cdataBlock(ctxt->userData,
+ in->cur, base);
+ }
+ SKIP(base + 3);
+ ctxt->checkIndex = 0;
+ ctxt->instate = XML_PARSER_CONTENT;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering CONTENT\n");
+#endif
+ }
+ break;
+ }
+ case XML_PARSER_END_TAG: {
+ if (avail < 2)
+ goto done;
+ if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)
+ goto done;
+ xmlParseEndTag(ctxt);
+ if (ctxt->name == NULL) {
+ ctxt->instate = XML_PARSER_EPILOG;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering EPILOG\n");
+#endif
+ } else {
+ ctxt->instate = XML_PARSER_CONTENT;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering CONTENT\n");
+#endif
+ }
+ break;
+ }
+ case XML_PARSER_DTD: {
+ /*
+ * Sorry but progressive parsing of the internal subset
+ * is not expected to be supported. We first check that
+ * the full content of the internal subset is available and
+ * the parsing is launched only at that point.
+ * Internal subset ends up with "']' S? '>'" in an unescaped
+ * section and not in a ']]>' sequence which are conditional
+ * sections (whoever argued to keep that crap in XML deserve
+ * a place in hell !).
+ */
+ int base, i;
+ xmlChar *buf;
+ xmlChar quote = 0;
+
+ base = in->cur - in->base;
+ if (base < 0) return(0);
+ if (ctxt->checkIndex > base)
+ base = ctxt->checkIndex;
+ buf = in->buf->buffer->content;
+ for (;base < in->buf->buffer->use;base++) {
+ if (quote != 0) {
+ if (buf[base] == quote)
+ quote = 0;
+ continue;
+ }
+ if (buf[base] == '"') {
+ quote = '"';
+ continue;
+ }
+ if (buf[base] == '\'') {
+ quote = '\'';
+ continue;
+ }
+ if (buf[base] == ']') {
+ if (base +1 >= in->buf->buffer->use)
+ break;
+ if (buf[base + 1] == ']') {
+ /* conditional crap, skip both ']' ! */
+ base++;
+ continue;
+ }
+ for (i = 0;base + i < in->buf->buffer->use;i++) {
+ if (buf[base + i] == '>')
+ goto found_end_int_subset;
+ }
+ break;
+ }
+ }
+ /*
+ * We didn't find the end of the Internal subset
+ */
+ if (quote == 0)
+ ctxt->checkIndex = base;
+#ifdef DEBUG_PUSH
+ if (next == 0)
+ fprintf(stderr, "PP: lookup of int subset end filed\n");
+#endif
+ goto done;
+
+found_end_int_subset:
+ xmlParseInternalSubset(ctxt);
+ ctxt->instate = XML_PARSER_PROLOG;
+ ctxt->checkIndex = 0;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering PROLOG\n");
+#endif
+ break;
+ }
case XML_PARSER_COMMENT:
- case XML_PARSER_CDATA_SECTION:
- break;
+ fprintf(stderr, "PP: internal error, state == COMMENT\n");
+ ctxt->instate = XML_PARSER_CONTENT;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering CONTENT\n");
+#endif
+ break;
+ case XML_PARSER_PI:
+ fprintf(stderr, "PP: internal error, state == PI\n");
+ ctxt->instate = XML_PARSER_CONTENT;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering CONTENT\n");
+#endif
+ break;
+ case XML_PARSER_ENTITY_DECL:
+ fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
+ ctxt->instate = XML_PARSER_DTD;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering DTD\n");
+#endif
+ break;
+ case XML_PARSER_ENTITY_VALUE:
+ fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
+ ctxt->instate = XML_PARSER_CONTENT;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering DTD\n");
+#endif
+ break;
+ case XML_PARSER_ATTRIBUTE_VALUE:
+ fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
+ ctxt->instate = XML_PARSER_START_TAG;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: entering START_TAG\n");
+#endif
+ break;
}
}
+done:
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: done %d\n", ret);
+#endif
return(ret);
}
@@ -6872,12 +7674,38 @@
*
* Returns zero if no error, the xmlParserErrors otherwise.
*/
-xmlParserErrors
+int
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
int terminate) {
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
- (ctxt->input->buf != NULL)) {
+ (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
+ int base = ctxt->input->base - ctxt->input->buf->buffer->content;
+ int cur = ctxt->input->cur - ctxt->input->base;
+
xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
+ ctxt->input->base = ctxt->input->buf->buffer->content + base;
+ ctxt->input->cur = ctxt->input->base + cur;
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: pushed %d\n", size);
+#endif
+
+ xmlParseTry(ctxt);
+ } else if (ctxt->instate != XML_PARSER_EOF)
+ xmlParseTry(ctxt);
+ if (terminate) {
+ if ((ctxt->instate != XML_PARSER_EOF) &&
+ (ctxt->instate != XML_PARSER_EPILOG)) {
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "Extra content at the end of the document\n");
+ ctxt->wellFormed = 0;
+ ctxt->errNo = XML_ERR_DOCUMENT_END;
+ }
+ if (ctxt->instate != XML_PARSER_EOF) {
+ if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
+ ctxt->sax->endDocument(ctxt->userData);
+ }
+ ctxt->instate = XML_PARSER_EOF;
}
return((xmlParserErrors) ctxt->errNo);
}
@@ -6889,6 +7717,89 @@
************************************************************************/
/**
+ * xmlCreatePushParserCtxt :
+ * @sax: a SAX handler
+ * @user_data: The user data returned on SAX callbacks
+ * @chunk: a pointer to an array of chars
+ * @size: number of chars in the array
+ * @filename: an optional file name or URI
+ *
+ * Create a parser context for using the XML parser in push mode
+ * To allow content encoding detection, @size should be >= 4
+ * The value of @filename is used for fetching external entities
+ * and error/warning reports.
+ *
+ * Returns the new parser context or NULL
+ */
+xmlParserCtxtPtr
+xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
+ const char *chunk, int size, const char *filename) {
+ xmlParserCtxtPtr ctxt;
+ xmlParserInputPtr inputStream;
+ xmlParserInputBufferPtr buf;
+ xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
+
+ /*
+ * plug some encoding conversion routines here. !!!
+ */
+ if ((chunk != NULL) && (size >= 4))
+ enc = xmlDetectCharEncoding((const xmlChar *) chunk);
+
+ buf = xmlAllocParserInputBuffer(enc);
+ if (buf == NULL) return(NULL);
+
+ ctxt = xmlNewParserCtxt();
+ if (ctxt == NULL) {
+ xmlFree(buf);
+ return(NULL);
+ }
+ if (sax != NULL) {
+ if (ctxt->sax != &xmlDefaultSAXHandler)
+ xmlFree(ctxt->sax);
+ ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
+ if (ctxt->sax == NULL) {
+ xmlFree(buf);
+ xmlFree(ctxt);
+ return(NULL);
+ }
+ memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
+ if (user_data != NULL)
+ ctxt->userData = user_data;
+ }
+ if (filename == NULL) {
+ ctxt->directory = NULL;
+ } else {
+ ctxt->directory = xmlParserGetDirectory(filename);
+ }
+
+ inputStream = xmlNewInputStream(ctxt);
+ if (inputStream == NULL) {
+ xmlFreeParserCtxt(ctxt);
+ return(NULL);
+ }
+
+ if (filename == NULL)
+ inputStream->filename = NULL;
+ else
+ inputStream->filename = xmlMemStrdup(filename);
+ inputStream->buf = buf;
+ inputStream->base = inputStream->buf->buffer->content;
+ inputStream->cur = inputStream->buf->buffer->content;
+
+ inputPush(ctxt, inputStream);
+
+ if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
+ (ctxt->input->buf != NULL)) {
+ xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
+#ifdef DEBUG_PUSH
+ fprintf(stderr, "PP: pushed %d\n", size);
+#endif
+ }
+
+ return(ctxt);
+}
+
+/**
* xmlCreateDocParserCtxt :
* @cur: a pointer to an array of xmlChar
*
@@ -7397,7 +8308,8 @@
if (ctxt->sax != &xmlDefaultSAXHandler)
xmlFree(ctxt->sax);
ctxt->sax = sax;
- ctxt->userData = user_data;
+ if (user_data != NULL)
+ ctxt->userData = user_data;
xmlParseDocument(ctxt);
diff --git a/parser.h b/parser.h
index 8a6443c..196aeb3 100644
--- a/parser.h
+++ b/parser.h
@@ -40,11 +40,12 @@
const char *filename; /* The file analyzed, if any */
const char *directory; /* the directory/base of teh file */
- const xmlChar *base; /* Base of the array to parse */
- const xmlChar *cur; /* Current char being parsed */
+ const xmlChar *base; /* Base of the array to parse */
+ const xmlChar *cur; /* Current char being parsed */
+ int length; /* length if known */
int line; /* Current line */
int col; /* Current column */
- int consumed; /* How many xmlChars were already consumed */
+ int consumed; /* How many xmlChars already consumed */
xmlParserInputDeallocate free; /* function to deallocate the base */
} xmlParserInput;
typedef xmlParserInput *xmlParserInputPtr;
@@ -77,20 +78,25 @@
typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
/**
- * The parser is not a state based parser, but we need to maintain
+ * The parser is not (yet) a state based parser, but we need to maintain
* minimum state informations, especially for entities processing.
*/
typedef enum {
- XML_PARSER_EOF = 0,
- XML_PARSER_PROLOG,
- XML_PARSER_CONTENT,
- XML_PARSER_ENTITY_DECL,
- XML_PARSER_ENTITY_VALUE,
- XML_PARSER_ATTRIBUTE_VALUE,
- XML_PARSER_DTD,
- XML_PARSER_EPILOG,
- XML_PARSER_COMMENT,
- XML_PARSER_CDATA_SECTION
+ XML_PARSER_EOF = -1, /* nothing is to be parsed */
+ XML_PARSER_START = 0, /* nothing has been parsed */
+ XML_PARSER_MISC, /* Misc* before int subset */
+ XML_PARSER_PI, /* Within a processing instruction */
+ XML_PARSER_DTD, /* within some DTD content */
+ XML_PARSER_PROLOG, /* Misc* after internal subset */
+ XML_PARSER_COMMENT, /* within a comment */
+ XML_PARSER_START_TAG, /* within a start tag */
+ XML_PARSER_CONTENT, /* within the content */
+ XML_PARSER_CDATA_SECTION, /* within a CDATA section */
+ XML_PARSER_END_TAG, /* within a closing tag */
+ XML_PARSER_ENTITY_DECL, /* within an entity declaration */
+ XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */
+ XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
+ XML_PARSER_EPILOG /* the Misc* after the last end tag */
} xmlParserInputState;
/**
@@ -151,6 +157,7 @@
xmlChar * *nameTab; /* array of nodes */
long nbChars; /* number of xmlChar processed */
+ long checkIndex; /* used by progressive parsing lookup */
} _xmlParserCtxt;
typedef _xmlParserCtxt xmlParserCtxt;
typedef xmlParserCtxt *xmlParserCtxtPtr;
@@ -347,13 +354,35 @@
xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax,
const xmlChar *ExternalID,
const xmlChar *SystemID);
+/**
+ * SAX initialization routines
+ */
+void xmlDefaultSAXHandlerInit(void);
+void htmlDefaultSAXHandlerInit(void);
+
+/**
+ * Parser contexts handling.
+ */
void xmlInitParserCtxt (xmlParserCtxtPtr ctxt);
void xmlClearParserCtxt (xmlParserCtxtPtr ctxt);
+void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt,
const xmlChar* buffer,
const char* filename);
-void xmlDefaultSAXHandlerInit(void);
-void htmlDefaultSAXHandlerInit(void);
+xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
+
+/**
+ * Interfaces for the Push mode
+ */
+xmlParserCtxtPtr xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,
+ void *user_data,
+ const char *chunk,
+ int size,
+ const char *filename);
+int xmlParseChunk (xmlParserCtxtPtr ctxt,
+ const char *chunk,
+ int size,
+ int terminate);
/**
* Node infos
diff --git a/parserInternals.h b/parserInternals.h
index 9da4846..5a7b7ff 100644
--- a/parserInternals.h
+++ b/parserInternals.h
@@ -435,9 +435,10 @@
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
*/
#define IS_CHAR(c) \
- ((((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
- (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) && \
- (((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) <= 0x10FFFF))
+ ((((c) >= 0x20) && ((c) <= 0xD7FF)) || \
+ ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
+ (((c) >= 0xE000) && ((c) <= 0xFFFD)) || \
+ (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
/*
* [85] BaseChar ::= ... long list see REC ...
@@ -595,8 +596,7 @@
xmlChar * xmlParseAttribute (xmlParserCtxtPtr ctxt,
xmlChar **value);
xmlChar * xmlParseStartTag (xmlParserCtxtPtr ctxt);
-void xmlParseEndTag (xmlParserCtxtPtr ctxt,
- xmlChar *tagname);
+void xmlParseEndTag (xmlParserCtxtPtr ctxt);
void xmlParseCDSect (xmlParserCtxtPtr ctxt);
void xmlParseContent (xmlParserCtxtPtr ctxt);
void xmlParseElement (xmlParserCtxtPtr ctxt);
diff --git a/testSAX.c b/testSAX.c
index f33a1d6..fd65092 100644
--- a/testSAX.c
+++ b/testSAX.c
@@ -45,6 +45,7 @@
static int debug = 0;
static int copy = 0;
static int recovery = 0;
+static int push = 0;
xmlSAXHandler emptySAXHandlerStruct = {
NULL, /* internalSubset */
@@ -77,29 +78,6 @@
xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
extern xmlSAXHandlerPtr debugSAXHandler;
-/*
- * Note: there is a couple of errors introduced on purpose.
- */
-static char buffer[] =
-"<?xml version=\"1.0\"?>\n\
-<?xml:namespace ns = \"http://www.ietf.org/standards/dav/\" prefix = \"D\"?>\n\
-<?xml:namespace ns = \"http://www.w3.com/standards/z39.50/\" prefix = \"Z\"?>\n\
-<D:propertyupdate>\n\
-<D:set a=\"'toto'\" b>\n\
- <D:prop>\n\
- <Z:authors>\n\
- <Z:Author>Jim Whitehead</Z:Author>\n\
- <Z:Author>Roy Fielding</Z:Author>\n\
- </Z:authors>\n\
- </D:prop>\n\
- </D:set>\n\
- <D:remove>\n\
- <D:prop><Z:Copyright-Owner/></D:prop>\n\
- </D:remove>\n\
-</D:propertyupdate>\n\
-\n\
-";
-
/************************************************************************
* *
* Debug Handlers *
@@ -588,42 +566,76 @@
void parseAndPrintFile(char *filename) {
int res;
- /*
- * Empty callbacks for checking
- */
- res = xmlSAXUserParseFile(emptySAXHandler, NULL, filename);
- if (res != 0) {
- fprintf(stdout, "xmlSAXUserParseFile returned error %d\n", res);
- }
+ if (push) {
+ FILE *f;
- /*
- * Debug callback
- */
- res = xmlSAXUserParseFile(debugSAXHandler, NULL, filename);
- if (res != 0) {
- fprintf(stdout, "xmlSAXUserParseFile returned error %d\n", res);
+ /*
+ * Empty callbacks for checking
+ */
+ f = fopen(filename, "r");
+ if (f != NULL) {
+ int res;
+ char chars[10];
+ xmlParserCtxtPtr ctxt;
+
+ res = fread(chars, 1, 4, f);
+ if (res > 0) {
+ ctxt = xmlCreatePushParserCtxt(emptySAXHandler, NULL,
+ chars, res, filename);
+ while ((res = fread(chars, 1, 3, f)) > 0) {
+ xmlParseChunk(ctxt, chars, res, 0);
+ }
+ xmlParseChunk(ctxt, chars, 0, 1);
+ xmlFreeParserCtxt(ctxt);
+ }
+ fclose(f);
+ } else {
+ fprintf(stderr, "Cannot read file %s\n", filename);
+ }
+ /*
+ * Debug callback
+ */
+ f = fopen(filename, "r");
+ if (f != NULL) {
+ int res;
+ char chars[10];
+ xmlParserCtxtPtr ctxt;
+
+ res = fread(chars, 1, 4, f);
+ if (res > 0) {
+ ctxt = xmlCreatePushParserCtxt(debugSAXHandler, NULL,
+ chars, res, filename);
+ while ((res = fread(chars, 1, 3, f)) > 0) {
+ xmlParseChunk(ctxt, chars, res, 0);
+ }
+ res = xmlParseChunk(ctxt, chars, 0, 1);
+ xmlFreeParserCtxt(ctxt);
+ if (res != 0) {
+ fprintf(stdout,
+ "xmlSAXUserParseFile returned error %d\n", res);
+ }
+ }
+ fclose(f);
+ }
+ } else {
+ /*
+ * Empty callbacks for checking
+ */
+ res = xmlSAXUserParseFile(emptySAXHandler, NULL, filename);
+ if (res != 0) {
+ fprintf(stdout, "xmlSAXUserParseFile returned error %d\n", res);
+ }
+
+ /*
+ * Debug callback
+ */
+ res = xmlSAXUserParseFile(debugSAXHandler, NULL, filename);
+ if (res != 0) {
+ fprintf(stdout, "xmlSAXUserParseFile returned error %d\n", res);
+ }
}
}
-void parseAndPrintBuffer(char *buf) {
- int res;
-
- /*
- * Empty callbacks for checking
- */
- res = xmlSAXUserParseMemory(emptySAXHandler, NULL, buf, strlen(buf));
- if (res != 0) {
- fprintf(stdout, "xmlSAXUserParseMemory returned error %d\n", res);
- }
-
- /*
- * Debug callback
- */
- res = xmlSAXUserParseMemory(debugSAXHandler, NULL, buf, strlen(buf));
- if (res != 0) {
- fprintf(stdout, "xmlSAXUserParseMemory returned error %d\n", res);
- }
-}
int main(int argc, char **argv) {
int i;
@@ -637,6 +649,9 @@
else if ((!strcmp(argv[i], "-recover")) ||
(!strcmp(argv[i], "--recover")))
recovery++;
+ else if ((!strcmp(argv[i], "-push")) ||
+ (!strcmp(argv[i], "--push")))
+ push++;
}
for (i = 1; i < argc ; i++) {
if (argv[i][0] != '-') {
@@ -644,10 +659,6 @@
files ++;
}
}
- if (files == 0) {
- printf("\nFirst test for the parser, with errors\n");
- parseAndPrintBuffer(buffer);
- }
xmlCleanupParser();
xmlMemoryDump();
diff --git a/tester.c b/tester.c
index 3ee565b..8bb3e59 100644
--- a/tester.c
+++ b/tester.c
@@ -30,10 +30,19 @@
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
+#ifdef HAVE_LIBREADLINE
+#include <readline/readline.h>
+#ifdef HAVE_LIBHISTORY
+#include <readline/history.h>
+#endif
+#endif
#include "xmlmemory.h"
#include "parser.h"
+#include "HTMLparser.h"
+#include "HTMLtree.h"
#include "tree.h"
+#include "xpath.h"
#include "debugXML.h"
static int debug = 0;
@@ -47,20 +56,89 @@
static int repeat = 0;
static int insert = 0;
static int compress = 0;
+static int html = 0;
+static int shell = 0;
+static int push = 0;
extern int xmlDoValidityCheckingDefaultValue;
+/**
+ * xmlShellReadline:
+ * @prompt: the prompt value
+ *
+ * Read a string
+ *
+ * Returns a pointer to it or NULL on EOF; the caller is expected to
+ * free the returned string.
+ */
+char *
+xmlShellReadline(char *prompt) {
+#ifdef HAVE_LIBREADLINE
+ char *line_read;
+
+ /* Get a line from the user. */
+ line_read = readline (prompt);
+
+ /* If the line has any text in it, save it on the history. */
+ if (line_read && *line_read)
+ add_history (line_read);
+
+ return (line_read);
+#else
+ char line_read[501];
+
+ if (prompt != NULL)
+ fprintf(stdout, "%s", prompt);
+ if (!fgets(line_read, 500, stdin))
+ return(NULL);
+ line_read[500] = 0;
+ return(strdup(line_read));
+#endif
+}
void parseAndPrintFile(char *filename) {
- xmlDocPtr doc, tmp;
+ xmlDocPtr doc = NULL, tmp;
+
+ if (html) {
+ doc = htmlParseFile(filename, NULL);
+ } else {
+ /*
+ * build an XML tree from a string;
+ */
+ if (push) {
+ FILE *f;
+
+ f = fopen(filename, "r");
+ if (f != NULL) {
+ int res, size = 3;
+ char chars[1024];
+ xmlParserCtxtPtr ctxt;
+
+ if (repeat)
+ size = 1024;
+ res = fread(chars, 1, 4, f);
+ if (res > 0) {
+ ctxt = xmlCreatePushParserCtxt(NULL, NULL,
+ chars, res, filename);
+ while ((res = fread(chars, 1, size, f)) > 0) {
+ xmlParseChunk(ctxt, chars, res, 0);
+ }
+ xmlParseChunk(ctxt, chars, 0, 1);
+ doc = ctxt->myDoc;
+ xmlFreeParserCtxt(ctxt);
+ }
+ }
+ } else if (recovery)
+ doc = xmlRecoverFile(filename);
+ else
+ doc = xmlParseFile(filename);
+ }
/*
- * build an XML tree from a string;
+ * shell interaction
*/
- if (recovery)
- doc = xmlRecoverFile(filename);
- else
- doc = xmlParseFile(filename);
+ if (shell)
+ xmlShell(doc, filename, xmlShellReadline, stdout);
/*
* test intermediate copy if needed.
@@ -71,7 +149,7 @@
xmlFreeDoc(tmp);
}
- if (insert) {
+ if ((insert) && (!html)) {
const xmlChar* list[256];
int nb, i;
xmlNodePtr node;
@@ -116,7 +194,7 @@
xmlValidateDocument(&cvp, doc);
}
- if (debugent)
+ if ((debugent) && (!html))
xmlDebugDumpEntities(stdout, doc);
/*
@@ -157,11 +235,23 @@
else if ((!strcmp(argv[i], "-repeat")) ||
(!strcmp(argv[i], "--repeat")))
repeat++;
+ else if ((!strcmp(argv[i], "-push")) ||
+ (!strcmp(argv[i], "--push")))
+ push++;
else if ((!strcmp(argv[i], "-compress")) ||
(!strcmp(argv[i], "--compress"))) {
compress++;
xmlSetCompressMode(9);
}
+ else if ((!strcmp(argv[i], "-html")) ||
+ (!strcmp(argv[i], "--html"))) {
+ html++;
+ }
+ else if ((!strcmp(argv[i], "-shell")) ||
+ (!strcmp(argv[i], "--shell"))) {
+ shell++;
+ noout = 1;
+ }
}
if (noent != 0) xmlSubstituteEntitiesDefault(1);
if (valid != 0) xmlDoValidityCheckingDefaultValue = 1;
@@ -190,6 +280,9 @@
printf("\t--repeat : repeat 100 times, for timing or profiling\n");
printf("\t--insert : ad-hoc test for valid insertions\n");
printf("\t--compress : turn on gzip compression of output\n");
+ printf("\t--html : use the HTML parser\n");
+ printf("\t--shell : run a navigating shell\n");
+ printf("\t--push : use the push mode of the parser\n");
}
xmlCleanupParser();
xmlMemoryDump();
diff --git a/tree.c b/tree.c
index 3242aec..c6685ea 100644
--- a/tree.c
+++ b/tree.c
@@ -2498,6 +2498,7 @@
return(xmlNodeListGetString(NULL, attr->val, 1));
break;
}
+ case XML_COMMENT_NODE:
case XML_PI_NODE:
if (cur->content != NULL)
#ifndef XML_USE_BUFFER_CONTENT
@@ -2507,8 +2508,12 @@
#endif
return(NULL);
case XML_ENTITY_REF_NODE:
+ /*
+ * Locate the entity, and get its content
+ * @@@
+ */
+ return(NULL);
case XML_ENTITY_NODE:
- case XML_COMMENT_NODE:
case XML_DOCUMENT_NODE:
case XML_HTML_DOCUMENT_NODE:
case XML_DOCUMENT_TYPE_NODE:
@@ -2858,18 +2863,6 @@
}
node = node->parent;
}
-#if 0
- /* Removed support for old namespaces */
- if (doc != NULL) {
- cur = doc->oldNs;
- while (cur != NULL) {
- if ((cur->prefix != NULL) && (nameSpace != NULL) &&
- (!xmlStrcmp(cur->prefix, nameSpace)))
- return(cur);
- cur = cur->next;
- }
- }
-#endif
return(NULL);
}
@@ -2886,31 +2879,283 @@
xmlNsPtr
xmlSearchNsByHref(xmlDocPtr doc, xmlNodePtr node, const xmlChar *href) {
xmlNsPtr cur;
+ xmlNodePtr orig = node;
if ((node == NULL) || (href == NULL)) return(NULL);
while (node != NULL) {
cur = node->nsDef;
while (cur != NULL) {
if ((cur->href != NULL) && (href != NULL) &&
- (!xmlStrcmp(cur->href, href)))
+ (!xmlStrcmp(cur->href, href))) {
+ /*
+ * Check that the prefix is not shadowed between orig and node
+ */
+ xmlNodePtr check = orig;
+ xmlNsPtr tst;
+
+ while (check != node) {
+ tst = check->nsDef;
+ while (tst != NULL) {
+ if ((tst->prefix == NULL) && (cur->prefix == NULL))
+ goto shadowed;
+ if ((tst->prefix != NULL) && (cur->prefix != NULL) &&
+ (!xmlStrcmp(tst->prefix, cur->prefix)))
+ goto shadowed;
+ tst = tst->next;
+ }
+ }
return(cur);
+ }
+shadowed:
cur = cur->next;
}
node = node->parent;
}
-#if 0
- /* Removed support for old namespaces */
- if (doc != NULL) {
- cur = doc->oldNs;
- while (cur != NULL) {
- if ((cur->href != NULL) && (href != NULL) &&
- (!xmlStrcmp(cur->href, href)))
- return(cur);
- cur = cur->next;
+ return(NULL);
+}
+
+/**
+ * xmlNewReconciliedNs:
+ * @doc: the document
+ * @tree: a node expected to hold the new namespace
+ * @ns: the original namespace
+ *
+ * This function tries to locate a namespace definition in the ancestors
+ * of @tree, or creates a new namespace definition node similar to
+ * @ns, trying to reuse the same prefix. However, if the given prefix is
+ * null (default namespace) or reused within the subtree defined by
+ * @tree or on one of its ancestors, then a new prefix is generated.
+ * Returns the (new) namespace definition or NULL in case of error.
+ */
+xmlNsPtr
+xmlNewReconciliedNs(xmlDocPtr doc, xmlNodePtr tree, xmlNsPtr ns) {
+ xmlNsPtr def;
+ xmlChar prefix[50];
+ int counter = 1;
+
+ if (tree == NULL) {
+#ifdef DEBUG_TREE
+ fprintf(stderr, "xmlNewReconciliedNs : tree == NULL\n");
+#endif
+ return(NULL);
+ }
+ if (ns == NULL) {
+#ifdef DEBUG_TREE
+ fprintf(stderr, "xmlNewReconciliedNs : ns == NULL\n");
+#endif
+ return(NULL);
+ }
+ /*
+ * Search an existing namespace definition inherited.
+ */
+ def = xmlSearchNsByHref(doc, tree, ns->href);
+ if (def != NULL)
+ return(def);
+
+ /*
+ * Find a close prefix which is not already in use.
+ * Namespace prefixes longer than 20 chars are truncated.
+ */
+ sprintf((char *) prefix, "%.20s", ns->prefix);
+ def = xmlSearchNs(doc, tree, prefix);
+ while (def != NULL) {
+ if (counter > 1000) return(NULL);
+ sprintf((char *) prefix, "%.20s%d", ns->prefix, counter++);
+ def = xmlSearchNs(doc, tree, prefix);
+ }
+
+ /*
+ * Ok, now we are ready to create a new one.
+ */
+ def = xmlNewNs(tree, ns->href, prefix);
+ return(def);
+}
+
+/**
+ * xmlReconciliateNs:
+ * @doc: the document
+ * @tree: a node defining the subtree to reconciliate
+ *
+ * This function checks that all the namespaces used within the given
+ * tree are properly declared. This is needed, for example, after Copy or Cut
+ * and then Paste operations. The subtree may still hold pointers to
+ * namespace declarations that are outside the subtree, invalid or masked.
+ * As much as possible the function tries to reuse the existing namespaces
+ * found in the new environment. If that is not possible, the new namespaces
+ * are redeclared on @tree at the top of the given subtree.
+ * Returns the number of namespace declarations created or -1 in case of error.
+ */
+int
+xmlReconciliateNs(xmlDocPtr doc, xmlNodePtr tree) {
+ xmlNsPtr *oldNs = NULL;
+ xmlNsPtr *newNs = NULL;
+ int sizeCache = 0;
+ int nbCache = 0;
+
+ xmlNsPtr n;
+ xmlNodePtr node = tree;
+ xmlAttrPtr attr;
+ int ret = 0, i;
+
+ while (node != NULL) {
+ /*
+ * Reconciliate the node namespace
+ */
+ if (node->ns != NULL) {
+ /*
+ * initialize the cache if needed
+ */
+ if (sizeCache == 0) {
+ sizeCache = 10;
+ oldNs = (xmlNsPtr *) xmlMalloc(sizeCache *
+ sizeof(xmlNsPtr));
+ if (oldNs == NULL) {
+ fprintf(stderr, "xmlReconciliateNs : memory pbm\n");
+ return(-1);
+ }
+ newNs = (xmlNsPtr *) xmlMalloc(sizeCache *
+ sizeof(xmlNsPtr));
+ if (newNs == NULL) {
+ fprintf(stderr, "xmlReconciliateNs : memory pbm\n");
+ xmlFree(oldNs);
+ return(-1);
+ }
+ }
+ for (i = 0;i < nbCache;i++) {
+ if (oldNs[i] == node->ns) {
+ node->ns = newNs[i];
+ break;
+ }
+ }
+ if (i == nbCache) {
+ /*
+ * Ok we need to recreate a new namespace definition
+ */
+ n = xmlNewReconciliedNs(doc, tree, node->ns);
+ if (n != NULL) { /* :-( what if else ??? */
+ /*
+ * check if we need to grow the cache buffers.
+ */
+ if (sizeCache <= nbCache) {
+ sizeCache *= 2;
+ oldNs = (xmlNsPtr *) xmlRealloc(oldNs, sizeCache *
+ sizeof(xmlNsPtr));
+ if (oldNs == NULL) {
+ fprintf(stderr, "xmlReconciliateNs : memory pbm\n");
+ xmlFree(newNs);
+ return(-1);
+ }
+ newNs = (xmlNsPtr *) xmlRealloc(newNs, sizeCache *
+ sizeof(xmlNsPtr));
+ if (newNs == NULL) {
+ fprintf(stderr, "xmlReconciliateNs : memory pbm\n");
+ xmlFree(oldNs);
+ return(-1);
+ }
+ }
+ newNs[nbCache] = n;
+ oldNs[nbCache++] = node->ns;
+ node->ns = n;
+ }
+ }
+ }
+ /*
+ * now check for namespaces held by attributes on the node.
+ */
+ attr = node->properties;
+ while (attr != NULL) {
+ if (attr->ns != NULL) {
+ /*
+ * initialize the cache if needed
+ */
+ if (sizeCache == 0) {
+ sizeCache = 10;
+ oldNs = (xmlNsPtr *) xmlMalloc(sizeCache *
+ sizeof(xmlNsPtr));
+ if (oldNs == NULL) {
+ fprintf(stderr, "xmlReconciliateNs : memory pbm\n");
+ return(-1);
+ }
+ newNs = (xmlNsPtr *) xmlMalloc(sizeCache *
+ sizeof(xmlNsPtr));
+ if (newNs == NULL) {
+ fprintf(stderr, "xmlReconciliateNs : memory pbm\n");
+ xmlFree(oldNs);
+ return(-1);
+ }
+ }
+ for (i = 0;i < nbCache;i++) {
+ if (oldNs[i] == attr->ns) {
+ node->ns = newNs[i];
+ break;
+ }
+ }
+ if (i == nbCache) {
+ /*
+ * Ok we need to recreate a new namespace definition
+ */
+ n = xmlNewReconciliedNs(doc, tree, attr->ns);
+ if (n != NULL) { /* :-( what if else ??? */
+ /*
+ * check if we need to grow the cache buffers.
+ */
+ if (sizeCache <= nbCache) {
+ sizeCache *= 2;
+ oldNs = (xmlNsPtr *) xmlRealloc(oldNs, sizeCache *
+ sizeof(xmlNsPtr));
+ if (oldNs == NULL) {
+ fprintf(stderr,
+ "xmlReconciliateNs : memory pbm\n");
+ xmlFree(newNs);
+ return(-1);
+ }
+ newNs = (xmlNsPtr *) xmlRealloc(newNs, sizeCache *
+ sizeof(xmlNsPtr));
+ if (newNs == NULL) {
+ fprintf(stderr,
+ "xmlReconciliateNs : memory pbm\n");
+ xmlFree(oldNs);
+ return(-1);
+ }
+ }
+ newNs[nbCache] = n;
+ oldNs[nbCache++] = attr->ns;
+ attr->ns = n;
+ }
+ }
+ }
+ attr = attr->next;
+ }
+
+ /*
+ * Browse the full subtree, depth first
+ */
+ if (node->childs != NULL) {
+ /* depth first */
+ node = node->childs;
+ } else if ((node != tree) && (node->next != NULL)) {
+ /* then siblings */
+ node = node->next;
+ } else if (node != tree) {
+ /* go up to parents->next if needed */
+ while (node != tree) {
+ if (node->parent != NULL)
+ node = node->parent;
+ if ((node != tree) && (node->next != NULL)) {
+ node = node->next;
+ break;
+ }
+ if (node->parent == NULL) {
+ node = NULL;
+ break;
+ }
+ }
+ /* exit condition */
+ if (node == tree)
+ node = NULL;
}
}
-#endif
- return(NULL);
+ return(ret);
}
/**
@@ -3095,8 +3340,9 @@
xmlTextConcat(xmlNodePtr node, const xmlChar *content, int len) {
if (node == NULL) return;
- if (node->type != XML_TEXT_NODE) {
- fprintf(stderr, "xmlTextConcat: node is not text\n");
+ if ((node->type != XML_TEXT_NODE) &&
+ (node->type != XML_CDATA_SECTION_NODE)) {
+ fprintf(stderr, "xmlTextConcat: node is not text nor cdata\n");
return;
}
#ifndef XML_USE_BUFFER_CONTENT
@@ -3376,7 +3622,11 @@
if (len == 0) return;
/* CJN What's this for??? */
- l = xmlStrlen(str);
+ if (len < 0)
+ l = xmlStrlen(str);
+ else
+ for (l = 0;l < len;l++)
+ if (str[l] == 0) break;
if (l < len){ len = l; printf("xmlBufferAdd bad length\n"); }
/* CJN 11.18.99 okay, now I'm using the length */
@@ -3676,6 +3926,9 @@
static void
xmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int level,
int format);
+void
+htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
+
/**
* xmlNodeListDump:
* @buf: the XML buffer output
@@ -3851,6 +4104,40 @@
}
/**
+ * xmlElemDump:
+ * @f: the FILE * for the output
+ * @doc: the document
+ * @cur: the current node
+ *
+ * Dump an XML/HTML node, recursive behaviour, children are printed too.
+ */
+void
+xmlElemDump(FILE *f, xmlDocPtr doc, xmlNodePtr cur) {
+ xmlBufferPtr buf;
+
+ if (cur == NULL) {
+#ifdef DEBUG_TREE
+ fprintf(stderr, "xmlElemDump : cur == NULL\n");
+#endif
+ return;
+ }
+ if (doc == NULL) {
+#ifdef DEBUG_TREE
+ fprintf(stderr, "xmlElemDump : doc == NULL\n");
+#endif
+ }
+ buf = xmlBufferCreate();
+ if (buf == NULL) return;
+ if ((doc != NULL) &&
+ (doc->type == XML_HTML_DOCUMENT_NODE)) {
+ htmlNodeDump(buf, doc, cur);
+ } else
+ xmlNodeDump(buf, doc, cur, 0, 1);
+ xmlBufferDump(f, buf);
+ xmlBufferFree(buf);
+}
+
+/**
* xmlDocContentDump:
* @buf: the XML buffer output
* @cur: the document
@@ -3937,7 +4224,7 @@
* Returns 0 (uncompressed) to 9 (max compression)
*/
int
- xmlGetDocCompressMode (xmlDocPtr doc) {
+xmlGetDocCompressMode (xmlDocPtr doc) {
if (doc == NULL) return(-1);
return(doc->compression);
}
diff --git a/tree.h b/tree.h
index 3a0285b..cce6168 100644
--- a/tree.h
+++ b/tree.h
@@ -526,6 +526,9 @@
int *size);
void xmlDocDump (FILE *f,
xmlDocPtr cur);
+void xmlElemDump (FILE *f,
+ xmlDocPtr cur,
+ xmlNodePtr elem);
int xmlSaveFile (const char *filename,
xmlDocPtr cur);
diff --git a/valid.c b/valid.c
index 3af1dcd..f592c18 100644
--- a/valid.c
+++ b/valid.c
@@ -25,6 +25,8 @@
#include "parser.h"
#include "parserInternals.h"
+/* TODO: use hash table for accesses to elem and attribute definitions */
+
#define VERROR \
if ((ctxt != NULL) && (ctxt->error != NULL)) ctxt->error
@@ -494,6 +496,7 @@
else
cur->name = NULL;
cur->content = xmlCopyElementContent(ent->content);
+ /* TODO : rebuild the attribute list on the copy */
cur->attributes = NULL;
}
return(ret);
@@ -2723,6 +2726,7 @@
xmlNodePtr elem) {
xmlElementPtr elemDecl;
xmlElementContentPtr cont;
+ xmlAttributePtr attr;
xmlNodePtr child;
int ret = 1;
const xmlChar *name;
@@ -2869,7 +2873,69 @@
break;
}
- /* TODO - [ VC: Required Attribute ] */
+ /* [ VC: Required Attribute ] */
+ attr = elemDecl->attributes;
+ while (attr != NULL) {
+ if (attr->def == XML_ATTRIBUTE_REQUIRED) {
+ xmlAttrPtr attrib;
+ int qualified = -1;
+
+ attrib = elem->properties;
+ while (attrib != NULL) {
+ if (!xmlStrcmp(attrib->name, attr->name)) {
+ if (attr->prefix != NULL) {
+ xmlNsPtr nameSpace = attrib->ns;
+
+ if (nameSpace == NULL)
+ nameSpace = elem->ns;
+ /*
+ * qualified names handling is problematic, having a
+ * different prefix should be possible but DTDs don't
+ * allow to define the URI instead of the prefix :-(
+ */
+ if (nameSpace == NULL) {
+ if (qualified < 0)
+ qualified = 0;
+ } else if (xmlStrcmp(nameSpace->prefix, attr->prefix)) {
+ if (qualified < 1)
+ qualified = 1;
+ } else
+ goto found;
+ } else {
+ /*
+ * We should allow applications to define namespaces
+ * for their application even if the DTD doesn't
+ * carry one, otherwise, basically we would always
+ * break.
+ */
+ goto found;
+ }
+ }
+ attrib = attrib->next;
+ }
+ if (qualified == -1) {
+ if (attr->prefix == NULL) {
+ VERROR(ctxt->userData,
+ "Element %s doesn't carry attribute %s\n",
+ elem->name, attr->name);
+ } else {
+ VERROR(ctxt->userData,
+ "Element %s doesn't carry attribute %s:%s\n",
+ elem->name, attr->prefix,attr->name);
+ }
+ } else if (qualified == 0) {
+ VWARNING(ctxt->userData,
+ "Element %s required attribute %s:%s has no prefix\n",
+ elem->name, attr->prefix,attr->name);
+ } else if (qualified == 1) {
+ VWARNING(ctxt->userData,
+ "Element %s required attribute %s:%s has different prefix\n",
+ elem->name, attr->prefix,attr->name);
+ }
+ }
+found:
+ attr = attr->next;
+ }
return(ret);
}
diff --git a/xmlIO.c b/xmlIO.c
index c7c9a86..cd3ead0 100644
--- a/xmlIO.c
+++ b/xmlIO.c
@@ -68,6 +68,11 @@
}
memset(ret, 0, (size_t) sizeof(xmlParserInputBuffer));
ret->buffer = xmlBufferCreate();
+ if (ret->buffer == NULL) {
+ xmlFree(ret);
+ return(NULL);
+ }
+ ret->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT;
ret->encoder = xmlGetCharEncodingHandler(enc);
ret->fd = -1;
ret->netIO = NULL;
@@ -263,32 +268,30 @@
*/
int
xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) {
- char *buffer = NULL;
int nbchars = 0;
if (len < 0) return(0);
if (in->encoder != NULL) {
- xmlChar *buf;
+ xmlChar *buffer;
- buf = (xmlChar *) xmlMalloc((len + 1) * 2 * sizeof(xmlChar));
- if (buf == NULL) {
+ buffer = (xmlChar *) xmlMalloc((len + 1) * 2 * sizeof(xmlChar));
+ if (buffer == NULL) {
fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
xmlFree(buffer);
return(-1);
}
- nbchars = in->encoder->input(buf, (len + 1) * 2 * sizeof(xmlChar),
- BAD_CAST buffer, len);
+ nbchars = in->encoder->input(buffer, (len + 1) * 2 * sizeof(xmlChar),
+ (xmlChar *) buf, len);
/*
* TODO : we really need to have something atomic or the
* encoder must report the number of bytes read
*/
- buf[nbchars] = 0;
- xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
- xmlFree(buf);
- } else {
- nbchars = len;
buffer[nbchars] = 0;
xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars);
+ xmlFree(buffer);
+ } else {
+ nbchars = len;
+ xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
}
#ifdef DEBUG_INPUT
fprintf(stderr, "I/O: pushed %d chars, buffer %d/%d\n",
@@ -401,7 +404,14 @@
int
xmlParserInputBufferRead(xmlParserInputBufferPtr in, int len) {
/* xmlBufferEmpty(in->buffer); */
- return(xmlParserInputBufferGrow(in, len));
+ if ((in->netIO != NULL) || (in->file != NULL) ||
+#ifdef HAVE_ZLIB_H
+ (in->gzfile != NULL) ||
+#endif
+ (in->fd >= 0))
+ return(xmlParserInputBufferGrow(in, len));
+ else
+ return(0);
}
/*
diff --git a/xmlIO.h b/xmlIO.h
index 2bdba5d..bf43de2 100644
--- a/xmlIO.h
+++ b/xmlIO.h
@@ -38,6 +38,9 @@
*/
xmlParserInputBufferPtr
+ xmlAllocParserInputBuffer (xmlCharEncoding enc);
+
+xmlParserInputBufferPtr
xmlParserInputBufferCreateFilename (const char *filename,
xmlCharEncoding enc);
xmlParserInputBufferPtr
diff --git a/xmlmemory.c b/xmlmemory.c
index 7076f80..2d46f38 100644
--- a/xmlmemory.c
+++ b/xmlmemory.c
@@ -369,9 +369,52 @@
}
/**
+ * xmlMemShow:
+ * @fp: a FILE descriptor used as the output file
+ * @nr: number of entries to dump
+ *
+ * show a short display of the memory allocated, and dump
+ * the @nr last allocated areas which were not freed
+ */
+
+void
+xmlMemShow(FILE *fp, int nr)
+{
+#ifdef MEM_LIST
+ MEMHDR *p;
+#endif
+
+ if (fp != NULL)
+ fprintf(fp," MEMORY ALLOCATED : %lu, MAX was %lu\n",
+ debugMemSize, debugMaxMemSize);
+#ifdef MEM_LIST
+ if (nr > 0) {
+ fprintf(fp,"NUMBER SIZE TYPE WHERE\n");
+ p = memlist;
+ while ((p) && nr > 0) {
+ fprintf(fp,"%6lu %6u ",p->mh_number,p->mh_size);
+ switch (p->mh_type) {
+ case STRDUP_TYPE:fprintf(fp,"strdup() in ");break;
+ case MALLOC_TYPE:fprintf(fp,"malloc() in ");break;
+ case REALLOC_TYPE:fprintf(fp,"realloc() in ");break;
+ default:fprintf(fp," ??? in ");break;
+ }
+ if (p->mh_file != NULL)
+ fprintf(fp,"%s(%d)", p->mh_file, p->mh_line);
+ if (p->mh_tag != MEMTAG)
+ fprintf(fp," INVALID");
+ fprintf(fp,"\n");
+ nr--;
+ p = p->mh_next;
+ }
+ }
+#endif /* MEM_LIST */
+}
+
+/**
* xmlMemDisplay:
* @fp: a FILE descriptor used as the output file, if NULL, the result is
- 8 written to the file .memorylist
+ * written to the file .memorylist
*
* show in-extenso the memory blocks allocated
*/
diff --git a/xmlmemory.h b/xmlmemory.h
index 5c1b477..64477a1 100644
--- a/xmlmemory.h
+++ b/xmlmemory.h
@@ -1,5 +1,5 @@
/*
- * memory.h: interface for the memory allocation debug.
+ * xmlmemory.h: interface for the memory allocation debug.
*
* Daniel.Veillard@w3.org
*/
@@ -24,6 +24,7 @@
#define xmlInitMemory()
#define xmlMemoryDump()
#define xmlMemDisplay(x)
+#define xmlMemShow(x, d)
#else /* ! NO_DEBUG_MEMORY */
#include <stdio.h>
@@ -51,6 +52,7 @@
char * xmlMemStrdup (const char *str);
int xmlMemUsed (void);
void xmlMemDisplay (FILE *fp);
+void xmlMemShow (FILE *fp, int nr);
void xmlMemoryDump (void);
int xmlInitMemory (void);
diff --git a/xpath.c b/xpath.c
index c5ce36a..4882b04 100644
--- a/xpath.c
+++ b/xpath.c
@@ -47,6 +47,10 @@
#include "xpath.h"
#include "parserInternals.h"
+/* #define DEBUG */
+/* #define DEBUG_STEP */
+/* #define DEBUG_EXPR */
+
/*
* Setup stuff for floating point
* The lack of portability of this section of the libc is annoying !
@@ -151,10 +155,6 @@
initialized = 1;
}
-/* #define DEBUG */
-/* #define DEBUG_STEP */
-/* #define DEBUG_EXPR */
-
FILE *xmlXPathDebug = NULL;
#define TODO \
@@ -748,6 +748,22 @@
}
/**
+ * xmlXPathFreeNodeSetList:
+ * @obj: an existing NodeSetList object
+ *
+ * Free up the xmlXPathObjectPtr @obj but don't deallocate the objects in
+ * the list contrary to xmlXPathFreeObject().
+ */
+void
+xmlXPathFreeNodeSetList(xmlXPathObjectPtr obj) {
+ if (obj == NULL) return;
+#ifdef DEBUG
+ memset(obj, 0xB , (size_t) sizeof(xmlXPathObject));
+#endif
+ xmlFree(obj);
+}
+
+/**
* xmlXPathFreeObject:
* @obj: the object to free
*
@@ -791,6 +807,12 @@
}
memset(ret, 0 , (size_t) sizeof(xmlXPathContext));
ret->doc = doc;
+ /***********
+ ret->node = (xmlNodePtr) doc;
+ ret->nodelist = xmlXPathNodeSetCreate(ret->node);
+ ***********/
+ ret->node = NULL;
+ ret->nodelist = NULL;
ret->nb_variables = 0;
ret->max_variables = 0;
@@ -825,6 +847,10 @@
if (ctxt->namespaces != NULL)
xmlFree(ctxt->namespaces);
+ /***********
+ if (ctxt->nodelist != NULL)
+ xmlXPathFreeNodeSet(ctxt->nodelist);
+ ***********/
#ifdef DEBUG
memset(ctxt, 0xB , (size_t) sizeof(xmlXPathContext));
#endif
@@ -1467,12 +1493,13 @@
xmlNodePtr
xmlXPathNextChild(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
if (cur == NULL) {
- if (ctxt->context->node == (xmlNodePtr) ctxt->context->doc)
- return(ctxt->context->doc->root);
+ if ((ctxt->context->node->type == XML_DOCUMENT_NODE) ||
+ (ctxt->context->node->type == XML_HTML_DOCUMENT_NODE))
+ return(((xmlDocPtr) ctxt->context->node)->root);
return(ctxt->context->node->childs);
}
- if ((ctxt->context->node->type == XML_DOCUMENT_NODE) ||
- (ctxt->context->node->type == XML_HTML_DOCUMENT_NODE))
+ if ((cur->type == XML_DOCUMENT_NODE) ||
+ (cur->type == XML_HTML_DOCUMENT_NODE))
return(NULL);
return(cur->next);
}
@@ -1918,23 +1945,23 @@
ctxt->context->nodelist->nodeNr);
switch (test) {
case NODE_TEST_NONE:
- fprintf(xmlXPathDebug, " seaching for none !!!\n");
+ fprintf(xmlXPathDebug, " searching for none !!!\n");
break;
case NODE_TEST_TYPE:
- fprintf(xmlXPathDebug, " seaching for type %d\n", type);
+ fprintf(xmlXPathDebug, " searching for type %d\n", type);
break;
case NODE_TEST_PI:
- fprintf(xmlXPathDebug, " seaching for PI !!!\n");
+ fprintf(xmlXPathDebug, " searching for PI !!!\n");
break;
case NODE_TEST_ALL:
- fprintf(xmlXPathDebug, " seaching for *\n");
+ fprintf(xmlXPathDebug, " searching for *\n");
break;
case NODE_TEST_NS:
- fprintf(xmlXPathDebug, " seaching for namespace %s\n",
+ fprintf(xmlXPathDebug, " searching for namespace %s\n",
prefix);
break;
case NODE_TEST_NAME:
- fprintf(xmlXPathDebug, " seaching for name %s\n", name);
+ fprintf(xmlXPathDebug, " searching for name %s\n", name);
if (prefix != NULL)
fprintf(xmlXPathDebug, " with namespace %s\n",
prefix);
@@ -1958,7 +1985,10 @@
STRANGE
return(NULL);
case NODE_TEST_TYPE:
- if (cur->type == type) {
+ if ((cur->type == type) ||
+ ((type == XML_ELEMENT_NODE) &&
+ ((cur->type == XML_DOCUMENT_NODE) ||
+ (cur->type == XML_HTML_DOCUMENT_NODE)))) {
#ifdef DEBUG_STEP
n++;
#endif
@@ -4400,6 +4430,7 @@
xmlXPathEval(const xmlChar *str, xmlXPathContextPtr ctxt) {
xmlXPathParserContextPtr pctxt;
xmlXPathObjectPtr res = NULL, tmp;
+ int stack = 0;
xmlXPathInit();
@@ -4408,16 +4439,26 @@
if (xmlXPathDebug == NULL)
xmlXPathDebug = stderr;
pctxt = xmlXPathNewParserContext(str, ctxt);
+ if (str[0] == '/')
+ xmlXPathRoot(pctxt);
xmlXPathEvalLocationPath(pctxt);
/* TODO: cleanup nodelist, res = valuePop(pctxt); */
do {
tmp = valuePop(pctxt);
- if (tmp != NULL);
+ if (tmp != NULL) {
xmlXPathFreeObject(tmp);
+ stack++;
+ }
} while (tmp != NULL);
- if (res == NULL)
+ if (stack != 0) {
+ fprintf(xmlXPathDebug, "xmlXPathEval: %d object left on the stack\n",
+ stack);
+ }
+ if (pctxt->error == XPATH_EXPRESSION_OK)
res = xmlXPathNewNodeSetList(pctxt->context->nodelist);
+ else
+ res = NULL;
xmlXPathFreeParserContext(pctxt);
return(res);
}
@@ -4436,6 +4477,7 @@
xmlXPathEvalExpression(const xmlChar *str, xmlXPathContextPtr ctxt) {
xmlXPathParserContextPtr pctxt;
xmlXPathObjectPtr res, tmp;
+ int stack = 0;
xmlXPathInit();
@@ -4449,9 +4491,15 @@
res = valuePop(pctxt);
do {
tmp = valuePop(pctxt);
- if (tmp != NULL);
+ if (tmp != NULL) {
xmlXPathFreeObject(tmp);
+ stack++;
+ }
} while (tmp != NULL);
+ if (stack != 0) {
+ fprintf(xmlXPathDebug, "xmlXPathEval: %d object left on the stack\n",
+ stack);
+ }
xmlXPathFreeParserContext(pctxt);
return(res);
}
diff --git a/xpath.h b/xpath.h
index 149b0be..84c8305 100644
--- a/xpath.h
+++ b/xpath.h
@@ -205,6 +205,9 @@
void xmlXPathFreeObject (xmlXPathObjectPtr obj);
xmlXPathObjectPtr xmlXPathEvalExpression (const xmlChar *str,
xmlXPathContextPtr ctxt);
+xmlNodeSetPtr xmlXPathNodeSetCreate (xmlNodePtr val);
+void xmlXPathFreeNodeSetList (xmlXPathObjectPtr obj);
+void xmlXPathFreeNodeSet (xmlNodeSetPtr obj);
#ifdef __cplusplus
}