Added the Expand() and Next() operation to work on subtrees within the * xmlreader.c include/libxml/xmlreader.h: Added the Expand() and Next() operation to work on subtrees within the reader framework. * doc/libxml2-api.xml python/libxml2class.txt: resulting updates * python/tests/reader5.py: added an example for those new functions of the reader. Daniel

commit: c6cae7b690c1ded6f03a378eb59025522cc40105 [log] [tgz]
author: Daniel Veillard <veillard@src.gnome.org> Fri Apr 11 09:02:11 2003 +0000
committer: Daniel Veillard <veillard@src.gnome.org> Fri Apr 11 09:02:11 2003 +0000
tree: 356e79aa74f84182ac5231d41ef4e724b6bc34ef
parent: 645c690d49e812be1088e9f00fa8eded92edc635 [diff]
diff --git a/ChangeLog b/ChangeLog
index a9f69d5..882daed 100644
--- a/ChangeLog
+++ b/ChangeLog

@@ -1,3 +1,12 @@
+Fri Apr 11 10:59:24 CEST 2003 Daniel Veillard <daniel@veillard.com>
+
+	* xmlreader.c include/libxml/xmlreader.h: Added the Expand()
+	  and Next() operation to work on subtrees within the reader
+	  framework.
+	* doc/libxml2-api.xml python/libxml2class.txt: resulting updates
+	* python/tests/reader5.py: added an example for those new
+	  functions of the reader.
+
 Thu Apr 10 23:38:13 CEST 2003 Daniel Veillard <daniel@veillard.com>
 
 	* HTMLtree.c: patch from Vasily Tchekalkin to fix #109865

diff --git a/doc/libxml2-api.xml b/doc/libxml2-api.xml
index 64cfa0b..6ee16a6 100644
--- a/doc/libxml2-api.xml
+++ b/doc/libxml2-api.xml

@@ -883,7 +883,6 @@
      <exports symbol='xmlRemoveID'/>
      <exports symbol='xmlRemoveRef'/>
      <exports symbol='xmlSnprintfElementContent'/>
-     <exports symbol='xmlSplitQName2'/>
      <exports symbol='xmlSprintfElementContent'/>
      <exports symbol='xmlValidBuildContentModel'/>
      <exports symbol='xmlValidCtxt'/>
@@ -929,6 +928,7 @@
      <exports symbol='_htmlEntityDesc'/>
      <exports symbol='htmlAttrAllowed'/>
      <exports symbol='htmlAutoCloseTag'/>
+     <exports symbol='htmlCreateMemoryParserCtxt'/>
      <exports symbol='htmlCreatePushParserCtxt'/>
      <exports symbol='htmlDefaultSubelement'/>
      <exports symbol='htmlDocPtr'/>
@@ -1127,6 +1127,7 @@
      <exports symbol='xmlTextReaderCurrentNode'/>
      <exports symbol='xmlTextReaderDepth'/>
      <exports symbol='xmlTextReaderErrorFunc'/>
+     <exports symbol='xmlTextReaderExpand'/>
      <exports symbol='xmlTextReaderGetAttribute'/>
      <exports symbol='xmlTextReaderGetAttributeNo'/>
      <exports symbol='xmlTextReaderGetAttributeNs'/>
@@ -1150,6 +1151,7 @@
      <exports symbol='xmlTextReaderMoveToNextAttribute'/>
      <exports symbol='xmlTextReaderName'/>
      <exports symbol='xmlTextReaderNamespaceUri'/>
+     <exports symbol='xmlTextReaderNext'/>
      <exports symbol='xmlTextReaderNodeType'/>
      <exports symbol='xmlTextReaderNormalization'/>
      <exports symbol='xmlTextReaderPrefix'/>
@@ -1552,6 +1554,7 @@
      <exports symbol='xmlBufferWriteCHAR'/>
      <exports symbol='xmlBufferWriteChar'/>
      <exports symbol='xmlBufferWriteQuotedString'/>
+     <exports symbol='xmlBuildQName'/>
      <exports symbol='xmlChar'/>
      <exports symbol='xmlChildrenNode'/>
      <exports symbol='xmlCopyDoc'/>
@@ -1701,6 +1704,7 @@
      <exports symbol='xmlSetNsProp'/>
      <exports symbol='xmlSetProp'/>
      <exports symbol='xmlSetTreeDoc'/>
+     <exports symbol='xmlSplitQName2'/>
      <exports symbol='xmlStringGetNodeList'/>
      <exports symbol='xmlStringLenGetNodeList'/>
      <exports symbol='xmlTextConcat'/>
@@ -3633,6 +3637,12 @@
       <arg name='filename' type='const char *' info='the filename'/>
       <arg name='encoding' type='const char *' info='a free form C string describing the HTML document encoding, or NULL'/>
     </function>
+    <function name='htmlCreateMemoryParserCtxt' file='HTMLparser'>
+      <info>Create a parser context for an HTML in-memory document.</info>
+      <return type='htmlParserCtxtPtr' info='the new parser context or NULL'/>
+      <arg name='buffer' type='const char *' info='a pointer to a char array'/>
+      <arg name='size' type='int' info='the size of the array'/>
+    </function>
     <function name='htmlCreatePushParserCtxt' file='HTMLparser'>
       <info>Create a parser context for using the HTML parser in push mode The value of @filename is used for fetching external entities and error/warning reports.</info>
       <return type='htmlParserCtxtPtr' info='the new parser context or NULL'/>
@@ -4560,6 +4570,14 @@
       <arg name='buf' type='xmlBufferPtr' info='the XML buffer output'/>
       <arg name='string' type='const xmlChar *' info='the string to add'/>
     </function>
+    <function name='xmlBuildQName' file='tree'>
+      <info>Builds the QName @prefix:@ncname in @memory if there is enough space and prefix is not NULL nor empty, otherwise allocate a new string. If prefix is NULL or empty it returns ncname.</info>
+      <return type='xmlChar *' info='the new string which must be freed by the caller if different from @memory and @ncname or NULL in case of error'/>
+      <arg name='ncname' type='const xmlChar *' info='the Name'/>
+      <arg name='prefix' type='const xmlChar *' info='the prefix'/>
+      <arg name='memory' type='xmlChar *' info='preallocated memory'/>
+      <arg name='len' type='int' info='preallocated memory length'/>
+    </function>
     <function name='xmlBuildURI' file='uri'>
       <info>Computes he final URI of the reference done by checking that the given URI is valid, and building the final URI using the base URI. This is processed according to section 5.2 of the RFC 2396  5.2. Resolving Relative References to Absolute Form</info>
       <return type='xmlChar *' info='a new URI string (to be freed by the caller) or NULL in case of error.'/>
@@ -8076,10 +8094,10 @@
       <arg name='name' type='const xmlChar *' info='an XML parser context'/>
       <arg name='prefix' type='xmlChar **' info='a xmlChar **'/>
     </function>
-    <function name='xmlSplitQName2' file='valid'>
+    <function name='xmlSplitQName2' file='tree'>
       <info>parse an XML qualified name string  [NS 5] QName ::= (Prefix &apos;:&apos;)? LocalPart  [NS 6] Prefix ::= NCName  [NS 7] LocalPart ::= NCName</info>
       <return type='xmlChar *' info='NULL if not a QName, otherwise the local part, and prefix is updated to get the Prefix if any.'/>
-      <arg name='name' type='const xmlChar *' info='an XML parser context'/>
+      <arg name='name' type='const xmlChar *' info='the full QName'/>
       <arg name='prefix' type='xmlChar **' info='a xmlChar **'/>
     </function>
     <function name='xmlSprintfElementContent' file='valid'>
@@ -8283,6 +8301,11 @@
       <arg name='severity' type='xmlParserSeverities' info=''/>
       <arg name='locator' type='xmlTextReaderLocatorPtr' info=''/>
     </functype>
+    <function name='xmlTextReaderExpand' file='xmlreader'>
+      <info>Reads the contents of the current node and the full subtree. It then makes the subtree available until the next xmlTextReaderRead() call.</info>
+      <return type='xmlNodePtr' info='a node pointer valid until the next xmlTextReaderRead() call or NULL in case of error.'/>
+      <arg name='reader' type='xmlTextReaderPtr' info='the xmlTextReaderPtr used'/>
+    </function>
     <function name='xmlTextReaderGetAttribute' file='xmlreader'>
       <info>Provides the value of the attribute with the specified qualified name.</info>
       <return type='xmlChar *' info='a string containing the value of the specified attribute, or NULL in case of error. The string must be deallocated by the caller.'/>
@@ -8405,6 +8428,11 @@
       <return type='xmlChar *' info='the namespace URI or NULL if not available'/>
       <arg name='reader' type='xmlTextReaderPtr' info='the xmlTextReaderPtr used'/>
     </function>
+    <function name='xmlTextReaderNext' file='xmlreader'>
+      <info>Skip to the node following the current one in document order while avoiding the subtree if any.</info>
+      <return type='int' info='1 if the node was read successfully, 0 if there are no more nodes to read, or -1 in case of error'/>
+      <arg name='reader' type='xmlTextReaderPtr' info='the xmlTextReaderPtr used'/>
+    </function>
     <function name='xmlTextReaderNodeType' file='xmlreader'>
       <info>Get the node type of the current node Reference: http://dotgnu.org/pnetlib-doc/System/Xml/XmlNodeType.html</info>
       <return type='int' info='the xmlNodeType of the current node or -1 in case of error'/>

diff --git a/include/libxml/xmlreader.h b/include/libxml/xmlreader.h
index e9475a7..f035258 100644
--- a/include/libxml/xmlreader.h
+++ b/include/libxml/xmlreader.h

@@ -106,6 +106,8 @@
 						 int prop);
 xmlNodePtr	xmlTextReaderCurrentNode	(xmlTextReaderPtr reader);
 xmlDocPtr	xmlTextReaderCurrentDoc		(xmlTextReaderPtr reader);
+xmlNodePtr	xmlTextReaderExpand		(xmlTextReaderPtr reader);
+int		xmlTextReaderNext		(xmlTextReaderPtr reader);
 
 /*
  * Error handling extensions

diff --git a/python/libxml2class.txt b/python/libxml2class.txt
index 92c6601..950397d 100644
--- a/python/libxml2class.txt
+++ b/python/libxml2class.txt

@@ -6,6 +6,7 @@
 
 
 # functions from module HTMLparser
+htmlCreateMemoryParserCtxt()
 htmlHandleOmittedElem()
 htmlIsScriptAttribute()
 htmlParseDoc()
@@ -132,6 +133,7 @@
 relaxNGNewParserCtxt()
 
 # functions from module tree
+buildQName()
 compressMode()
 isXHTML()
 newComment()
@@ -594,6 +596,7 @@
     CurrentDoc()
     CurrentNode()
     Depth()
+    Expand()
     GetAttribute()
     GetAttributeNo()
     GetAttributeNs()
@@ -613,6 +616,7 @@
     MoveToNextAttribute()
     Name()
     NamespaceUri()
+    Next()
     NodeType()
     Normalization()
     Prefix()

diff --git a/python/tests/reader5.py b/python/tests/reader5.py
new file mode 100755
index 0000000..fbfe4a6
--- /dev/null
+++ b/python/tests/reader5.py

@@ -0,0 +1,48 @@
+#!/usr/bin/python -u
+#
+# this tests the Expand() and Next() APIs of the xmlTextReader interface:
+# it extracts the "Dragon" bibliography entry (id 'Aho') from the XML specification
+#
+import libxml2
+import StringIO
+import sys
+
+# Memory debug specific
+libxml2.debugMemory(1)
+
+expect="""<bibl id="Aho" key="Aho/Ullman">Aho, Alfred V., 
+Ravi Sethi, and Jeffrey D. Ullman.
+<emph>Compilers:  Principles, Techniques, and Tools</emph>.
+Reading:  Addison-Wesley, 1986, rpt. corr. 1988.</bibl>"""
+
+f = open('../../test/valid/REC-xml-19980210.xml')
+input = libxml2.inputBuffer(f)
+reader = input.newTextReader("REC")
+res=""
+while reader.Read():
+    while reader.Name() == 'bibl':
+        node = reader.Expand()            # expand the subtree
+	if node.xpathEval("@id = 'Aho'"): # use XPath on it
+	    res = res + node.serialize()
+	if reader.Next() != 1:            # skip the subtree
+	    break;
+
+if res != expect:
+    print "Error: didn't get the expected output"
+    print "got '%s'" % (res)
+    print "expected '%s'" % (expect)
+    
+
+#
+# cleanup
+#
+del input
+del reader
+
+# Memory debug specific
+libxml2.cleanupParser()
+if libxml2.debugMemory(1) == 0:
+    print "OK"
+else:
+    print "Memory leak %d bytes" % (libxml2.debugMemory(1))
+    libxml2.dumpMemory()

diff --git a/xmlreader.c b/xmlreader.c
index 64da1bb..529ef56 100644
--- a/xmlreader.c
+++ b/xmlreader.c

@@ -555,6 +555,56 @@
 
 
 /**
+ * xmlTextReaderGetSuccessor:
+ * @cur:  the current node
+ *
+ * Get the next sibling of @cur or else of its nearest ancestor having one.
+ *
+ * Returns the successor node, or NULL if @cur is NULL or no such node exists
+ */
+static xmlNodePtr
+xmlTextReaderGetSuccessor(xmlNodePtr cur) {
+    if (cur == NULL) return(NULL) ; /* ERROR */
+    if (cur->next != NULL) return(cur->next) ; /* direct sibling */
+    do {
+        cur = cur->parent; /* no sibling at this level: climb one level */
+        if (cur == NULL) return(NULL); /* no parent left to climb to */
+        if (cur->next != NULL) return(cur->next);
+    } while (cur != NULL); /* cur was checked above, so always true here */
+    return(cur); /* not reached: the loop only exits through a return */
+}
+
+/**
+ * xmlTextReaderDoExpand:
+ * @reader:  the xmlTextReaderPtr used
+ *
+ * Makes sure that the current node is fully read as well as all its
+ * descendants. It means the full DOM subtree must be available at the
+ * end of the call.
+ *
+ * Returns 1 once the current node has a successor or the reader is at EOF,
+ *          or -1 on a NULL reader/node/ctxt or if pushing data fails
+ */
+static int
+xmlTextReaderDoExpand(xmlTextReaderPtr reader) {
+    int val;
+
+    if ((reader == NULL) || (reader->node == NULL) || (reader->ctxt == NULL))
+        return(-1);
+
+    do {
+        if (xmlTextReaderGetSuccessor(reader->node) != NULL)
+	    return(1); /* a following node exists; presumably the subtree is done */
+	if (reader->mode == XML_TEXTREADER_MODE_EOF)
+	    return(1); /* EOF mode: stop without pushing any more data */
+	val = xmlTextReaderPushData(reader);
+	if (val < 0)
+	    return(-1);
+    } while(reader->mode != XML_TEXTREADER_MODE_EOF);
+    return(1);
+}
+
+/**
  * xmlTextReaderRead:
  * @reader:  the xmlTextReaderPtr used
  *
@@ -804,6 +854,7 @@
 #endif /* LIBXML_REGEXP_ENABLED */
     return(1);
 node_end:
+    reader->mode = XML_TEXTREADER_DONE;
     return(0);
 }
 
@@ -823,6 +874,57 @@
 }
 
 /**
+ * xmlTextReaderExpand:
+ * @reader:  the xmlTextReaderPtr used
+ *
+ * Reads the contents of the current node and the full subtree. It then makes
+ * the subtree available until the next xmlTextReaderRead() call.
+ *
+ * Returns a node pointer valid until the next xmlTextReaderRead() call
+ *         or NULL in case of error (including a NULL reader, node or ctxt).
+ */
+xmlNodePtr
+xmlTextReaderExpand(xmlTextReaderPtr reader) {
+    if ((reader == NULL) || (reader->node == NULL) || (reader->ctxt == NULL))
+        return(NULL);
+    if (xmlTextReaderDoExpand(reader) < 0)
+        return(NULL);
+    return(reader->node); /* the current node itself; the reader does not move */
+}
+
+/**
+ * xmlTextReaderNext:
+ * @reader:  the xmlTextReaderPtr used
+ *
+ * Skip to the node following the current one in document order while
+ * avoiding the subtree if any.
+ *
+ * Returns 1 if the node was read successfully, 0 if there are no more
+ *          nodes to read, or -1 in case of error
+ */
+int
+xmlTextReaderNext(xmlTextReaderPtr reader) {
+    int ret;
+    xmlNodePtr cur;
+
+    if (reader == NULL)
+	return(-1);
+    cur = reader->node;
+    if ((cur == NULL) || (cur->type != XML_ELEMENT_NODE))
+        return(xmlTextReaderRead(reader)); /* no element here: plain Read() */
+    if (reader->state == XML_TEXTREADER_END)
+        return(xmlTextReaderRead(reader)); /* presumably on the end tag - TODO confirm */
+    if (cur->_private == (void *)xmlTextReaderIsEmpty)
+        return(xmlTextReaderRead(reader)); /* presumably an empty element - TODO confirm */
+    do {
+        ret = xmlTextReaderRead(reader);
+	if (ret != 1)
+	    return(ret); /* end of input or error while skipping */
+    } while (reader->node != cur); /* read on until the reader is back on cur */
+    return(xmlTextReaderRead(reader)); /* one more step to move past cur */
+}
+
+/**
  * xmlTextReaderReadInnerXml:
  * @reader:  the xmlTextReaderPtr used
  *
commit	c6cae7b690c1ded6f03a378eb59025522cc40105	[log] [tgz]
author	Daniel Veillard <veillard@src.gnome.org>	Fri Apr 11 09:02:11 2003 +0000
committer	Daniel Veillard <veillard@src.gnome.org>	Fri Apr 11 09:02:11 2003 +0000
tree	356e79aa74f84182ac5231d41ef4e724b6bc34ef
parent	645c690d49e812be1088e9f00fa8eded92edc635 [diff]