- release 1.8.2 - HTML handling improvement - new tree handling functions
- release 1.8.2
- HTML handling improvement
- new tree handling functions
- default namespace on attribute bug fixed
- libxml use for C++ fixed (for good this time !)
Daniel
diff --git a/doc/html/gnome-xml-htmlparser.html b/doc/html/gnome-xml-htmlparser.html
index 1f9c48c..02ffac5 100644
--- a/doc/html/gnome-xml-htmlparser.html
+++ b/doc/html/gnome-xml-htmlparser.html
@@ -115,7 +115,7 @@
><DIV
CLASS="REFNAMEDIV"
><A
-NAME="AEN7547"
+NAME="AEN7656"
></A
><H2
>Name</H2
@@ -123,7 +123,7 @@
><DIV
CLASS="REFSYNOPSISDIV"
><A
-NAME="AEN7550"
+NAME="AEN7659"
></A
><H2
>Synopsis</H2
@@ -138,10 +138,6 @@
CLASS="SYNOPSIS"
>
-#define <A
-HREF="gnome-xml-sax.html#EXTERN"
->extern</A
->
typedef <A
HREF="gnome-xml-htmlparser.html#HTMLPARSERCTXT"
>htmlParserCtxt</A
@@ -198,6 +194,32 @@
HREF="gnome-xml-tree.html#XMLCHAR"
>xmlChar</A
> *name);
+int <A
+HREF="gnome-xml-htmlparser.html#HTMLISAUTOCLOSED"
+>htmlIsAutoClosed</A
+> (<A
+HREF="gnome-xml-htmlparser.html#HTMLDOCPTR"
+>htmlDocPtr</A
+> doc,
+ <A
+HREF="gnome-xml-htmlparser.html#HTMLNODEPTR"
+>htmlNodePtr</A
+> elem);
+int <A
+HREF="gnome-xml-htmlparser.html#HTMLAUTOCLOSETAG"
+>htmlAutoCloseTag</A
+> (<A
+HREF="gnome-xml-htmlparser.html#HTMLDOCPTR"
+>htmlDocPtr</A
+> doc,
+ const <A
+HREF="gnome-xml-tree.html#XMLCHAR"
+>xmlChar</A
+> *name,
+ <A
+HREF="gnome-xml-htmlparser.html#HTMLNODEPTR"
+>htmlNodePtr</A
+> elem);
<GTKDOCLINK
HREF="HTMLENTITYDESCPTR"
>htmlEntityDescPtr</GTKDOCLINK
@@ -281,7 +303,7 @@
><DIV
CLASS="REFSECT1"
><A
-NAME="AEN7589"
+NAME="AEN7704"
></A
><H2
>Description</H2
@@ -291,274 +313,131 @@
><DIV
CLASS="REFSECT1"
><A
-NAME="AEN7592"
+NAME="AEN7707"
></A
><H2
>Details</H2
><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7594"
-></A
-><H3
-><A
-NAME="EXTERN"
-></A
->extern</H3
-><TABLE
-BORDER="0"
-BGCOLOR="#D6E8FF"
-WIDTH="100%"
-CELLPADDING="6"
-><TR
-><TD
-><PRE
-CLASS="PROGRAMLISTING"
->#define extern</PRE
-></TD
-></TR
-></TABLE
-><P
-></P
-></DIV
-><HR><DIV
-CLASS="REFSECT2"
-><A
-NAME="AEN7599"
+NAME="AEN7709"
></A
><H3
><A
NAME="HTMLPARSERCTXT"
></A
>htmlParserCtxt</H3
-><TABLE
-BORDER="0"
-BGCOLOR="#D6E8FF"
-WIDTH="100%"
-CELLPADDING="6"
-><TR
-><TD
-><PRE
-CLASS="PROGRAMLISTING"
->typedef xmlParserCtxt htmlParserCtxt;</PRE
-></TD
-></TR
-></TABLE
><P
></P
></DIV
><HR><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7604"
+NAME="AEN7713"
></A
><H3
><A
NAME="HTMLPARSERCTXTPTR"
></A
>htmlParserCtxtPtr</H3
-><TABLE
-BORDER="0"
-BGCOLOR="#D6E8FF"
-WIDTH="100%"
-CELLPADDING="6"
-><TR
-><TD
-><PRE
-CLASS="PROGRAMLISTING"
->typedef xmlParserCtxtPtr htmlParserCtxtPtr;</PRE
-></TD
-></TR
-></TABLE
><P
></P
></DIV
><HR><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7609"
+NAME="AEN7717"
></A
><H3
><A
NAME="HTMLPARSERNODEINFO"
></A
>htmlParserNodeInfo</H3
-><TABLE
-BORDER="0"
-BGCOLOR="#D6E8FF"
-WIDTH="100%"
-CELLPADDING="6"
-><TR
-><TD
-><PRE
-CLASS="PROGRAMLISTING"
->typedef xmlParserNodeInfo htmlParserNodeInfo;</PRE
-></TD
-></TR
-></TABLE
><P
></P
></DIV
><HR><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7614"
+NAME="AEN7721"
></A
><H3
><A
NAME="HTMLSAXHANDLER"
></A
>htmlSAXHandler</H3
-><TABLE
-BORDER="0"
-BGCOLOR="#D6E8FF"
-WIDTH="100%"
-CELLPADDING="6"
-><TR
-><TD
-><PRE
-CLASS="PROGRAMLISTING"
->typedef xmlSAXHandler htmlSAXHandler;</PRE
-></TD
-></TR
-></TABLE
><P
></P
></DIV
><HR><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7619"
+NAME="AEN7725"
></A
><H3
><A
NAME="HTMLSAXHANDLERPTR"
></A
>htmlSAXHandlerPtr</H3
-><TABLE
-BORDER="0"
-BGCOLOR="#D6E8FF"
-WIDTH="100%"
-CELLPADDING="6"
-><TR
-><TD
-><PRE
-CLASS="PROGRAMLISTING"
->typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;</PRE
-></TD
-></TR
-></TABLE
><P
></P
></DIV
><HR><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7624"
+NAME="AEN7729"
></A
><H3
><A
NAME="HTMLPARSERINPUT"
></A
>htmlParserInput</H3
-><TABLE
-BORDER="0"
-BGCOLOR="#D6E8FF"
-WIDTH="100%"
-CELLPADDING="6"
-><TR
-><TD
-><PRE
-CLASS="PROGRAMLISTING"
->typedef xmlParserInput htmlParserInput;</PRE
-></TD
-></TR
-></TABLE
><P
></P
></DIV
><HR><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7629"
+NAME="AEN7733"
></A
><H3
><A
NAME="HTMLPARSERINPUTPTR"
></A
>htmlParserInputPtr</H3
-><TABLE
-BORDER="0"
-BGCOLOR="#D6E8FF"
-WIDTH="100%"
-CELLPADDING="6"
-><TR
-><TD
-><PRE
-CLASS="PROGRAMLISTING"
->typedef xmlParserInputPtr htmlParserInputPtr;</PRE
-></TD
-></TR
-></TABLE
><P
></P
></DIV
><HR><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7634"
+NAME="AEN7737"
></A
><H3
><A
NAME="HTMLDOCPTR"
></A
>htmlDocPtr</H3
-><TABLE
-BORDER="0"
-BGCOLOR="#D6E8FF"
-WIDTH="100%"
-CELLPADDING="6"
-><TR
-><TD
-><PRE
-CLASS="PROGRAMLISTING"
->typedef xmlDocPtr htmlDocPtr;</PRE
-></TD
-></TR
-></TABLE
><P
></P
></DIV
><HR><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7639"
+NAME="AEN7741"
></A
><H3
><A
NAME="HTMLNODEPTR"
></A
>htmlNodePtr</H3
-><TABLE
-BORDER="0"
-BGCOLOR="#D6E8FF"
-WIDTH="100%"
-CELLPADDING="6"
-><TR
-><TD
-><PRE
-CLASS="PROGRAMLISTING"
->typedef xmlNodePtr htmlNodePtr;</PRE
-></TD
-></TR
-></TABLE
><P
></P
></DIV
><HR><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7644"
+NAME="AEN7745"
></A
><H3
><A
@@ -614,7 +493,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> The tag name</TD
+> </TD
></TR
><TR
><TD
@@ -629,7 +508,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
->the related htmlElemDescPtr or NULL if not found.</TD
+> </TD
></TR
></TABLE
><P
@@ -639,7 +518,7 @@
><HR><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7665"
+NAME="AEN7766"
></A
><H3
><A
@@ -697,7 +576,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> the entity name</TD
+> </TD
></TR
><TR
><TD
@@ -712,7 +591,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
->the associated htmlEntityDescPtr if found, NULL otherwise.</TD
+> </TD
></TR
></TABLE
><P
@@ -722,7 +601,231 @@
><HR><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7687"
+NAME="AEN7788"
+></A
+><H3
+><A
+NAME="HTMLISAUTOCLOSED"
+></A
+>htmlIsAutoClosed ()</H3
+><TABLE
+BORDER="0"
+BGCOLOR="#D6E8FF"
+WIDTH="100%"
+CELLPADDING="6"
+><TR
+><TD
+><PRE
+CLASS="PROGRAMLISTING"
+>int htmlIsAutoClosed (<A
+HREF="gnome-xml-htmlparser.html#HTMLDOCPTR"
+>htmlDocPtr</A
+> doc,
+ <A
+HREF="gnome-xml-htmlparser.html#HTMLNODEPTR"
+>htmlNodePtr</A
+> elem);</PRE
+></TD
+></TR
+></TABLE
+><P
+>The HTML DTD allows a tag to implicitly close other tags.
+The list is kept in the htmlStartClose array. This function checks
+if a tag is autoclosed by one of its children.</P
+><P
+></P
+><DIV
+CLASS="INFORMALTABLE"
+><P
+></P
+><TABLE
+BORDER="0"
+WIDTH="100%"
+BGCOLOR="#FFD0D0"
+CELLSPACING="0"
+CELLPADDING="4"
+CLASS="CALSTABLE"
+><TR
+><TD
+WIDTH="20%"
+ALIGN="RIGHT"
+VALIGN="TOP"
+><TT
+CLASS="PARAMETER"
+><I
+>doc</I
+></TT
+> :</TD
+><TD
+WIDTH="80%"
+ALIGN="LEFT"
+VALIGN="TOP"
+> </TD
+></TR
+><TR
+><TD
+WIDTH="20%"
+ALIGN="RIGHT"
+VALIGN="TOP"
+><TT
+CLASS="PARAMETER"
+><I
+>elem</I
+></TT
+> :</TD
+><TD
+WIDTH="80%"
+ALIGN="LEFT"
+VALIGN="TOP"
+> </TD
+></TR
+><TR
+><TD
+WIDTH="20%"
+ALIGN="RIGHT"
+VALIGN="TOP"
+><I
+CLASS="EMPHASIS"
+>Returns</I
+> :</TD
+><TD
+WIDTH="80%"
+ALIGN="LEFT"
+VALIGN="TOP"
+> </TD
+></TR
+></TABLE
+><P
+></P
+></DIV
+></DIV
+><HR><DIV
+CLASS="REFSECT2"
+><A
+NAME="AEN7813"
+></A
+><H3
+><A
+NAME="HTMLAUTOCLOSETAG"
+></A
+>htmlAutoCloseTag ()</H3
+><TABLE
+BORDER="0"
+BGCOLOR="#D6E8FF"
+WIDTH="100%"
+CELLPADDING="6"
+><TR
+><TD
+><PRE
+CLASS="PROGRAMLISTING"
+>int htmlAutoCloseTag (<A
+HREF="gnome-xml-htmlparser.html#HTMLDOCPTR"
+>htmlDocPtr</A
+> doc,
+ const <A
+HREF="gnome-xml-tree.html#XMLCHAR"
+>xmlChar</A
+> *name,
+ <A
+HREF="gnome-xml-htmlparser.html#HTMLNODEPTR"
+>htmlNodePtr</A
+> elem);</PRE
+></TD
+></TR
+></TABLE
+><P
+>The HTML DTD allows a tag to implicitly close other tags.
+The list is kept in the htmlStartClose array. This function checks
+if the element or one of its children would autoclose the
+given tag.</P
+><P
+></P
+><DIV
+CLASS="INFORMALTABLE"
+><P
+></P
+><TABLE
+BORDER="0"
+WIDTH="100%"
+BGCOLOR="#FFD0D0"
+CELLSPACING="0"
+CELLPADDING="4"
+CLASS="CALSTABLE"
+><TR
+><TD
+WIDTH="20%"
+ALIGN="RIGHT"
+VALIGN="TOP"
+><TT
+CLASS="PARAMETER"
+><I
+>doc</I
+></TT
+> :</TD
+><TD
+WIDTH="80%"
+ALIGN="LEFT"
+VALIGN="TOP"
+> </TD
+></TR
+><TR
+><TD
+WIDTH="20%"
+ALIGN="RIGHT"
+VALIGN="TOP"
+><TT
+CLASS="PARAMETER"
+><I
+>name</I
+></TT
+> :</TD
+><TD
+WIDTH="80%"
+ALIGN="LEFT"
+VALIGN="TOP"
+> </TD
+></TR
+><TR
+><TD
+WIDTH="20%"
+ALIGN="RIGHT"
+VALIGN="TOP"
+><TT
+CLASS="PARAMETER"
+><I
+>elem</I
+></TT
+> :</TD
+><TD
+WIDTH="80%"
+ALIGN="LEFT"
+VALIGN="TOP"
+> </TD
+></TR
+><TR
+><TD
+WIDTH="20%"
+ALIGN="RIGHT"
+VALIGN="TOP"
+><I
+CLASS="EMPHASIS"
+>Returns</I
+> :</TD
+><TD
+WIDTH="80%"
+ALIGN="LEFT"
+VALIGN="TOP"
+> </TD
+></TR
+></TABLE
+><P
+></P
+></DIV
+></DIV
+><HR><DIV
+CLASS="REFSECT2"
+><A
+NAME="AEN7843"
></A
><H3
><A
@@ -784,7 +887,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> an HTML parser context</TD
+> </TD
></TR
><TR
><TD
@@ -801,7 +904,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> location to store the entity name</TD
+> </TD
></TR
><TR
><TD
@@ -816,8 +919,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
->the associated htmlEntityDescPtr if found, or NULL otherwise,
-if non-NULL *str will have to be freed by the caller.</TD
+> </TD
></TR
></TABLE
><P
@@ -827,7 +929,7 @@
><HR><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7714"
+NAME="AEN7870"
></A
><H3
><A
@@ -886,7 +988,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> an HTML parser context</TD
+> </TD
></TR
><TR
><TD
@@ -901,7 +1003,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
->the value parsed (as an int)</TD
+> </TD
></TR
></TABLE
><P
@@ -911,7 +1013,7 @@
><HR><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7736"
+NAME="AEN7892"
></A
><H3
><A
@@ -968,7 +1070,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> an HTML parser context</TD
+> </TD
></TR
></TABLE
><P
@@ -978,7 +1080,7 @@
><HR><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7754"
+NAME="AEN7910"
></A
><H3
><A
@@ -1042,7 +1144,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> a pointer to an array of xmlChar</TD
+> </TD
></TR
><TR
><TD
@@ -1059,7 +1161,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> a free form C string describing the HTML document encoding, or NULL</TD
+> </TD
></TR
><TR
><TD
@@ -1076,7 +1178,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> the SAX handler block</TD
+> </TD
></TR
><TR
><TD
@@ -1093,7 +1195,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> if using SAX, this pointer will be provided on callbacks. </TD
+> </TD
></TR
><TR
><TD
@@ -1108,7 +1210,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
->the resulting document tree</TD
+> </TD
></TR
></TABLE
><P
@@ -1118,7 +1220,7 @@
><HR><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7788"
+NAME="AEN7944"
></A
><H3
><A
@@ -1175,7 +1277,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> a pointer to an array of xmlChar</TD
+> </TD
></TR
><TR
><TD
@@ -1192,7 +1294,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> a free form C string describing the HTML document encoding, or NULL</TD
+> </TD
></TR
><TR
><TD
@@ -1207,7 +1309,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
->the resulting document tree</TD
+> </TD
></TR
></TABLE
><P
@@ -1217,7 +1319,7 @@
><HR><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7813"
+NAME="AEN7969"
></A
><H3
><A
@@ -1279,7 +1381,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> the filename</TD
+> </TD
></TR
><TR
><TD
@@ -1296,7 +1398,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> a free form C string describing the HTML document encoding, or NULL</TD
+> </TD
></TR
><TR
><TD
@@ -1313,7 +1415,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> the SAX handler block</TD
+> </TD
></TR
><TR
><TD
@@ -1330,7 +1432,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> if using SAX, this pointer will be provided on callbacks. </TD
+> </TD
></TR
><TR
><TD
@@ -1345,7 +1447,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
->the resulting document tree</TD
+> </TD
></TR
></TABLE
><P
@@ -1355,7 +1457,7 @@
><HR><DIV
CLASS="REFSECT2"
><A
-NAME="AEN7846"
+NAME="AEN8002"
></A
><H3
><A
@@ -1410,7 +1512,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> the filename</TD
+> </TD
></TR
><TR
><TD
@@ -1427,7 +1529,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
-> a free form C string describing the HTML document encoding, or NULL</TD
+> </TD
></TR
><TR
><TD
@@ -1442,7 +1544,7 @@
WIDTH="80%"
ALIGN="LEFT"
VALIGN="TOP"
->the resulting document tree</TD
+> </TD
></TR
></TABLE
><P