Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| 2 | <html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>HTMLparser</title><meta name="generator" content="DocBook XSL Stylesheets V1.58.1"><style xmlns="http://www.w3.org/TR/xhtml1/transitional" type="text/css"> |
| 3 | .synopsis, .classsynopsis { |
| 4 | background: #eeeeee; |
| 5 | border: solid 1px #aaaaaa; |
| 6 | padding: 0.5em; |
| 7 | } |
| 8 | .programlisting { |
| 9 | background: #eeeeff; |
| 10 | border: solid 1px #aaaaff; |
| 11 | padding: 0.5em; |
| 12 | } |
| 13 | .variablelist { |
| 14 | padding: 4px; |
| 15 | margin-left: 3em; |
| 16 | } |
| 17 | .navigation { |
| 18 | background: #ffeeee; |
| 19 | border: solid 1px #ffaaaa; |
| 20 | margin-top: 0.5em; |
| 21 | margin-bottom: 0.5em; |
| 22 | } |
| 23 | .navigation a { |
| 24 | color: #770000; |
| 25 | } |
| 26 | .navigation a:visited { |
| 27 | color: #550000; |
| 28 | } |
| 29 | .navigation .title { |
| 30 | font-size: 200%; |
| 31 | } |
| 32 | </style><link rel="home" href="index.html" title="Gnome XML Library Reference Manual"><link rel="up" href="libxml-lib.html" title="Libxml Library Reference"><link rel="previous" href="libxml-xmlerror.html" title="xmlerror"><link rel="next" href="libxml-HTMLtree.html" title="HTMLtree"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation header" cellpadding="2" cellspacing="2"><tr valign="middle"><td><a accesskey="p" href="libxml-xmlerror.html"><img src="left.png" width="24" height="24" border="0" alt="Prev"></img></a></td><td><a accesskey="u" href="libxml-lib.html"><img src="up.png" width="24" height="24" border="0" alt="Up"></img></a></td><td><a accesskey="h" href="index.html"><img src="home.png" width="24" height="24" border="0" alt="Home"></img></a></td><th width="100%" align="center">Gnome XML Library Reference Manual</th><td><a accesskey="n" href="libxml-HTMLtree.html"><img src="right.png" width="24" height="24" border="0" alt="Next"></img></a></td></tr></table><div class="refentry" lang="en"><a name="libxml-HTMLparser"></a><div class="titlepage"></div><div class="refnamediv"><h2>HTMLparser</h2><p>HTMLparser — </p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><pre class="synopsis"> |
| 33 | |
| 34 | |
| 35 | |
| 36 | typedef <a href="libxml-HTMLparser.html#htmlParserCtxt">htmlParserCtxt</a>; |
| 37 | typedef <a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a>; |
| 38 | typedef <a href="libxml-HTMLparser.html#htmlParserNodeInfo">htmlParserNodeInfo</a>; |
| 39 | typedef <a href="libxml-HTMLparser.html#htmlSAXHandler">htmlSAXHandler</a>; |
| 40 | typedef <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a>; |
| 41 | typedef <a href="libxml-HTMLparser.html#htmlParserInput">htmlParserInput</a>; |
| 42 | typedef <a href="libxml-HTMLparser.html#htmlParserInputPtr">htmlParserInputPtr</a>; |
| 43 | typedef <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a>; |
| 44 | typedef <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a>; |
| 45 | struct <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>; |
| 46 | typedef <a href="libxml-HTMLparser.html#htmlElemDescPtr">htmlElemDescPtr</a>; |
| 47 | struct <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>; |
| 48 | typedef <a href="libxml-HTMLparser.html#htmlEntityDescPtr">htmlEntityDescPtr</a>; |
| 49 | const <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>* <a href="libxml-HTMLparser.html#htmlTagLookup">htmlTagLookup</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *tag); |
| 50 | const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlEntityLookup">htmlEntityLookup</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name); |
| 51 | const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlEntityValueLookup">htmlEntityValueLookup</a> (unsigned int value); |
| 52 | int <a href="libxml-HTMLparser.html#htmlIsAutoClosed">htmlIsAutoClosed</a> (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, |
| 53 | <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem); |
| 54 | int <a href="libxml-HTMLparser.html#htmlAutoCloseTag">htmlAutoCloseTag</a> (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, |
| 55 | const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name, |
| 56 | <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem); |
| 57 | const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlParseEntityRef">htmlParseEntityRef</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, |
| 58 | <a href="libxml-tree.html#xmlChar">xmlChar</a> **str); |
| 59 | int <a href="libxml-HTMLparser.html#htmlParseCharRef">htmlParseCharRef</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); |
| 60 | void <a href="libxml-HTMLparser.html#htmlParseElement">htmlParseElement</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); |
| 61 | <a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="libxml-HTMLparser.html#htmlCreateMemoryParserCtxt">htmlCreateMemoryParserCtxt</a> |
| 62 | (const char *buffer, |
| 63 | int size); |
| 64 | int <a href="libxml-HTMLparser.html#htmlParseDocument">htmlParseDocument</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); |
| 65 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlSAXParseDoc">htmlSAXParseDoc</a> (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur, |
| 66 | const char *encoding, |
| 67 | <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, |
| 68 | void *userData); |
| 69 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlParseDoc">htmlParseDoc</a> (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur, |
| 70 | const char *encoding); |
| 71 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlSAXParseFile">htmlSAXParseFile</a> (const char *filename, |
| 72 | const char *encoding, |
| 73 | <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, |
| 74 | void *userData); |
| 75 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlParseFile">htmlParseFile</a> (const char *filename, |
| 76 | const char *encoding); |
| 77 | int <a href="libxml-HTMLparser.html#UTF8ToHtml">UTF8ToHtml</a> (unsigned char *out, |
| 78 | int *outlen, |
| 79 | unsigned char *in, |
| 80 | int *inlen); |
| 81 | int <a href="libxml-HTMLparser.html#htmlEncodeEntities">htmlEncodeEntities</a> (unsigned char *out, |
| 82 | int *outlen, |
| 83 | unsigned char *in, |
| 84 | int *inlen, |
| 85 | int quoteChar); |
| 86 | int <a href="libxml-HTMLparser.html#htmlIsScriptAttribute">htmlIsScriptAttribute</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name); |
| 87 | int <a href="libxml-HTMLparser.html#htmlHandleOmittedElem">htmlHandleOmittedElem</a> (int val); |
| 88 | void <a href="libxml-HTMLparser.html#htmlFreeParserCtxt">htmlFreeParserCtxt</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); |
| 89 | <a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="libxml-HTMLparser.html#htmlCreatePushParserCtxt">htmlCreatePushParserCtxt</a> (<a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, |
| 90 | void *user_data, |
| 91 | const char *chunk, |
| 92 | int size, |
| 93 | const char *filename, |
| 94 | <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc); |
| 95 | int <a href="libxml-HTMLparser.html#htmlParseChunk">htmlParseChunk</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, |
| 96 | const char *chunk, |
| 97 | int size, |
| 98 | int terminate); |
| 99 | </pre></div><div class="refsect1" lang="en"><h2>Description</h2><p> |
| 100 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 101 | </p></div><div class="refsect1" lang="en"><h2>Details</h2><div class="refsect2" lang="en"><h3><a name="htmlParserCtxt"></a>htmlParserCtxt</h3><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 102 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 103 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserCtxtPtr"></a>htmlParserCtxtPtr</h3><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 104 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 105 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserNodeInfo"></a>htmlParserNodeInfo</h3><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 106 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 107 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXHandler"></a>htmlSAXHandler</h3><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 108 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 109 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXHandlerPtr"></a>htmlSAXHandlerPtr</h3><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 110 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 111 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserInput"></a>htmlParserInput</h3><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 112 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 113 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserInputPtr"></a>htmlParserInputPtr</h3><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 114 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 115 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlDocPtr"></a>htmlDocPtr</h3><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 116 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 117 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlNodePtr"></a>htmlNodePtr</h3><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 118 | |
| 119 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlElemDesc"></a>struct htmlElemDesc</h3><pre class="programlisting">struct htmlElemDesc { |
| 120 | const char *name; /* The tag name */ |
| 121 | char startTag; /* Whether the start tag can be implied */ |
| 122 | char endTag; /* Whether the end tag can be implied */ |
| 123 | char saveEndTag; /* Whether the end tag should be saved */ |
| 124 | char empty; /* Is this an empty element ? */ |
| 125 | char depr; /* Is this a deprecated element ? */ |
| 126 | char dtd; /* 1: only in Loose DTD, 2: only Frameset one */ |
| 127 | char isinline; /* is this a block 0 or inline 1 element */ |
| 128 | const char *desc; /* the description */ |
| 129 | |
| 130 | /* NRK Jan.2003 |
| 131 | * New fields encapsulating HTML structure |
| 132 | * |
| 133 | * Bugs: |
| 134 | * This is a very limited representation. It fails to tell us when |
| 135 | * an element *requires* subelements (we only have whether they're |
| 136 | * allowed or not), and it doesn't tell us where CDATA and PCDATA |
| 137 | * are allowed. Some element relationships are not fully represented: |
| 138 | * these are flagged with the word MODIFIER |
| 139 | */ |
| 140 | const char** subelts; /* allowed sub-elements of this element */ |
| 141 | const char* defaultsubelt; /* subelement for suggested auto-repair |
| 142 | if necessary or NULL */ |
| 143 | const char** attrs_opt; /* Optional Attributes */ |
| 144 | const char** attrs_depr; /* Additional deprecated attributes */ |
| 145 | const char** attrs_req; /* Required attributes */ |
| 146 | }; |
| 147 | </pre><p> |
| 148 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 149 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlElemDescPtr"></a>htmlElemDescPtr</h3><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 150 | |
| 151 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityDesc"></a>struct htmlEntityDesc</h3><pre class="programlisting">struct htmlEntityDesc { |
| 152 | unsigned int value; /* the UNICODE value for the character */ |
| 153 | const char *name; /* The entity name */ |
| 154 | const char *desc; /* the description */ |
| 155 | }; |
| 156 | </pre><p> |
| 157 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 158 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityDescPtr"></a>htmlEntityDescPtr</h3><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 159 | |
| 160 | </p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlTagLookup"></a>htmlTagLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>* htmlTagLookup (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *tag);</pre><p> |
| 161 | Lookup the HTML tag in the ElementTable</p><p> |
| 162 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 163 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>tag</tt></i> :</span></td><td> |
| 164 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 165 | |
| 166 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 167 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityLookup"></a>htmlEntityLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlEntityLookup (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);</pre><p> |
| 168 | Lookup the given entity in EntitiesTable |
| 169 | </p><p> |
| 170 | TODO: the linear scan is really ugly, a hash table is really needed.</p><p> |
| 171 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 172 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i> :</span></td><td> |
| 173 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 174 | |
| 175 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 176 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityValueLookup"></a>htmlEntityValueLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlEntityValueLookup (unsigned int value);</pre><p> |
| 177 | Lookup the given entity in EntitiesTable |
| 178 | </p><p> |
| 179 | TODO: the linear scan is really ugly, a hash table is really needed.</p><p> |
| 180 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 181 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>value</tt></i> :</span></td><td> |
| 182 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 183 | |
| 184 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 185 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlIsAutoClosed"></a>htmlIsAutoClosed ()</h3><pre class="programlisting">int htmlIsAutoClosed (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, |
| 186 | <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);</pre><p> |
| 187 | The HTML DTD allows a tag to implicitly close other tags. |
| 188 | The list is kept in htmlStartClose array. This function checks |
| 189 | if a tag is autoclosed by one of its children</p><p> |
| 190 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 191 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>doc</tt></i> :</span></td><td> |
| 192 | </td></tr><tr><td><span class="term"><i><tt>elem</tt></i> :</span></td><td> |
| 193 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 194 | |
| 195 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 196 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlAutoCloseTag"></a>htmlAutoCloseTag ()</h3><pre class="programlisting">int htmlAutoCloseTag (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, |
| 197 | const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name, |
| 198 | <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);</pre><p> |
| 199 | The HTML DTD allows a tag to implicitly close other tags. |
| 200 | The list is kept in htmlStartClose array. This function checks |
| 201 | if the element or one of its children would autoclose the |
| 202 | given tag.</p><p> |
| 203 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 204 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>doc</tt></i> :</span></td><td> |
| 205 | </td></tr><tr><td><span class="term"><i><tt>name</tt></i> :</span></td><td> |
| 206 | </td></tr><tr><td><span class="term"><i><tt>elem</tt></i> :</span></td><td> |
| 207 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 208 | |
| 209 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 210 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseEntityRef"></a>htmlParseEntityRef ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlParseEntityRef (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, |
| 211 | <a href="libxml-tree.html#xmlChar">xmlChar</a> **str);</pre><p> |
| 212 | parse an HTML ENTITY reference |
| 213 | </p><p> |
| 214 | [68] EntityRef ::= '&' Name ';'</p><p> |
| 215 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 216 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i> :</span></td><td> |
| 217 | </td></tr><tr><td><span class="term"><i><tt>str</tt></i> :</span></td><td> |
| 218 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 219 | |
| 220 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 221 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseCharRef"></a>htmlParseCharRef ()</h3><pre class="programlisting">int htmlParseCharRef (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p> |
| 222 | parse Reference declarations |
| 223 | </p><p> |
| 224 | [66] CharRef ::= '&amp;#' [0-9]+ ';' | |
| 225 | '&amp;#x' [0-9a-fA-F]+ ';'</p><p> |
| 226 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 227 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i> :</span></td><td> |
| 228 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 229 | |
| 230 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 231 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseElement"></a>htmlParseElement ()</h3><pre class="programlisting">void htmlParseElement (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p> |
| 232 | parse an HTML element, this is highly recursive |
| 233 | </p><p> |
| 234 | [39] element ::= EmptyElemTag | STag content ETag |
| 235 | </p><p> |
| 236 | [41] Attribute ::= Name Eq AttValue</p><p> |
| 237 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 238 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i> :</span></td><td> |
| 239 | |
| 240 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 241 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlCreateMemoryParserCtxt"></a>htmlCreateMemoryParserCtxt ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreateMemoryParserCtxt |
| 242 | (const char *buffer, |
| 243 | int size);</pre><p> |
| 244 | Create a parser context for an HTML in-memory document.</p><p> |
| 245 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 246 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>buffer</tt></i> :</span></td><td> |
| 247 | </td></tr><tr><td><span class="term"><i><tt>size</tt></i> :</span></td><td> |
| 248 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 249 | |
| 250 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 251 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseDocument"></a>htmlParseDocument ()</h3><pre class="programlisting">int htmlParseDocument (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p> |
| 252 | parse an HTML document (and build a tree if using the standard SAX |
| 253 | interface).</p><p> |
| 254 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 255 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i> :</span></td><td> |
| 256 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 257 | |
| 258 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 259 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXParseDoc"></a>htmlSAXParseDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlSAXParseDoc (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur, |
| 260 | const char *encoding, |
| 261 | <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, |
| 262 | void *userData);</pre><p> |
| 263 | Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks |
| 264 | to handle parse events. If sax is NULL, fall back to the default DOM |
| 265 | behavior and return a tree.</p><p> |
| 266 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 267 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>cur</tt></i> :</span></td><td> |
| 268 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i> :</span></td><td> |
| 269 | </td></tr><tr><td><span class="term"><i><tt>sax</tt></i> :</span></td><td> |
| 270 | </td></tr><tr><td><span class="term"><i><tt>userData</tt></i> :</span></td><td> |
| 271 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 272 | |
| 273 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 274 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseDoc"></a>htmlParseDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlParseDoc (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur, |
| 275 | const char *encoding);</pre><p> |
| 276 | parse an HTML in-memory document and build a tree.</p><p> |
| 277 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 278 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>cur</tt></i> :</span></td><td> |
| 279 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i> :</span></td><td> |
| 280 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 281 | |
| 282 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 283 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXParseFile"></a>htmlSAXParseFile ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlSAXParseFile (const char *filename, |
| 284 | const char *encoding, |
| 285 | <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, |
| 286 | void *userData);</pre><p> |
| 287 | parse an HTML file and build a tree. Automatic support for ZLIB/Compress |
| 288 | compressed documents is provided by default if found at compile-time. |
| 289 | It uses the given SAX function block to handle the parsing callbacks. |
| 290 | If sax is NULL, fall back to the default DOM tree building routines.</p><p> |
| 291 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 292 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>filename</tt></i> :</span></td><td> |
| 293 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i> :</span></td><td> |
| 294 | </td></tr><tr><td><span class="term"><i><tt>sax</tt></i> :</span></td><td> |
| 295 | </td></tr><tr><td><span class="term"><i><tt>userData</tt></i> :</span></td><td> |
| 296 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 297 | |
| 298 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 299 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseFile"></a>htmlParseFile ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlParseFile (const char *filename, |
| 300 | const char *encoding);</pre><p> |
| 301 | parse an HTML file and build a tree. Automatic support for ZLIB/Compress |
| 302 | compressed documents is provided by default if found at compile-time.</p><p> |
| 303 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 304 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>filename</tt></i> :</span></td><td> |
| 305 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i> :</span></td><td> |
| 306 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 307 | |
| 308 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 309 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="UTF8ToHtml"></a>UTF8ToHtml ()</h3><pre class="programlisting">int UTF8ToHtml (unsigned char *out, |
| 310 | int *outlen, |
| 311 | unsigned char *in, |
| 312 | int *inlen);</pre><p> |
| 313 | Take a block of UTF-8 chars in and try to convert it to an ASCII |
| 314 | plus HTML entities block of chars out.</p><p> |
| 315 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 316 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td> |
| 317 | </td></tr><tr><td><span class="term"><i><tt>outlen</tt></i> :</span></td><td> |
| 318 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> |
| 319 | </td></tr><tr><td><span class="term"><i><tt>inlen</tt></i> :</span></td><td> |
| 320 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 321 | |
| 322 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 323 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEncodeEntities"></a>htmlEncodeEntities ()</h3><pre class="programlisting">int htmlEncodeEntities (unsigned char *out, |
| 324 | int *outlen, |
| 325 | unsigned char *in, |
| 326 | int *inlen, |
| 327 | int quoteChar);</pre><p> |
| 328 | Take a block of UTF-8 chars in and try to convert it to an ASCII |
| 329 | plus HTML entities block of chars out.</p><p> |
| 330 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 331 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td> |
| 332 | </td></tr><tr><td><span class="term"><i><tt>outlen</tt></i> :</span></td><td> |
| 333 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td> |
| 334 | </td></tr><tr><td><span class="term"><i><tt>inlen</tt></i> :</span></td><td> |
| 335 | </td></tr><tr><td><span class="term"><i><tt>quoteChar</tt></i> :</span></td><td> |
| 336 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 337 | |
| 338 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 339 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlIsScriptAttribute"></a>htmlIsScriptAttribute ()</h3><pre class="programlisting">int htmlIsScriptAttribute (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);</pre><p> |
| 340 | Check if an attribute is of content type Script</p><p> |
| 341 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 342 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i> :</span></td><td> |
| 343 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 344 | |
| 345 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 346 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlHandleOmittedElem"></a>htmlHandleOmittedElem ()</h3><pre class="programlisting">int htmlHandleOmittedElem (int val);</pre><p> |
| 347 | Set and return the previous value for handling HTML omitted tags.</p><p> |
| 348 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 349 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>val</tt></i> :</span></td><td> |
| 350 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 351 | |
| 352 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 353 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlFreeParserCtxt"></a>htmlFreeParserCtxt ()</h3><pre class="programlisting">void htmlFreeParserCtxt (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p> |
| 354 | Free all the memory used by a parser context. However the parsed |
| 355 | document in ctxt-&gt;myDoc is not freed.</p><p> |
| 356 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 357 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i> :</span></td><td> |
| 358 | |
| 359 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 360 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlCreatePushParserCtxt"></a>htmlCreatePushParserCtxt ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreatePushParserCtxt (<a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, |
| 361 | void *user_data, |
| 362 | const char *chunk, |
| 363 | int size, |
| 364 | const char *filename, |
| 365 | <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p> |
| 366 | Create a parser context for using the HTML parser in push mode. |
| 367 | The value of <i><tt>filename</tt></i> is used for fetching external entities |
| 368 | and error/warning reports.</p><p> |
| 369 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 370 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>sax</tt></i> :</span></td><td> |
| 371 | </td></tr><tr><td><span class="term"><i><tt>user_data</tt></i> :</span></td><td> |
| 372 | </td></tr><tr><td><span class="term"><i><tt>chunk</tt></i> :</span></td><td> |
| 373 | </td></tr><tr><td><span class="term"><i><tt>size</tt></i> :</span></td><td> |
| 374 | </td></tr><tr><td><span class="term"><i><tt>filename</tt></i> :</span></td><td> |
| 375 | </td></tr><tr><td><span class="term"><i><tt>enc</tt></i> :</span></td><td> |
| 376 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 377 | |
| 378 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 379 | </td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseChunk"></a>htmlParseChunk ()</h3><pre class="programlisting">int htmlParseChunk (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, |
| 380 | const char *chunk, |
| 381 | int size, |
| 382 | int terminate);</pre><p> |
| 383 | Parse a chunk of memory.</p><p> |
| 384 | |
Daniel Veillard | 93d9525 | 2003-04-29 20:25:40 +0000 | [diff] [blame] | 385 | </p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i> :</span></td><td> |
| 386 | </td></tr><tr><td><span class="term"><i><tt>chunk</tt></i> :</span></td><td> |
| 387 | </td></tr><tr><td><span class="term"><i><tt>size</tt></i> :</span></td><td> |
| 388 | </td></tr><tr><td><span class="term"><i><tt>terminate</tt></i> :</span></td><td> |
| 389 | </td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td> |
| 390 | |
| 391 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 392 | </td></tr></tbody></table></div></div></div></div><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation footer" cellpadding="2" cellspacing="0"><tr valign="middle"><td align="left"><a accesskey="p" href="libxml-xmlerror.html"><b>&lt;&lt; xmlerror</b></a></td><td align="right"><a accesskey="n" href="libxml-HTMLtree.html"><b>HTMLtree &gt;&gt;</b></a></td></tr></table></body></html> |