blob: 35f63e3a30843115cf8f72ac05bcca764d084e71 [file] [log] [blame]
William M. Brack2ad1dff2003-11-15 10:35:20 +00001<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>HTMLparser</title><meta name="generator" content="DocBook XSL Stylesheets V1.48"><meta name="generator" content="GTK-Doc V1.1 (XML mode)"><style type="text/css">
Daniel Veillardd4330462003-04-29 12:40:16 +00002 .synopsis, .classsynopsis {
3 background: #eeeeee;
4 border: solid 1px #aaaaaa;
5 padding: 0.5em;
6 }
7 .programlisting {
8 background: #eeeeff;
9 border: solid 1px #aaaaff;
10 padding: 0.5em;
11 }
12 .variablelist {
13 padding: 4px;
14 margin-left: 3em;
15 }
16 .navigation {
17 background: #ffeeee;
18 border: solid 1px #ffaaaa;
19 margin-top: 0.5em;
20 margin-bottom: 0.5em;
21 }
22 .navigation a {
23 color: #770000;
24 }
25 .navigation a:visited {
26 color: #550000;
27 }
28 .navigation .title {
29 font-size: 200%;
30 }
William M. Brack60f394e2003-11-16 06:25:42 +000031 </style><link rel="home" href="index.html" title="Gnome XML Library Reference Manual"><link rel="up" href="ch01.html" title="Libxml Programming Notes"><link rel="previous" href="libxml-entities.html" title="entities"><link rel="next" href="libxml-valid.html" title="valid"></head><body text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><table class="navigation" width="100%" summary="Navigation header" cellpadding="2" cellspacing="2"><tr valign="middle"><td><a accesskey="p" href="libxml-entities.html"><img src="left.png" width="24" height="24" border="0" alt="Prev"></a></td><td><a accesskey="u" href="ch01.html"><img src="up.png" width="24" height="24" border="0" alt="Up"></a></td><td><a accesskey="h" href="index.html"><img src="home.png" width="24" height="24" border="0" alt="Home"></a></td><th width="100%" align="center">Gnome XML Library Reference Manual</th><td><a accesskey="n" href="libxml-valid.html"><img src="right.png" width="24" height="24" border="0" alt="Next"></a></td></tr></table><div class="refentry"><a name="libxml-HTMLparser"></a><div class="titlepage"></div><div class="refnamediv"><h2>HTMLparser</h2><p>HTMLparser &#8212; </p></div><div class="refsynopsisdiv"><h2><h1 class="title"><a name="id2762484"></a>Synopsis</h1></h2><pre class="synopsis">
Daniel Veillardd4330462003-04-29 12:40:16 +000032
33
34
35typedef <a href="libxml-HTMLparser.html#htmlParserCtxt">htmlParserCtxt</a>;
36typedef <a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a>;
37typedef <a href="libxml-HTMLparser.html#htmlParserNodeInfo">htmlParserNodeInfo</a>;
38typedef <a href="libxml-HTMLparser.html#htmlSAXHandler">htmlSAXHandler</a>;
39typedef <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a>;
40typedef <a href="libxml-HTMLparser.html#htmlParserInput">htmlParserInput</a>;
41typedef <a href="libxml-HTMLparser.html#htmlParserInputPtr">htmlParserInputPtr</a>;
42typedef <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a>;
43typedef <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a>;
44struct <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>;
45typedef <a href="libxml-HTMLparser.html#htmlElemDescPtr">htmlElemDescPtr</a>;
46struct <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>;
47typedef <a href="libxml-HTMLparser.html#htmlEntityDescPtr">htmlEntityDescPtr</a>;
William M. Brack60f394e2003-11-16 06:25:42 +000048const <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>* <a href="libxml-HTMLparser.html#htmlTagLookup">htmlTagLookup</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *tag);
49const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlEntityLookup">htmlEntityLookup</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);
50const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlEntityValueLookup">htmlEntityValueLookup</a> (unsigned int value);
51int <a href="libxml-HTMLparser.html#htmlIsAutoClosed">htmlIsAutoClosed</a> (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc,
52 <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);
53int <a href="libxml-HTMLparser.html#htmlAutoCloseTag">htmlAutoCloseTag</a> (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc,
54 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name,
55 <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);
56const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlParseEntityRef">htmlParseEntityRef</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
57 const <a href="libxml-tree.html#xmlChar">xmlChar</a> **str);
58int <a href="libxml-HTMLparser.html#htmlParseCharRef">htmlParseCharRef</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
59void <a href="libxml-HTMLparser.html#htmlParseElement">htmlParseElement</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
60<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="libxml-HTMLparser.html#htmlCreateMemoryParserCtxt">htmlCreateMemoryParserCtxt</a>
61 (const char *buffer,
62 int size);
63int <a href="libxml-HTMLparser.html#htmlParseDocument">htmlParseDocument</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
64<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlSAXParseDoc">htmlSAXParseDoc</a> (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
65 const char *encoding,
66 <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
67 void *userData);
68<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlParseDoc">htmlParseDoc</a> (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
69 const char *encoding);
70<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlSAXParseFile">htmlSAXParseFile</a> (const char *filename,
71 const char *encoding,
72 <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
73 void *userData);
74<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlParseFile">htmlParseFile</a> (const char *filename,
75 const char *encoding);
76int <a href="libxml-HTMLparser.html#UTF8ToHtml">UTF8ToHtml</a> (unsigned char *out,
77 int *outlen,
78 unsigned char *in,
79 int *inlen);
80int <a href="libxml-HTMLparser.html#htmlEncodeEntities">htmlEncodeEntities</a> (unsigned char *out,
81 int *outlen,
82 unsigned char *in,
83 int *inlen,
84 int quoteChar);
85int <a href="libxml-HTMLparser.html#htmlIsScriptAttribute">htmlIsScriptAttribute</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);
86int <a href="libxml-HTMLparser.html#htmlHandleOmittedElem">htmlHandleOmittedElem</a> (int val);
87<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="libxml-HTMLparser.html#htmlCreatePushParserCtxt">htmlCreatePushParserCtxt</a> (<a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
88 void *user_data,
89 const char *chunk,
90 int size,
91 const char *filename,
92 <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);
93int <a href="libxml-HTMLparser.html#htmlParseChunk">htmlParseChunk</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
94 const char *chunk,
95 int size,
96 int terminate);
97void <a href="libxml-HTMLparser.html#htmlFreeParserCtxt">htmlFreeParserCtxt</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
William M. Brack2ad1dff2003-11-15 10:35:20 +000098enum <a href="libxml-HTMLparser.html#htmlParserOption">htmlParserOption</a>;
William M. Brack60f394e2003-11-16 06:25:42 +000099void <a href="libxml-HTMLparser.html#htmlCtxtReset">htmlCtxtReset</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
100int <a href="libxml-HTMLparser.html#htmlCtxtUseOptions">htmlCtxtUseOptions</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
101 int options);
102<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlReadDoc">htmlReadDoc</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
103 const char *URL,
104 const char *encoding,
105 int options);
106<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlReadFile">htmlReadFile</a> (const char *URL,
107 const char *encoding,
108 int options);
109<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlReadMemory">htmlReadMemory</a> (const char *buffer,
110 int size,
111 const char *URL,
112 const char *encoding,
113 int options);
114<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlReadFd">htmlReadFd</a> (int fd,
115 const char *URL,
116 const char *encoding,
117 int options);
118<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlReadIO">htmlReadIO</a> (<a href="libxml-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback</a> ioread,
119 <a href="libxml-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback</a> ioclose,
120 void *ioctx,
121 const char *URL,
122 const char *encoding,
123 int options);
124<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlCtxtReadDoc">htmlCtxtReadDoc</a> (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt,
125 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
126 const char *URL,
127 const char *encoding,
128 int options);
129<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlCtxtReadFile">htmlCtxtReadFile</a> (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt,
130 const char *filename,
131 const char *encoding,
132 int options);
133<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlCtxtReadMemory">htmlCtxtReadMemory</a> (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt,
134 const char *buffer,
135 int size,
136 const char *URL,
137 const char *encoding,
138 int options);
139<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlCtxtReadFd">htmlCtxtReadFd</a> (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt,
140 int fd,
141 const char *URL,
142 const char *encoding,
143 int options);
144<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlCtxtReadIO">htmlCtxtReadIO</a> (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt,
145 <a href="libxml-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback</a> ioread,
146 <a href="libxml-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback</a> ioclose,
147 void *ioctx,
148 const char *URL,
149 const char *encoding,
150 int options);
William M. Brack2ad1dff2003-11-15 10:35:20 +0000151</pre></div><div class="refsect1"><h2>Description</h2><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000152
William M. Brack2ad1dff2003-11-15 10:35:20 +0000153</p></div><div class="refsect1"><h2>Details</h2><div class="refsect2"><h3><a name="htmlParserCtxt"></a>htmlParserCtxt</h3><pre class="programlisting">typedef xmlParserCtxt htmlParserCtxt;
Daniel Veillardd7cec922003-06-13 12:30:10 +0000154</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000155
William M. Brack2ad1dff2003-11-15 10:35:20 +0000156</p></div><hr><div class="refsect2"><h3><a name="htmlParserCtxtPtr"></a>htmlParserCtxtPtr</h3><pre class="programlisting">typedef xmlParserCtxtPtr htmlParserCtxtPtr;
Daniel Veillardd7cec922003-06-13 12:30:10 +0000157</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000158
William M. Brack2ad1dff2003-11-15 10:35:20 +0000159</p></div><hr><div class="refsect2"><h3><a name="htmlParserNodeInfo"></a>htmlParserNodeInfo</h3><pre class="programlisting">typedef xmlParserNodeInfo htmlParserNodeInfo;
Daniel Veillardd7cec922003-06-13 12:30:10 +0000160</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000161
William M. Brack2ad1dff2003-11-15 10:35:20 +0000162</p></div><hr><div class="refsect2"><h3><a name="htmlSAXHandler"></a>htmlSAXHandler</h3><pre class="programlisting">typedef xmlSAXHandler htmlSAXHandler;
Daniel Veillardd7cec922003-06-13 12:30:10 +0000163</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000164
William M. Brack2ad1dff2003-11-15 10:35:20 +0000165</p></div><hr><div class="refsect2"><h3><a name="htmlSAXHandlerPtr"></a>htmlSAXHandlerPtr</h3><pre class="programlisting">typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
Daniel Veillardd7cec922003-06-13 12:30:10 +0000166</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000167
William M. Brack2ad1dff2003-11-15 10:35:20 +0000168</p></div><hr><div class="refsect2"><h3><a name="htmlParserInput"></a>htmlParserInput</h3><pre class="programlisting">typedef xmlParserInput htmlParserInput;
Daniel Veillardd7cec922003-06-13 12:30:10 +0000169</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000170
William M. Brack2ad1dff2003-11-15 10:35:20 +0000171</p></div><hr><div class="refsect2"><h3><a name="htmlParserInputPtr"></a>htmlParserInputPtr</h3><pre class="programlisting">typedef xmlParserInputPtr htmlParserInputPtr;
Daniel Veillardd7cec922003-06-13 12:30:10 +0000172</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000173
William M. Brack2ad1dff2003-11-15 10:35:20 +0000174</p></div><hr><div class="refsect2"><h3><a name="htmlDocPtr"></a>htmlDocPtr</h3><pre class="programlisting">typedef xmlDocPtr htmlDocPtr;
Daniel Veillardd7cec922003-06-13 12:30:10 +0000175</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000176
William M. Brack2ad1dff2003-11-15 10:35:20 +0000177</p></div><hr><div class="refsect2"><h3><a name="htmlNodePtr"></a>htmlNodePtr</h3><pre class="programlisting">typedef xmlNodePtr htmlNodePtr;
Daniel Veillardd7cec922003-06-13 12:30:10 +0000178</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000179
William M. Brack2ad1dff2003-11-15 10:35:20 +0000180</p></div><hr><div class="refsect2"><h3><a name="htmlElemDesc"></a>struct htmlElemDesc</h3><pre class="programlisting">struct htmlElemDesc {
181
Daniel Veillardd4330462003-04-29 12:40:16 +0000182 const char *name; /* The tag name */
183 char startTag; /* Whether the start tag can be implied */
184 char endTag; /* Whether the end tag can be implied */
185 char saveEndTag; /* Whether the end tag should be saved */
186 char empty; /* Is this an empty element ? */
187 char depr; /* Is this a deprecated element ? */
188 char dtd; /* 1: only in Loose DTD, 2: only Frameset one */
189 char isinline; /* is this a block 0 or inline 1 element */
190 const char *desc; /* the description */
191
192/* NRK Jan.2003
193 * New fields encapsulating HTML structure
194 *
195 * Bugs:
196 * This is a very limited representation. It fails to tell us when
197 * an element *requires* subelements (we only have whether they're
198 * allowed or not), and it doesn't tell us where CDATA and PCDATA
199 * are allowed. Some element relationships are not fully represented:
200 * these are flagged with the word MODIFIER
201 */
202 const char** subelts; /* allowed sub-elements of this element */
203 const char* defaultsubelt; /* subelement for suggested auto-repair
204 if necessary or NULL */
205 const char** attrs_opt; /* Optional Attributes */
206 const char** attrs_depr; /* Additional deprecated attributes */
207 const char** attrs_req; /* Required attributes */
208};
209</pre><p>
210
William M. Brack2ad1dff2003-11-15 10:35:20 +0000211</p></div><hr><div class="refsect2"><h3><a name="htmlElemDescPtr"></a>htmlElemDescPtr</h3><pre class="programlisting">typedef htmlElemDesc *htmlElemDescPtr;
Daniel Veillardd7cec922003-06-13 12:30:10 +0000212</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000213
William M. Brack2ad1dff2003-11-15 10:35:20 +0000214</p></div><hr><div class="refsect2"><h3><a name="htmlEntityDesc"></a>struct htmlEntityDesc</h3><pre class="programlisting">struct htmlEntityDesc {
215
Daniel Veillardd4330462003-04-29 12:40:16 +0000216 unsigned int value; /* the UNICODE value for the character */
217 const char *name; /* The entity name */
218 const char *desc; /* the description */
219};
220</pre><p>
221
William M. Brack2ad1dff2003-11-15 10:35:20 +0000222</p></div><hr><div class="refsect2"><h3><a name="htmlEntityDescPtr"></a>htmlEntityDescPtr</h3><pre class="programlisting">typedef htmlEntityDesc *htmlEntityDescPtr;
Daniel Veillardd7cec922003-06-13 12:30:10 +0000223</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000224
William M. Brack60f394e2003-11-16 06:25:42 +0000225</p></div><hr><div class="refsect2"><h3><a name="htmlTagLookup"></a>htmlTagLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>* htmlTagLookup (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *tag);</pre><p>
226Lookup the HTML tag in the ElementTable</p><p>
227
228</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>tag</tt></i>:</span></td><td> The tag name in lowercase
229</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the related htmlElemDescPtr or NULL if not found.
230</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlEntityLookup"></a>htmlEntityLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlEntityLookup (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);</pre><p>
231Lookup the given entity in EntitiesTable
232</p><p>
233TODO: the linear scan is really ugly, a hash table is really needed.</p><p>
234
235</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>name</tt></i>:</span></td><td> the entity name
236</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the associated htmlEntityDescPtr if found, NULL otherwise.
237</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlEntityValueLookup"></a>htmlEntityValueLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlEntityValueLookup (unsigned int value);</pre><p>
238Lookup the given entity in EntitiesTable
239</p><p>
240TODO: the linear scan is really ugly, a hash table is really needed.</p><p>
241
242</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>value</tt></i>:</span></td><td> the entity's unicode value
243</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the associated htmlEntityDescPtr if found, NULL otherwise.
244</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlIsAutoClosed"></a>htmlIsAutoClosed ()</h3><pre class="programlisting">int htmlIsAutoClosed (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc,
245 <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);</pre><p>
246The HTML DTD allows a tag to implicitly close other tags.
247The list is kept in htmlStartClose array. This function checks
248if a tag is autoclosed by one of its children</p><p>
249
250</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>doc</tt></i>:</span></td><td> the HTML document
251</td></tr><tr><td><span class="term"><i><tt>elem</tt></i>:</span></td><td> the HTML element
252</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>1 if autoclosed, 0 otherwise
253</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlAutoCloseTag"></a>htmlAutoCloseTag ()</h3><pre class="programlisting">int htmlAutoCloseTag (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc,
254 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name,
255 <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);</pre><p>
256The HTML DTD allows a tag to implicitly close other tags.
257The list is kept in htmlStartClose array. This function checks
258if the element or one of its children would autoclose the
259given tag.</p><p>
260
261</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>doc</tt></i>:</span></td><td> the HTML document
262</td></tr><tr><td><span class="term"><i><tt>name</tt></i>:</span></td><td> The tag name
263</td></tr><tr><td><span class="term"><i><tt>elem</tt></i>:</span></td><td> the HTML element
264</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>1 if autoclose, 0 otherwise
265</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlParseEntityRef"></a>htmlParseEntityRef ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlParseEntityRef (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
266 const <a href="libxml-tree.html#xmlChar">xmlChar</a> **str);</pre><p>
267parse an HTML entity reference
268</p><p>
269[68] EntityRef ::= '&amp;' Name ';'</p><p>
270
271</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context
272</td></tr><tr><td><span class="term"><i><tt>str</tt></i>:</span></td><td> location to store the entity name
273</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the associated htmlEntityDescPtr if found, or NULL otherwise,
274 if non-NULL *str will have to be freed by the caller.
275</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlParseCharRef"></a>htmlParseCharRef ()</h3><pre class="programlisting">int htmlParseCharRef (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p>
276parse Reference declarations
277</p><p>
278[66] CharRef ::= '&amp;#' [0-9]+ ';' |
279 '&amp;#x' [0-9a-fA-F]+ ';'</p><p>
280
281</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context
282</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the value parsed (as an int)
283</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlParseElement"></a>htmlParseElement ()</h3><pre class="programlisting">void htmlParseElement (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p>
284parse an HTML element, this is highly recursive
285</p><p>
286[39] element ::= EmptyElemTag | STag content ETag
287</p><p>
288[41] Attribute ::= Name Eq AttValue</p><p>
289
290</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context
291</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlCreateMemoryParserCtxt"></a>htmlCreateMemoryParserCtxt ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreateMemoryParserCtxt
292 (const char *buffer,
293 int size);</pre><p>
294Create a parser context for an HTML in-memory document.</p><p>
295
296</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>buffer</tt></i>:</span></td><td> a pointer to a char array
297</td></tr><tr><td><span class="term"><i><tt>size</tt></i>:</span></td><td> the size of the array
298</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the new parser context or NULL
299</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlParseDocument"></a>htmlParseDocument ()</h3><pre class="programlisting">int htmlParseDocument (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p>
300parse an HTML document (and build a tree if using the standard SAX
301interface).</p><p>
302
303</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context
304</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>0 on success, -1 in case of error. The parser context is augmented
305 as a result of the parsing.
306</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlSAXParseDoc"></a>htmlSAXParseDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlSAXParseDoc (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
307 const char *encoding,
308 <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
309 void *userData);</pre><p>
310Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks
311to handle parse events. If sax is NULL, fallback to the default DOM
312behavior and return a tree.</p><p>
313
314</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>cur</tt></i>:</span></td><td> a pointer to an array of xmlChar
315</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> a free form C string describing the HTML document encoding, or NULL
316</td></tr><tr><td><span class="term"><i><tt>sax</tt></i>:</span></td><td> the SAX handler block
317</td></tr><tr><td><span class="term"><i><tt>userData</tt></i>:</span></td><td> if using SAX, this pointer will be provided on callbacks.
318</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree when sax is NULL (default DOM building), unless the document is
319 not well formed.
320</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlParseDoc"></a>htmlParseDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlParseDoc (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
321 const char *encoding);</pre><p>
322parse an HTML in-memory document and build a tree.</p><p>
323
324</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>cur</tt></i>:</span></td><td> a pointer to an array of xmlChar
325</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> a free form C string describing the HTML document encoding, or NULL
326</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree
327</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlSAXParseFile"></a>htmlSAXParseFile ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlSAXParseFile (const char *filename,
328 const char *encoding,
329 <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
330 void *userData);</pre><p>
331parse an HTML file and build a tree. Automatic support for ZLIB/Compress
332compressed document is provided by default if found at compile-time.
333It uses the given SAX function block to handle the parsing callbacks.
334If sax is NULL, fallback to the default DOM tree building routines.</p><p>
335
336</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>filename</tt></i>:</span></td><td> the filename
337</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> a free form C string describing the HTML document encoding, or NULL
338</td></tr><tr><td><span class="term"><i><tt>sax</tt></i>:</span></td><td> the SAX handler block
339</td></tr><tr><td><span class="term"><i><tt>userData</tt></i>:</span></td><td> if using SAX, this pointer will be provided on callbacks.
340</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree when sax is NULL (default DOM building), unless the document is
341 not well formed.
342</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlParseFile"></a>htmlParseFile ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlParseFile (const char *filename,
343 const char *encoding);</pre><p>
344parse an HTML file and build a tree. Automatic support for ZLIB/Compress
345compressed document is provided by default if found at compile-time.</p><p>
346
347</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>filename</tt></i>:</span></td><td> the filename
348</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> a free form C string describing the HTML document encoding, or NULL
349</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree
350</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="UTF8ToHtml"></a>UTF8ToHtml ()</h3><pre class="programlisting">int UTF8ToHtml (unsigned char *out,
351 int *outlen,
352 unsigned char *in,
353 int *inlen);</pre><p>
354Take a block of UTF-8 chars in and try to convert it to an ASCII
355plus HTML entities block of chars out.</p><p>
356
357</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>out</tt></i>:</span></td><td> a pointer to an array of bytes to store the result
358</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>:</span></td><td> the length of <i><tt>out</tt></i>
359</td></tr><tr><td><span class="term"><i><tt>in</tt></i>:</span></td><td> a pointer to an array of UTF-8 chars
360</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>:</span></td><td> the length of <i><tt>in</tt></i>
361</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise
362The value of <i><tt>inlen</tt></i> after return is the number of octets consumed
363 if the return value is non-negative, else unpredictable.
364The value of <i><tt>outlen</tt></i> after return is the number of octets produced.
365</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlEncodeEntities"></a>htmlEncodeEntities ()</h3><pre class="programlisting">int htmlEncodeEntities (unsigned char *out,
366 int *outlen,
367 unsigned char *in,
368 int *inlen,
369 int quoteChar);</pre><p>
370Take a block of UTF-8 chars in and try to convert it to an ASCII
371plus HTML entities block of chars out.</p><p>
372
373</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>out</tt></i>:</span></td><td> a pointer to an array of bytes to store the result
374</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>:</span></td><td> the length of <i><tt>out</tt></i>
375</td></tr><tr><td><span class="term"><i><tt>in</tt></i>:</span></td><td> a pointer to an array of UTF-8 chars
376</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>:</span></td><td> the length of <i><tt>in</tt></i>
377</td></tr><tr><td><span class="term"><i><tt>quoteChar</tt></i>:</span></td><td> the quote character to escape (' or ") or zero.
378</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise
379The value of <i><tt>inlen</tt></i> after return is the number of octets consumed
380 as the return value is positive, else unpredictable.
381The value of <i><tt>outlen</tt></i> after return is the number of octets produced.
382</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlIsScriptAttribute"></a>htmlIsScriptAttribute ()</h3><pre class="programlisting">int htmlIsScriptAttribute (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);</pre><p>
383Check if an attribute is of content type Script</p><p>
384
385</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>name</tt></i>:</span></td><td> an attribute name
386</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>1 if the attribute is a script, 0 otherwise
387</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlHandleOmittedElem"></a>htmlHandleOmittedElem ()</h3><pre class="programlisting">int htmlHandleOmittedElem (int val);</pre><p>
388Set and return the previous value for handling HTML omitted tags.</p><p>
389
390</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>val</tt></i>:</span></td><td> int 0 or 1
391</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the previous value: 0 for no handling, 1 for auto insertion.
392</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlCreatePushParserCtxt"></a>htmlCreatePushParserCtxt ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreatePushParserCtxt (<a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
393 void *user_data,
394 const char *chunk,
395 int size,
396 const char *filename,
397 <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p>
398Create a parser context for using the HTML parser in push mode
399The value of <i><tt>filename</tt></i> is used for fetching external entities
400and error/warning reports.</p><p>
401
402</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>sax</tt></i>:</span></td><td> a SAX handler
403</td></tr><tr><td><span class="term"><i><tt>user_data</tt></i>:</span></td><td> The user data returned on SAX callbacks
404</td></tr><tr><td><span class="term"><i><tt>chunk</tt></i>:</span></td><td> a pointer to an array of chars
405</td></tr><tr><td><span class="term"><i><tt>size</tt></i>:</span></td><td> number of chars in the array
406</td></tr><tr><td><span class="term"><i><tt>filename</tt></i>:</span></td><td> an optional file name or URI
407</td></tr><tr><td><span class="term"><i><tt>enc</tt></i>:</span></td><td> an optional encoding
408</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the new parser context or NULL
409</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlParseChunk"></a>htmlParseChunk ()</h3><pre class="programlisting">int htmlParseChunk (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
410 const char *chunk,
411 int size,
412 int terminate);</pre><p>
413Parse a chunk of memory</p><p>
414
415</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context
416</td></tr><tr><td><span class="term"><i><tt>chunk</tt></i>:</span></td><td> a char array
417</td></tr><tr><td><span class="term"><i><tt>size</tt></i>:</span></td><td> the size in bytes of the chunk
418</td></tr><tr><td><span class="term"><i><tt>terminate</tt></i>:</span></td><td> last chunk indicator
419</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>zero if no error, the xmlParserErrors otherwise.
420</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlFreeParserCtxt"></a>htmlFreeParserCtxt ()</h3><pre class="programlisting">void htmlFreeParserCtxt (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p>
421Free all the memory used by a parser context. However the parsed
422document in ctxt-&gt;myDoc is not freed.</p><p>
423
424</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context
425</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlParserOption"></a>enum htmlParserOption</h3><pre class="programlisting">typedef enum {
William M. Brack2ad1dff2003-11-15 10:35:20 +0000426 HTML_PARSE_NOERROR = 1&lt;&lt;5, /* suppress error reports */
427 HTML_PARSE_NOWARNING= 1&lt;&lt;6, /* suppress warning reports */
428 HTML_PARSE_PEDANTIC = 1&lt;&lt;7, /* pedantic error reporting */
429 HTML_PARSE_NOBLANKS = 1&lt;&lt;8, /* remove blank nodes */
430 HTML_PARSE_NONET = 1&lt;&lt;11 /* Forbid network access */
431} htmlParserOption;
432</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000433
William M. Brack60f394e2003-11-16 06:25:42 +0000434</p></div><hr><div class="refsect2"><h3><a name="htmlCtxtReset"></a>htmlCtxtReset ()</h3><pre class="programlisting">void htmlCtxtReset (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p>
435Reset a parser context</p><p>
436
437</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context
438</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlCtxtUseOptions"></a>htmlCtxtUseOptions ()</h3><pre class="programlisting">int htmlCtxtUseOptions (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
439 int options);</pre><p>
440Applies the options to the parser context</p><p>
441
442</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context
443</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s)
444</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>0 in case of success, the set of unknown or unimplemented options
445 in case of error.
446</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlReadDoc"></a>htmlReadDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadDoc (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
447 const char *URL,
448 const char *encoding,
449 int options);</pre><p>
450parse an HTML in-memory document and build a tree.</p><p>
451
452</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>cur</tt></i>:</span></td><td> a pointer to a zero terminated string
453</td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> the base URL to use for the document
454</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL
455</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s)
456</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree
457</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlReadFile"></a>htmlReadFile ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadFile (const char *URL,
458 const char *encoding,
459 int options);</pre><p>
460parse an HTML file from the filesystem or the network.</p><p>
461
462</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> a file or URL
463</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL
464</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s)
465</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree
466</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlReadMemory"></a>htmlReadMemory ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadMemory (const char *buffer,
467 int size,
468 const char *URL,
469 const char *encoding,
470 int options);</pre><p>
471parse an HTML in-memory document and build a tree.</p><p>
472
473</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>buffer</tt></i>:</span></td><td> a pointer to a char array
474</td></tr><tr><td><span class="term"><i><tt>size</tt></i>:</span></td><td> the size of the array
475</td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> the base URL to use for the document
476</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL
477</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s)
478</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree
479</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlReadFd"></a>htmlReadFd ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadFd (int fd,
480 const char *URL,
481 const char *encoding,
482 int options);</pre><p>
483parse an HTML document from a file descriptor and build a tree.</p><p>
484
485</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>fd</tt></i>:</span></td><td> an open file descriptor
486</td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> the base URL to use for the document
487</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL
488</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s)
489</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree
490</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlReadIO"></a>htmlReadIO ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadIO (<a href="libxml-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback</a> ioread,
491 <a href="libxml-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback</a> ioclose,
492 void *ioctx,
493 const char *URL,
494 const char *encoding,
495 int options);</pre><p>
496parse an HTML document from I/O functions and source and build a tree.</p><p>
497
498</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ioread</tt></i>:</span></td><td> an I/O read function
499</td></tr><tr><td><span class="term"><i><tt>ioclose</tt></i>:</span></td><td> an I/O close function
500</td></tr><tr><td><span class="term"><i><tt>ioctx</tt></i>:</span></td><td> an I/O handler
501</td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> the base URL to use for the document
502</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL
503</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s)
504</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree
505</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlCtxtReadDoc"></a>htmlCtxtReadDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadDoc (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt,
506 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
507 const char *URL,
508 const char *encoding,
509 int options);</pre><p>
510parse an HTML in-memory document and build a tree.
511This reuses the existing <i><tt>ctxt</tt></i> parser context</p><p>
512
513</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context
514</td></tr><tr><td><span class="term"><i><tt>cur</tt></i>:</span></td><td> a pointer to a zero terminated string
515</td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> the base URL to use for the document
516</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL
517</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s)
518</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree
519</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlCtxtReadFile"></a>htmlCtxtReadFile ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadFile (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt,
520 const char *filename,
521 const char *encoding,
522 int options);</pre><p>
523parse an HTML file from the filesystem or the network.
524This reuses the existing <i><tt>ctxt</tt></i> parser context</p><p>
525
526</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context
527</td></tr><tr><td><span class="term"><i><tt>filename</tt></i>:</span></td><td> a file or URL
528</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL
529</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s)
530</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree
531</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlCtxtReadMemory"></a>htmlCtxtReadMemory ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadMemory (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt,
532 const char *buffer,
533 int size,
534 const char *URL,
535 const char *encoding,
536 int options);</pre><p>
537parse an HTML in-memory document and build a tree.
538This reuses the existing <i><tt>ctxt</tt></i> parser context</p><p>
539
540</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context
541</td></tr><tr><td><span class="term"><i><tt>buffer</tt></i>:</span></td><td> a pointer to a char array
542</td></tr><tr><td><span class="term"><i><tt>size</tt></i>:</span></td><td> the size of the array
543</td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> the base URL to use for the document
544</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL
545</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s)
546</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree
547</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlCtxtReadFd"></a>htmlCtxtReadFd ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadFd (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt,
548 int fd,
549 const char *URL,
550 const char *encoding,
551 int options);</pre><p>
552parse an HTML document from a file descriptor and build a tree.
553This reuses the existing <i><tt>ctxt</tt></i> parser context</p><p>
554
555</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context
556</td></tr><tr><td><span class="term"><i><tt>fd</tt></i>:</span></td><td> an open file descriptor
557</td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> the base URL to use for the document
558</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL
559</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s)
560</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree
561</td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlCtxtReadIO"></a>htmlCtxtReadIO ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadIO (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt,
562 <a href="libxml-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback</a> ioread,
563 <a href="libxml-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback</a> ioclose,
564 void *ioctx,
565 const char *URL,
566 const char *encoding,
567 int options);</pre><p>
568parse an HTML document from I/O functions and source and build a tree.
569This reuses the existing <i><tt>ctxt</tt></i> parser context</p><p>
570
571</p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context
572</td></tr><tr><td><span class="term"><i><tt>ioread</tt></i>:</span></td><td> an I/O read function
573</td></tr><tr><td><span class="term"><i><tt>ioclose</tt></i>:</span></td><td> an I/O close function
574</td></tr><tr><td><span class="term"><i><tt>ioctx</tt></i>:</span></td><td> an I/O handler
575</td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> the base URL to use for the document
576</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL
577</td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s)
578</td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree
579</td></tr></tbody></table></div></div></div></div><table class="navigation" width="100%" summary="Navigation footer" cellpadding="2" cellspacing="0"><tr valign="middle"><td align="left"><a accesskey="p" href="libxml-entities.html"><b>&lt;&lt; entities</b></a></td><td align="right"><a accesskey="n" href="libxml-valid.html"><b>valid &gt;&gt;</b></a></td></tr></table></body></html>