blob: 7a28fd627878242c65b9c4daf3955213eb7f6e7b [file] [log] [blame]
Daniel Veillardd4330462003-04-29 12:40:16 +00001<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>HTMLparser</title><meta name="generator" content="DocBook XSL Stylesheets V1.58.1"><style xmlns="http://www.w3.org/TR/xhtml1/transitional" type="text/css">
3 .synopsis, .classsynopsis {
4 background: #eeeeee;
5 border: solid 1px #aaaaaa;
6 padding: 0.5em;
7 }
8 .programlisting {
9 background: #eeeeff;
10 border: solid 1px #aaaaff;
11 padding: 0.5em;
12 }
13 .variablelist {
14 padding: 4px;
15 margin-left: 3em;
16 }
17 .navigation {
18 background: #ffeeee;
19 border: solid 1px #ffaaaa;
20 margin-top: 0.5em;
21 margin-bottom: 0.5em;
22 }
23 .navigation a {
24 color: #770000;
25 }
26 .navigation a:visited {
27 color: #550000;
28 }
29 .navigation .title {
30 font-size: 200%;
31 }
32 </style><link rel="home" href="index.html" title="Gnome XML Library Reference Manual"><link rel="up" href="libxml-lib.html" title="Libxml Library Reference"><link rel="previous" href="libxml-xmlerror.html" title="xmlerror"><link rel="next" href="libxml-HTMLtree.html" title="HTMLtree"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation header" cellpadding="2" cellspacing="2"><tr valign="middle"><td><a accesskey="p" href="libxml-xmlerror.html"><img src="left.png" width="24" height="24" border="0" alt="Prev"></img></a></td><td><a accesskey="u" href="libxml-lib.html"><img src="up.png" width="24" height="24" border="0" alt="Up"></img></a></td><td><a accesskey="h" href="index.html"><img src="home.png" width="24" height="24" border="0" alt="Home"></img></a></td><th width="100%" align="center">Gnome XML Library Reference Manual</th><td><a accesskey="n" href="libxml-HTMLtree.html"><img src="right.png" width="24" height="24" border="0" alt="Next"></img></a></td></tr></table><div class="refentry" lang="en"><a name="libxml-HTMLparser"></a><div class="titlepage"></div><div class="refnamediv"><h2>HTMLparser</h2><p>HTMLparser &#8212; </p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><pre class="synopsis">
33
34
35
36typedef <a href="libxml-HTMLparser.html#htmlParserCtxt">htmlParserCtxt</a>;
37typedef <a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a>;
38typedef <a href="libxml-HTMLparser.html#htmlParserNodeInfo">htmlParserNodeInfo</a>;
39typedef <a href="libxml-HTMLparser.html#htmlSAXHandler">htmlSAXHandler</a>;
40typedef <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a>;
41typedef <a href="libxml-HTMLparser.html#htmlParserInput">htmlParserInput</a>;
42typedef <a href="libxml-HTMLparser.html#htmlParserInputPtr">htmlParserInputPtr</a>;
43typedef <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a>;
44typedef <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a>;
45struct <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>;
46typedef <a href="libxml-HTMLparser.html#htmlElemDescPtr">htmlElemDescPtr</a>;
47struct <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>;
48typedef <a href="libxml-HTMLparser.html#htmlEntityDescPtr">htmlEntityDescPtr</a>;
49const <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>* <a href="libxml-HTMLparser.html#htmlTagLookup">htmlTagLookup</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *tag);
50const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlEntityLookup">htmlEntityLookup</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);
51const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlEntityValueLookup">htmlEntityValueLookup</a> (unsigned int value);
52int <a href="libxml-HTMLparser.html#htmlIsAutoClosed">htmlIsAutoClosed</a> (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc,
53 <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);
54int <a href="libxml-HTMLparser.html#htmlAutoCloseTag">htmlAutoCloseTag</a> (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc,
55 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name,
56 <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);
57const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlParseEntityRef">htmlParseEntityRef</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
58 <a href="libxml-tree.html#xmlChar">xmlChar</a> **str);
59int <a href="libxml-HTMLparser.html#htmlParseCharRef">htmlParseCharRef</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
60void <a href="libxml-HTMLparser.html#htmlParseElement">htmlParseElement</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
61<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="libxml-HTMLparser.html#htmlCreateMemoryParserCtxt">htmlCreateMemoryParserCtxt</a>
62 (const char *buffer,
63 int size);
64int <a href="libxml-HTMLparser.html#htmlParseDocument">htmlParseDocument</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
65<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlSAXParseDoc">htmlSAXParseDoc</a> (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
66 const char *encoding,
67 <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
68 void *userData);
69<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlParseDoc">htmlParseDoc</a> (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
70 const char *encoding);
71<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlSAXParseFile">htmlSAXParseFile</a> (const char *filename,
72 const char *encoding,
73 <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
74 void *userData);
75<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlParseFile">htmlParseFile</a> (const char *filename,
76 const char *encoding);
77int <a href="libxml-HTMLparser.html#UTF8ToHtml">UTF8ToHtml</a> (unsigned char *out,
78 int *outlen,
79 unsigned char *in,
80 int *inlen);
81int <a href="libxml-HTMLparser.html#htmlEncodeEntities">htmlEncodeEntities</a> (unsigned char *out,
82 int *outlen,
83 unsigned char *in,
84 int *inlen,
85 int quoteChar);
86int <a href="libxml-HTMLparser.html#htmlIsScriptAttribute">htmlIsScriptAttribute</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);
87int <a href="libxml-HTMLparser.html#htmlHandleOmittedElem">htmlHandleOmittedElem</a> (int val);
88void <a href="libxml-HTMLparser.html#htmlFreeParserCtxt">htmlFreeParserCtxt</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
89<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="libxml-HTMLparser.html#htmlCreatePushParserCtxt">htmlCreatePushParserCtxt</a> (<a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
90 void *user_data,
91 const char *chunk,
92 int size,
93 const char *filename,
94 <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);
95int <a href="libxml-HTMLparser.html#htmlParseChunk">htmlParseChunk</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
96 const char *chunk,
97 int size,
98 int terminate);
99</pre></div><div class="refsect1" lang="en"><h2>Description</h2><p>
100
Daniel Veillard93d95252003-04-29 20:25:40 +0000101</p></div><div class="refsect1" lang="en"><h2>Details</h2><div class="refsect2" lang="en"><h3><a name="htmlParserCtxt"></a>htmlParserCtxt</h3><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000102
Daniel Veillard93d95252003-04-29 20:25:40 +0000103</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserCtxtPtr"></a>htmlParserCtxtPtr</h3><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000104
Daniel Veillard93d95252003-04-29 20:25:40 +0000105</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserNodeInfo"></a>htmlParserNodeInfo</h3><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000106
Daniel Veillard93d95252003-04-29 20:25:40 +0000107</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXHandler"></a>htmlSAXHandler</h3><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000108
Daniel Veillard93d95252003-04-29 20:25:40 +0000109</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXHandlerPtr"></a>htmlSAXHandlerPtr</h3><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000110
Daniel Veillard93d95252003-04-29 20:25:40 +0000111</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserInput"></a>htmlParserInput</h3><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000112
Daniel Veillard93d95252003-04-29 20:25:40 +0000113</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserInputPtr"></a>htmlParserInputPtr</h3><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000114
Daniel Veillard93d95252003-04-29 20:25:40 +0000115</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlDocPtr"></a>htmlDocPtr</h3><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000116
Daniel Veillard93d95252003-04-29 20:25:40 +0000117</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlNodePtr"></a>htmlNodePtr</h3><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000118
119</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlElemDesc"></a>struct htmlElemDesc</h3><pre class="programlisting">struct htmlElemDesc {
120 const char *name; /* The tag name */
121 char startTag; /* Whether the start tag can be implied */
122 char endTag; /* Whether the end tag can be implied */
123 char saveEndTag; /* Whether the end tag should be saved */
124 char empty; /* Is this an empty element ? */
125 char depr; /* Is this a deprecated element ? */
126 char dtd; /* 1: only in Loose DTD, 2: only Frameset one */
127 char isinline; /* is this a block 0 or inline 1 element */
128 const char *desc; /* the description */
129
130/* NRK Jan.2003
131 * New fields encapsulating HTML structure
132 *
133 * Bugs:
134 * This is a very limited representation. It fails to tell us when
135 * an element *requires* subelements (we only have whether they're
136 * allowed or not), and it doesn't tell us where CDATA and PCDATA
137 * are allowed. Some element relationships are not fully represented:
138 * these are flagged with the word MODIFIER
139 */
140 const char** subelts; /* allowed sub-elements of this element */
141 const char* defaultsubelt; /* subelement for suggested auto-repair
142 if necessary or NULL */
143 const char** attrs_opt; /* Optional Attributes */
144 const char** attrs_depr; /* Additional deprecated attributes */
145 const char** attrs_req; /* Required attributes */
146};
147</pre><p>
148
Daniel Veillard93d95252003-04-29 20:25:40 +0000149</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlElemDescPtr"></a>htmlElemDescPtr</h3><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000150
151</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityDesc"></a>struct htmlEntityDesc</h3><pre class="programlisting">struct htmlEntityDesc {
152 unsigned int value; /* the UNICODE value for the character */
153 const char *name; /* The entity name */
154 const char *desc; /* the description */
155};
156</pre><p>
157
Daniel Veillard93d95252003-04-29 20:25:40 +0000158</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityDescPtr"></a>htmlEntityDescPtr</h3><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000159
160</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlTagLookup"></a>htmlTagLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>* htmlTagLookup (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *tag);</pre><p>
161Lookup the HTML tag in the ElementTable</p><p>
162
Daniel Veillard93d95252003-04-29 20:25:40 +0000163</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>tag</tt></i> :</span></td><td>
164</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
165
166
Daniel Veillardd4330462003-04-29 12:40:16 +0000167</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityLookup"></a>htmlEntityLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlEntityLookup (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);</pre><p>
168Lookup the given entity in EntitiesTable
169</p><p>
170TODO: the linear scan is really ugly, a hash table is really needed.</p><p>
171
Daniel Veillard93d95252003-04-29 20:25:40 +0000172</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i> :</span></td><td>
173</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
174
175
Daniel Veillardd4330462003-04-29 12:40:16 +0000176</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityValueLookup"></a>htmlEntityValueLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlEntityValueLookup (unsigned int value);</pre><p>
177Lookup the given entity in EntitiesTable
178</p><p>
179TODO: the linear scan is really ugly, a hash table is really needed.</p><p>
180
Daniel Veillard93d95252003-04-29 20:25:40 +0000181</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>value</tt></i> :</span></td><td>
182</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
183
184
Daniel Veillardd4330462003-04-29 12:40:16 +0000185</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlIsAutoClosed"></a>htmlIsAutoClosed ()</h3><pre class="programlisting">int htmlIsAutoClosed (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc,
186 <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);</pre><p>
187The HTML DTD allows a tag to implicitly close other tags.
188The list is kept in htmlStartClose array. This function checks
189if a tag is autoclosed by one of its children</p><p>
190
Daniel Veillard93d95252003-04-29 20:25:40 +0000191</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>doc</tt></i> :</span></td><td>
192</td></tr><tr><td><span class="term"><i><tt>elem</tt></i> :</span></td><td>
193</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
194
195
Daniel Veillardd4330462003-04-29 12:40:16 +0000196</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlAutoCloseTag"></a>htmlAutoCloseTag ()</h3><pre class="programlisting">int htmlAutoCloseTag (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc,
197 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name,
198 <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);</pre><p>
199The HTML DTD allows a tag to implicitly close other tags.
200The list is kept in htmlStartClose array. This function checks
201if the element or one of its children would autoclose the
202given tag.</p><p>
203
Daniel Veillard93d95252003-04-29 20:25:40 +0000204</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>doc</tt></i> :</span></td><td>
205</td></tr><tr><td><span class="term"><i><tt>name</tt></i> :</span></td><td>
206</td></tr><tr><td><span class="term"><i><tt>elem</tt></i> :</span></td><td>
207</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
208
209
Daniel Veillardd4330462003-04-29 12:40:16 +0000210</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseEntityRef"></a>htmlParseEntityRef ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlParseEntityRef (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
211 <a href="libxml-tree.html#xmlChar">xmlChar</a> **str);</pre><p>
212parse an HTML ENTITY reference
213</p><p>
214[68] EntityRef ::= '&amp;' Name ';'</p><p>
215
Daniel Veillard93d95252003-04-29 20:25:40 +0000216</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i> :</span></td><td>
217</td></tr><tr><td><span class="term"><i><tt>str</tt></i> :</span></td><td>
218</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
219
220
Daniel Veillardd4330462003-04-29 12:40:16 +0000221</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseCharRef"></a>htmlParseCharRef ()</h3><pre class="programlisting">int htmlParseCharRef (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p>
222parse Reference declarations
223</p><p>
224[66] CharRef ::= '&amp;#' [0-9]+ ';' |
225 '&amp;#x' [0-9a-fA-F]+ ';'</p><p>
226
Daniel Veillard93d95252003-04-29 20:25:40 +0000227</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i> :</span></td><td>
228</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
229
230
Daniel Veillardd4330462003-04-29 12:40:16 +0000231</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseElement"></a>htmlParseElement ()</h3><pre class="programlisting">void htmlParseElement (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p>
232parse an HTML element, this is highly recursive
233</p><p>
234[39] element ::= EmptyElemTag | STag content ETag
235</p><p>
236[41] Attribute ::= Name Eq AttValue</p><p>
237
Daniel Veillard93d95252003-04-29 20:25:40 +0000238</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i> :</span></td><td>
239
240
Daniel Veillardd4330462003-04-29 12:40:16 +0000241</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlCreateMemoryParserCtxt"></a>htmlCreateMemoryParserCtxt ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreateMemoryParserCtxt
242 (const char *buffer,
243 int size);</pre><p>
244Create a parser context for an HTML in-memory document.</p><p>
245
Daniel Veillard93d95252003-04-29 20:25:40 +0000246</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>buffer</tt></i> :</span></td><td>
247</td></tr><tr><td><span class="term"><i><tt>size</tt></i> :</span></td><td>
248</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
249
250
Daniel Veillardd4330462003-04-29 12:40:16 +0000251</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseDocument"></a>htmlParseDocument ()</h3><pre class="programlisting">int htmlParseDocument (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p>
252parse an HTML document (and build a tree if using the standard SAX
253interface).</p><p>
254
Daniel Veillard93d95252003-04-29 20:25:40 +0000255</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i> :</span></td><td>
256</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
257
258
Daniel Veillardd4330462003-04-29 12:40:16 +0000259</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXParseDoc"></a>htmlSAXParseDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlSAXParseDoc (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
260 const char *encoding,
261 <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
262 void *userData);</pre><p>
263Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks
264to handle parse events. If sax is NULL, fallback to the default DOM
265behavior and return a tree.</p><p>
266
Daniel Veillard93d95252003-04-29 20:25:40 +0000267</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>cur</tt></i> :</span></td><td>
268</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i> :</span></td><td>
269</td></tr><tr><td><span class="term"><i><tt>sax</tt></i> :</span></td><td>
270</td></tr><tr><td><span class="term"><i><tt>userData</tt></i> :</span></td><td>
271</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
272
273
Daniel Veillardd4330462003-04-29 12:40:16 +0000274</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseDoc"></a>htmlParseDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlParseDoc (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
275 const char *encoding);</pre><p>
276parse an HTML in-memory document and build a tree.</p><p>
277
Daniel Veillard93d95252003-04-29 20:25:40 +0000278</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>cur</tt></i> :</span></td><td>
279</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i> :</span></td><td>
280</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
281
282
Daniel Veillardd4330462003-04-29 12:40:16 +0000283</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXParseFile"></a>htmlSAXParseFile ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlSAXParseFile (const char *filename,
284 const char *encoding,
285 <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
286 void *userData);</pre><p>
287parse an HTML file and build a tree. Automatic support for ZLIB/Compress
288compressed document is provided by default if found at compile-time.
289It uses the given SAX function block to handle the parsing callback.
290If sax is NULL, fallback to the default DOM tree building routines.</p><p>
291
Daniel Veillard93d95252003-04-29 20:25:40 +0000292</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>filename</tt></i> :</span></td><td>
293</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i> :</span></td><td>
294</td></tr><tr><td><span class="term"><i><tt>sax</tt></i> :</span></td><td>
295</td></tr><tr><td><span class="term"><i><tt>userData</tt></i> :</span></td><td>
296</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
297
298
Daniel Veillardd4330462003-04-29 12:40:16 +0000299</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseFile"></a>htmlParseFile ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlParseFile (const char *filename,
300 const char *encoding);</pre><p>
301parse an HTML file and build a tree. Automatic support for ZLIB/Compress
302compressed document is provided by default if found at compile-time.</p><p>
303
Daniel Veillard93d95252003-04-29 20:25:40 +0000304</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>filename</tt></i> :</span></td><td>
305</td></tr><tr><td><span class="term"><i><tt>encoding</tt></i> :</span></td><td>
306</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
307
308
Daniel Veillardd4330462003-04-29 12:40:16 +0000309</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="UTF8ToHtml"></a>UTF8ToHtml ()</h3><pre class="programlisting">int UTF8ToHtml (unsigned char *out,
310 int *outlen,
311 unsigned char *in,
312 int *inlen);</pre><p>
313Take a block of UTF-8 chars in and try to convert it to an ASCII
314plus HTML entities block of chars out.</p><p>
315
Daniel Veillard93d95252003-04-29 20:25:40 +0000316</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td>
317</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i> :</span></td><td>
318</td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td>
319</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i> :</span></td><td>
320</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
321
322
Daniel Veillardd4330462003-04-29 12:40:16 +0000323</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEncodeEntities"></a>htmlEncodeEntities ()</h3><pre class="programlisting">int htmlEncodeEntities (unsigned char *out,
324 int *outlen,
325 unsigned char *in,
326 int *inlen,
327 int quoteChar);</pre><p>
328Take a block of UTF-8 chars in and try to convert it to an ASCII
329plus HTML entities block of chars out.</p><p>
330
Daniel Veillard93d95252003-04-29 20:25:40 +0000331</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>out</tt></i> :</span></td><td>
332</td></tr><tr><td><span class="term"><i><tt>outlen</tt></i> :</span></td><td>
333</td></tr><tr><td><span class="term"><i><tt>in</tt></i> :</span></td><td>
334</td></tr><tr><td><span class="term"><i><tt>inlen</tt></i> :</span></td><td>
335</td></tr><tr><td><span class="term"><i><tt>quoteChar</tt></i> :</span></td><td>
336</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
337
338
Daniel Veillardd4330462003-04-29 12:40:16 +0000339</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlIsScriptAttribute"></a>htmlIsScriptAttribute ()</h3><pre class="programlisting">int htmlIsScriptAttribute (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);</pre><p>
340Check if an attribute is of content type Script</p><p>
341
Daniel Veillard93d95252003-04-29 20:25:40 +0000342</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>name</tt></i> :</span></td><td>
343</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
344
345
Daniel Veillardd4330462003-04-29 12:40:16 +0000346</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlHandleOmittedElem"></a>htmlHandleOmittedElem ()</h3><pre class="programlisting">int htmlHandleOmittedElem (int val);</pre><p>
347Set and return the previous value for handling HTML omitted tags.</p><p>
348
Daniel Veillard93d95252003-04-29 20:25:40 +0000349</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>val</tt></i> :</span></td><td>
350</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
351
352
Daniel Veillardd4330462003-04-29 12:40:16 +0000353</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlFreeParserCtxt"></a>htmlFreeParserCtxt ()</h3><pre class="programlisting">void htmlFreeParserCtxt (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p>
354Free all the memory used by a parser context. However the parsed
355document in ctxt-&gt;myDoc is not freed.</p><p>
356
Daniel Veillard93d95252003-04-29 20:25:40 +0000357</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i> :</span></td><td>
358
359
Daniel Veillardd4330462003-04-29 12:40:16 +0000360</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlCreatePushParserCtxt"></a>htmlCreatePushParserCtxt ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreatePushParserCtxt (<a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
361 void *user_data,
362 const char *chunk,
363 int size,
364 const char *filename,
365 <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p>
366Create a parser context for using the HTML parser in push mode.
367The value of <i><tt>filename</tt></i> is used for fetching external entities
368and error/warning reports.</p><p>
369
Daniel Veillard93d95252003-04-29 20:25:40 +0000370</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>sax</tt></i> :</span></td><td>
371</td></tr><tr><td><span class="term"><i><tt>user_data</tt></i> :</span></td><td>
372</td></tr><tr><td><span class="term"><i><tt>chunk</tt></i> :</span></td><td>
373</td></tr><tr><td><span class="term"><i><tt>size</tt></i> :</span></td><td>
374</td></tr><tr><td><span class="term"><i><tt>filename</tt></i> :</span></td><td>
375</td></tr><tr><td><span class="term"><i><tt>enc</tt></i> :</span></td><td>
376</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
377
378
Daniel Veillardd4330462003-04-29 12:40:16 +0000379</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseChunk"></a>htmlParseChunk ()</h3><pre class="programlisting">int htmlParseChunk (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
380 const char *chunk,
381 int size,
382 int terminate);</pre><p>
383Parse a Chunk of memory</p><p>
384
Daniel Veillard93d95252003-04-29 20:25:40 +0000385</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i> :</span></td><td>
386</td></tr><tr><td><span class="term"><i><tt>chunk</tt></i> :</span></td><td>
387</td></tr><tr><td><span class="term"><i><tt>size</tt></i> :</span></td><td>
388</td></tr><tr><td><span class="term"><i><tt>terminate</tt></i> :</span></td><td>
389</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>
390
391
Daniel Veillardd4330462003-04-29 12:40:16 +0000392</td></tr></tbody></table></div></div></div></div><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation footer" cellpadding="2" cellspacing="0"><tr valign="middle"><td align="left"><a accesskey="p" href="libxml-xmlerror.html"><b>&lt;&lt; xmlerror</b></a></td><td align="right"><a accesskey="n" href="libxml-HTMLtree.html"><b>HTMLtree &gt;&gt;</b></a></td></tr></table></body></html>