blob: 475ae0280313df1fb6f094c55705757325d2c15c [file] [log] [blame]
William M. Brackc6e07552003-08-16 12:44:47 +00001<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>HTMLparser</title><meta name="generator" content="DocBook XSL Stylesheets V1.60.1"><meta xmlns="http://www.w3.org/TR/xhtml1/transitional" name="generator" content="GTK-Doc V1.0 (XML mode)"></meta><style xmlns="http://www.w3.org/TR/xhtml1/transitional" type="text/css">
Daniel Veillardd4330462003-04-29 12:40:16 +00002 .synopsis, .classsynopsis {
3 background: #eeeeee;
4 border: solid 1px #aaaaaa;
5 padding: 0.5em;
6 }
7 .programlisting {
8 background: #eeeeff;
9 border: solid 1px #aaaaff;
10 padding: 0.5em;
11 }
12 .variablelist {
13 padding: 4px;
14 margin-left: 3em;
15 }
16 .navigation {
17 background: #ffeeee;
18 border: solid 1px #ffaaaa;
19 margin-top: 0.5em;
20 margin-bottom: 0.5em;
21 }
22 .navigation a {
23 color: #770000;
24 }
25 .navigation a:visited {
26 color: #550000;
27 }
28 .navigation .title {
29 font-size: 200%;
30 }
William M. Brackc6e07552003-08-16 12:44:47 +000031 </style><link rel="home" href="index.html" title="[Insert name here] Reference Manual"><link rel="up" href="ch01.html" title="[Insert title here]"><link rel="previous" href="libxml-entities.html" title="entities"><link rel="next" href="libxml-valid.html" title="valid"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation header" cellpadding="2" cellspacing="2"><tr valign="middle"><td><a accesskey="p" href="libxml-entities.html"><img src="left.png" width="24" height="24" border="0" alt="Prev"></img></a></td><td><a accesskey="u" href="ch01.html"><img src="up.png" width="24" height="24" border="0" alt="Up"></img></a></td><td><a accesskey="h" href="index.html"><img src="home.png" width="24" height="24" border="0" alt="Home"></img></a></td><th width="100%" align="center">[Insert name here] Reference Manual</th><td><a accesskey="n" href="libxml-valid.html"><img src="right.png" width="24" height="24" border="0" alt="Next"></img></a></td></tr></table><div class="refentry" lang="en"><a name="libxml-HTMLparser"></a><div class="titlepage"><div></div><div></div></div><div class="refnamediv"><h2><span class="refentrytitle">HTMLparser</span></h2><p>HTMLparser &#8212; </p></div><div class="refsynopsisdiv"><h2>Synopsis</h2><pre class="synopsis">
Daniel Veillardd4330462003-04-29 12:40:16 +000032
33
34
35typedef <a href="libxml-HTMLparser.html#htmlParserCtxt">htmlParserCtxt</a>;
36typedef <a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a>;
37typedef <a href="libxml-HTMLparser.html#htmlParserNodeInfo">htmlParserNodeInfo</a>;
38typedef <a href="libxml-HTMLparser.html#htmlSAXHandler">htmlSAXHandler</a>;
39typedef <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a>;
40typedef <a href="libxml-HTMLparser.html#htmlParserInput">htmlParserInput</a>;
41typedef <a href="libxml-HTMLparser.html#htmlParserInputPtr">htmlParserInputPtr</a>;
42typedef <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a>;
43typedef <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a>;
44struct <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>;
45typedef <a href="libxml-HTMLparser.html#htmlElemDescPtr">htmlElemDescPtr</a>;
46struct <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>;
47typedef <a href="libxml-HTMLparser.html#htmlEntityDescPtr">htmlEntityDescPtr</a>;
48const <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>* <a href="libxml-HTMLparser.html#htmlTagLookup">htmlTagLookup</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *tag);
49const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlEntityLookup">htmlEntityLookup</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);
50const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlEntityValueLookup">htmlEntityValueLookup</a> (unsigned int value);
51int <a href="libxml-HTMLparser.html#htmlIsAutoClosed">htmlIsAutoClosed</a> (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc,
52 <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);
53int <a href="libxml-HTMLparser.html#htmlAutoCloseTag">htmlAutoCloseTag</a> (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc,
54 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name,
55 <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);
56const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlParseEntityRef">htmlParseEntityRef</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
57 <a href="libxml-tree.html#xmlChar">xmlChar</a> **str);
58int <a href="libxml-HTMLparser.html#htmlParseCharRef">htmlParseCharRef</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
59void <a href="libxml-HTMLparser.html#htmlParseElement">htmlParseElement</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
60<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="libxml-HTMLparser.html#htmlCreateMemoryParserCtxt">htmlCreateMemoryParserCtxt</a>
61 (const char *buffer,
62 int size);
63int <a href="libxml-HTMLparser.html#htmlParseDocument">htmlParseDocument</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
64<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlSAXParseDoc">htmlSAXParseDoc</a> (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
65 const char *encoding,
66 <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
67 void *userData);
68<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlParseDoc">htmlParseDoc</a> (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
69 const char *encoding);
70<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlSAXParseFile">htmlSAXParseFile</a> (const char *filename,
71 const char *encoding,
72 <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
73 void *userData);
74<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlParseFile">htmlParseFile</a> (const char *filename,
75 const char *encoding);
76int <a href="libxml-HTMLparser.html#UTF8ToHtml">UTF8ToHtml</a> (unsigned char *out,
77 int *outlen,
78 unsigned char *in,
79 int *inlen);
80int <a href="libxml-HTMLparser.html#htmlEncodeEntities">htmlEncodeEntities</a> (unsigned char *out,
81 int *outlen,
82 unsigned char *in,
83 int *inlen,
84 int quoteChar);
85int <a href="libxml-HTMLparser.html#htmlIsScriptAttribute">htmlIsScriptAttribute</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);
86int <a href="libxml-HTMLparser.html#htmlHandleOmittedElem">htmlHandleOmittedElem</a> (int val);
87void <a href="libxml-HTMLparser.html#htmlFreeParserCtxt">htmlFreeParserCtxt</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);
88<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="libxml-HTMLparser.html#htmlCreatePushParserCtxt">htmlCreatePushParserCtxt</a> (<a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
89 void *user_data,
90 const char *chunk,
91 int size,
92 const char *filename,
93 <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);
94int <a href="libxml-HTMLparser.html#htmlParseChunk">htmlParseChunk</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
95 const char *chunk,
96 int size,
97 int terminate);
98</pre></div><div class="refsect1" lang="en"><h2>Description</h2><p>
99
Daniel Veillardd7cec922003-06-13 12:30:10 +0000100</p></div><div class="refsect1" lang="en"><h2>Details</h2><div class="refsect2" lang="en"><h3><a name="htmlParserCtxt"></a>htmlParserCtxt</h3><pre class="programlisting">typedef xmlParserCtxt htmlParserCtxt;
101</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000102
Daniel Veillardd7cec922003-06-13 12:30:10 +0000103</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserCtxtPtr"></a>htmlParserCtxtPtr</h3><pre class="programlisting">typedef xmlParserCtxtPtr htmlParserCtxtPtr;
104</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000105
Daniel Veillardd7cec922003-06-13 12:30:10 +0000106</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserNodeInfo"></a>htmlParserNodeInfo</h3><pre class="programlisting">typedef xmlParserNodeInfo htmlParserNodeInfo;
107</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000108
Daniel Veillardd7cec922003-06-13 12:30:10 +0000109</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXHandler"></a>htmlSAXHandler</h3><pre class="programlisting">typedef xmlSAXHandler htmlSAXHandler;
110</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000111
Daniel Veillardd7cec922003-06-13 12:30:10 +0000112</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXHandlerPtr"></a>htmlSAXHandlerPtr</h3><pre class="programlisting">typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
113</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000114
Daniel Veillardd7cec922003-06-13 12:30:10 +0000115</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserInput"></a>htmlParserInput</h3><pre class="programlisting">typedef xmlParserInput htmlParserInput;
116</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000117
Daniel Veillardd7cec922003-06-13 12:30:10 +0000118</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParserInputPtr"></a>htmlParserInputPtr</h3><pre class="programlisting">typedef xmlParserInputPtr htmlParserInputPtr;
119</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000120
Daniel Veillardd7cec922003-06-13 12:30:10 +0000121</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlDocPtr"></a>htmlDocPtr</h3><pre class="programlisting">typedef xmlDocPtr htmlDocPtr;
122</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000123
Daniel Veillardd7cec922003-06-13 12:30:10 +0000124</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlNodePtr"></a>htmlNodePtr</h3><pre class="programlisting">typedef xmlNodePtr htmlNodePtr;
125</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000126
127</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlElemDesc"></a>struct htmlElemDesc</h3><pre class="programlisting">struct htmlElemDesc {
William M. Brackc6e07552003-08-16 12:44:47 +0000128
Daniel Veillardd4330462003-04-29 12:40:16 +0000129 const char *name; /* The tag name */
130 char startTag; /* Whether the start tag can be implied */
131 char endTag; /* Whether the end tag can be implied */
132 char saveEndTag; /* Whether the end tag should be saved */
133 char empty; /* Is this an empty element ? */
134 char depr; /* Is this a deprecated element ? */
135 char dtd; /* 1: only in Loose DTD, 2: only Frameset one */
136 char isinline; /* is this a block 0 or inline 1 element */
137 const char *desc; /* the description */
138
139/* NRK Jan.2003
140 * New fields encapsulating HTML structure
141 *
142 * Bugs:
143 * This is a very limited representation. It fails to tell us when
144 * an element *requires* subelements (we only have whether they're
145 * allowed or not), and it doesn't tell us where CDATA and PCDATA
146 * are allowed. Some element relationships are not fully represented:
147 * these are flagged with the word MODIFIER
148 */
149 const char** subelts; /* allowed sub-elements of this element */
150 const char* defaultsubelt; /* subelement for suggested auto-repair
151 if necessary or NULL */
152 const char** attrs_opt; /* Optional Attributes */
153 const char** attrs_depr; /* Additional deprecated attributes */
154 const char** attrs_req; /* Required attributes */
155};
156</pre><p>
157
Daniel Veillardd7cec922003-06-13 12:30:10 +0000158</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlElemDescPtr"></a>htmlElemDescPtr</h3><pre class="programlisting">typedef htmlElemDesc *htmlElemDescPtr;
159</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000160
161</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityDesc"></a>struct htmlEntityDesc</h3><pre class="programlisting">struct htmlEntityDesc {
William M. Brackc6e07552003-08-16 12:44:47 +0000162
Daniel Veillardd4330462003-04-29 12:40:16 +0000163 unsigned int value; /* the UNICODE value for the character */
164 const char *name; /* The entity name */
165 const char *desc; /* the description */
166};
167</pre><p>
168
Daniel Veillardd7cec922003-06-13 12:30:10 +0000169</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityDescPtr"></a>htmlEntityDescPtr</h3><pre class="programlisting">typedef htmlEntityDesc *htmlEntityDescPtr;
170</pre><p>
Daniel Veillardd4330462003-04-29 12:40:16 +0000171
172</p></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlTagLookup"></a>htmlTagLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>* htmlTagLookup (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *tag);</pre><p>
173Lookup the HTML tag in the ElementTable</p><p>
174
William M. Brackc6e07552003-08-16 12:44:47 +0000175</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>tag</tt></i> :</span></td><td> The tag name in lowercase
Daniel Veillardd7cec922003-06-13 12:30:10 +0000176</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the related htmlElemDescPtr or NULL if not found.
Daniel Veillardd4330462003-04-29 12:40:16 +0000177</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityLookup"></a>htmlEntityLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlEntityLookup (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);</pre><p>
178Lookup the given entity in EntitiesTable
179</p><p>
180TODO: the linear scan is really ugly, a hash table is really needed.</p><p>
181
William M. Brackc6e07552003-08-16 12:44:47 +0000182</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>name</tt></i> :</span></td><td> the entity name
Daniel Veillardd7cec922003-06-13 12:30:10 +0000183</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the associated htmlEntityDescPtr if found, NULL otherwise.
Daniel Veillardd4330462003-04-29 12:40:16 +0000184</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEntityValueLookup"></a>htmlEntityValueLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlEntityValueLookup (unsigned int value);</pre><p>
185Lookup the given entity in EntitiesTable
186</p><p>
187TODO: the linear scan is really ugly, a hash table is really needed.</p><p>
188
William M. Brackc6e07552003-08-16 12:44:47 +0000189</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>value</tt></i> :</span></td><td> the entity's unicode value
Daniel Veillardd7cec922003-06-13 12:30:10 +0000190</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the associated htmlEntityDescPtr if found, NULL otherwise.
Daniel Veillardd4330462003-04-29 12:40:16 +0000191</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlIsAutoClosed"></a>htmlIsAutoClosed ()</h3><pre class="programlisting">int htmlIsAutoClosed (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc,
192 <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);</pre><p>
193The HTML DTD allows a tag to implicitly close other tags.
194The list is kept in htmlStartClose array. This function checks
195if a tag is autoclosed by one of its children</p><p>
196
William M. Brackc6e07552003-08-16 12:44:47 +0000197</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>doc</tt></i> :</span></td><td> the HTML document
198</td></tr><tr><td><span class="term"><i class="parameter"><tt>elem</tt></i> :</span></td><td> the HTML element
Daniel Veillardd7cec922003-06-13 12:30:10 +0000199</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>1 if autoclosed, 0 otherwise
Daniel Veillardd4330462003-04-29 12:40:16 +0000200</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlAutoCloseTag"></a>htmlAutoCloseTag ()</h3><pre class="programlisting">int htmlAutoCloseTag (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc,
201 const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name,
202 <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);</pre><p>
203The HTML DTD allows a tag to implicitly close other tags.
204The list is kept in htmlStartClose array. This function checks
205if the element or one of its children would autoclose the
206given tag.</p><p>
207
William M. Brackc6e07552003-08-16 12:44:47 +0000208</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>doc</tt></i> :</span></td><td> the HTML document
209</td></tr><tr><td><span class="term"><i class="parameter"><tt>name</tt></i> :</span></td><td> The tag name
210</td></tr><tr><td><span class="term"><i class="parameter"><tt>elem</tt></i> :</span></td><td> the HTML element
Daniel Veillardd7cec922003-06-13 12:30:10 +0000211</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>1 if autoclose, 0 otherwise
Daniel Veillardd4330462003-04-29 12:40:16 +0000212</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseEntityRef"></a>htmlParseEntityRef ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlParseEntityRef (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
213 <a href="libxml-tree.html#xmlChar">xmlChar</a> **str);</pre><p>
214parse an HTML ENTITY reference
215</p><p>
216[68] EntityRef ::= '&amp;' Name ';'</p><p>
217
William M. Brackc6e07552003-08-16 12:44:47 +0000218</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>ctxt</tt></i> :</span></td><td> an HTML parser context
219</td></tr><tr><td><span class="term"><i class="parameter"><tt>str</tt></i> :</span></td><td> location to store the entity name
Daniel Veillardd7cec922003-06-13 12:30:10 +0000220</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the associated htmlEntityDescPtr if found, or NULL otherwise,
221 if non-NULL *str will have to be freed by the caller.
Daniel Veillardd4330462003-04-29 12:40:16 +0000222</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseCharRef"></a>htmlParseCharRef ()</h3><pre class="programlisting">int htmlParseCharRef (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p>
223parse Reference declarations
224</p><p>
225[66] CharRef ::= '&amp;#' [0-9]+ ';' |
226 '&amp;#x' [0-9a-fA-F]+ ';'</p><p>
227
William M. Brackc6e07552003-08-16 12:44:47 +0000228</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>ctxt</tt></i> :</span></td><td> an HTML parser context
Daniel Veillardd7cec922003-06-13 12:30:10 +0000229</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the value parsed (as an int)
Daniel Veillardd4330462003-04-29 12:40:16 +0000230</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseElement"></a>htmlParseElement ()</h3><pre class="programlisting">void htmlParseElement (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p>
231parse an HTML element, this is highly recursive
232</p><p>
233[39] element ::= EmptyElemTag | STag content ETag
234</p><p>
235[41] Attribute ::= Name Eq AttValue</p><p>
236
William M. Brackc6e07552003-08-16 12:44:47 +0000237</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>ctxt</tt></i> :</span></td><td> an HTML parser context
Daniel Veillardd4330462003-04-29 12:40:16 +0000238</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlCreateMemoryParserCtxt"></a>htmlCreateMemoryParserCtxt ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreateMemoryParserCtxt
239 (const char *buffer,
240 int size);</pre><p>
241Create a parser context for an HTML in-memory document.</p><p>
242
William M. Brackc6e07552003-08-16 12:44:47 +0000243</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>buffer</tt></i> :</span></td><td> a pointer to a char array
244</td></tr><tr><td><span class="term"><i class="parameter"><tt>size</tt></i> :</span></td><td> the size of the array
Daniel Veillardd7cec922003-06-13 12:30:10 +0000245</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the new parser context or NULL
Daniel Veillardd4330462003-04-29 12:40:16 +0000246</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseDocument"></a>htmlParseDocument ()</h3><pre class="programlisting">int htmlParseDocument (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p>
247parse an HTML document (and build a tree if using the standard SAX
248interface).</p><p>
249
William M. Brackc6e07552003-08-16 12:44:47 +0000250</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>ctxt</tt></i> :</span></td><td> an HTML parser context
Daniel Veillardd7cec922003-06-13 12:30:10 +0000251</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0, -1 in case of error. the parser context is augmented
252 as a result of the parsing.
Daniel Veillardd4330462003-04-29 12:40:16 +0000253</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXParseDoc"></a>htmlSAXParseDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlSAXParseDoc (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
254 const char *encoding,
255 <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
256 void *userData);</pre><p>
257Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks
258to handle parse events. If sax is NULL, fallback to the default DOM
259behavior and return a tree.</p><p>
260
William M. Brackc6e07552003-08-16 12:44:47 +0000261</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>cur</tt></i> :</span></td><td> a pointer to an array of xmlChar
262</td></tr><tr><td><span class="term"><i class="parameter"><tt>encoding</tt></i> :</span></td><td> a free form C string describing the HTML document encoding, or NULL
263</td></tr><tr><td><span class="term"><i class="parameter"><tt>sax</tt></i> :</span></td><td> the SAX handler block
264</td></tr><tr><td><span class="term"><i class="parameter"><tt>userData</tt></i> :</span></td><td> if using SAX, this pointer will be provided on callbacks.
Daniel Veillardd7cec922003-06-13 12:30:10 +0000265</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the resulting document tree unless SAX is NULL or the document is
266 not well formed.
Daniel Veillardd4330462003-04-29 12:40:16 +0000267</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseDoc"></a>htmlParseDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlParseDoc (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur,
268 const char *encoding);</pre><p>
269parse an HTML in-memory document and build a tree.</p><p>
270
William M. Brackc6e07552003-08-16 12:44:47 +0000271</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>cur</tt></i> :</span></td><td> a pointer to an array of xmlChar
272</td></tr><tr><td><span class="term"><i class="parameter"><tt>encoding</tt></i> :</span></td><td> a free form C string describing the HTML document encoding, or NULL
Daniel Veillardd7cec922003-06-13 12:30:10 +0000273</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the resulting document tree
Daniel Veillardd4330462003-04-29 12:40:16 +0000274</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlSAXParseFile"></a>htmlSAXParseFile ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlSAXParseFile (const char *filename,
275 const char *encoding,
276 <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
277 void *userData);</pre><p>
278parse an HTML file and build a tree. Automatic support for ZLIB/Compress
279compressed document is provided by default if found at compile-time.
280It uses the given SAX function block to handle the parsing callback.
281If sax is NULL, fallback to the default DOM tree building routines.</p><p>
282
William M. Brackc6e07552003-08-16 12:44:47 +0000283</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>filename</tt></i> :</span></td><td> the filename
284</td></tr><tr><td><span class="term"><i class="parameter"><tt>encoding</tt></i> :</span></td><td> a free form C string describing the HTML document encoding, or NULL
285</td></tr><tr><td><span class="term"><i class="parameter"><tt>sax</tt></i> :</span></td><td> the SAX handler block
286</td></tr><tr><td><span class="term"><i class="parameter"><tt>userData</tt></i> :</span></td><td> if using SAX, this pointer will be provided on callbacks.
Daniel Veillardd7cec922003-06-13 12:30:10 +0000287</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the resulting document tree unless SAX is NULL or the document is
288 not well formed.
Daniel Veillardd4330462003-04-29 12:40:16 +0000289</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseFile"></a>htmlParseFile ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlParseFile (const char *filename,
290 const char *encoding);</pre><p>
291parse an HTML file and build a tree. Automatic support for ZLIB/Compress
292compressed document is provided by default if found at compile-time.</p><p>
293
William M. Brackc6e07552003-08-16 12:44:47 +0000294</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>filename</tt></i> :</span></td><td> the filename
295</td></tr><tr><td><span class="term"><i class="parameter"><tt>encoding</tt></i> :</span></td><td> a free form C string describing the HTML document encoding, or NULL
Daniel Veillardd7cec922003-06-13 12:30:10 +0000296</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the resulting document tree
Daniel Veillardd4330462003-04-29 12:40:16 +0000297</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="UTF8ToHtml"></a>UTF8ToHtml ()</h3><pre class="programlisting">int UTF8ToHtml (unsigned char *out,
298 int *outlen,
299 unsigned char *in,
300 int *inlen);</pre><p>
301Take a block of UTF-8 chars in and try to convert it to an ASCII
302plus HTML entities block of chars out.</p><p>
303
William M. Brackc6e07552003-08-16 12:44:47 +0000304</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>out</tt></i> :</span></td><td> a pointer to an array of bytes to store the result
305</td></tr><tr><td><span class="term"><i class="parameter"><tt>outlen</tt></i> :</span></td><td> the length of <i class="parameter"><tt>out</tt></i>
306</td></tr><tr><td><span class="term"><i class="parameter"><tt>in</tt></i> :</span></td><td> a pointer to an array of UTF-8 chars
307</td></tr><tr><td><span class="term"><i class="parameter"><tt>inlen</tt></i> :</span></td><td> the length of <i class="parameter"><tt>in</tt></i>
Daniel Veillardd7cec922003-06-13 12:30:10 +0000308</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise
William M. Brackc6e07552003-08-16 12:44:47 +0000309The value of <i class="parameter"><tt>inlen</tt></i> after return is the number of octets consumed
Daniel Veillardd7cec922003-06-13 12:30:10 +0000310 as the return value is positive, else unpredictable.
William M. Brackc6e07552003-08-16 12:44:47 +0000311The value of <i class="parameter"><tt>outlen</tt></i> after return is the number of octets consumed.
Daniel Veillardd4330462003-04-29 12:40:16 +0000312</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlEncodeEntities"></a>htmlEncodeEntities ()</h3><pre class="programlisting">int htmlEncodeEntities (unsigned char *out,
313 int *outlen,
314 unsigned char *in,
315 int *inlen,
316 int quoteChar);</pre><p>
317Take a block of UTF-8 chars in and try to convert it to an ASCII
318plus HTML entities block of chars out.</p><p>
319
William M. Brackc6e07552003-08-16 12:44:47 +0000320</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>out</tt></i> :</span></td><td> a pointer to an array of bytes to store the result
321</td></tr><tr><td><span class="term"><i class="parameter"><tt>outlen</tt></i> :</span></td><td> the length of <i class="parameter"><tt>out</tt></i>
322</td></tr><tr><td><span class="term"><i class="parameter"><tt>in</tt></i> :</span></td><td> a pointer to an array of UTF-8 chars
323</td></tr><tr><td><span class="term"><i class="parameter"><tt>inlen</tt></i> :</span></td><td> the length of <i class="parameter"><tt>in</tt></i>
324</td></tr><tr><td><span class="term"><i class="parameter"><tt>quoteChar</tt></i> :</span></td><td> the quote character to escape (' or ") or zero.
Daniel Veillardd7cec922003-06-13 12:30:10 +0000325</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise.
William M. Brackc6e07552003-08-16 12:44:47 +0000326The value of <i class="parameter"><tt>inlen</tt></i> after return is the number of octets consumed
Daniel Veillardd7cec922003-06-13 12:30:10 +0000327 if the return value is non-negative, else unpredictable.
William M. Brackc6e07552003-08-16 12:44:47 +0000328The value of <i class="parameter"><tt>outlen</tt></i> after return is the number of octets produced.
Daniel Veillardd4330462003-04-29 12:40:16 +0000329</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlIsScriptAttribute"></a>htmlIsScriptAttribute ()</h3><pre class="programlisting">int htmlIsScriptAttribute (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);</pre><p>
330Check if an attribute is of content type Script</p><p>
331
William M. Brackc6e07552003-08-16 12:44:47 +0000332</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>name</tt></i> :</span></td><td> an attribute name
Daniel Veillardd7cec922003-06-13 12:30:10 +0000333</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>1 if the attribute is a script, 0 otherwise
Daniel Veillardd4330462003-04-29 12:40:16 +0000334</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlHandleOmittedElem"></a>htmlHandleOmittedElem ()</h3><pre class="programlisting">int htmlHandleOmittedElem (int val);</pre><p>
335Set and return the previous value for handling HTML omitted tags.</p><p>
336
William M. Brackc6e07552003-08-16 12:44:47 +0000337</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>val</tt></i> :</span></td><td> int 0 or 1
Daniel Veillardd7cec922003-06-13 12:30:10 +0000338</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the previous value: 0 for no handling, 1 for auto insertion.
Daniel Veillardd4330462003-04-29 12:40:16 +0000339</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlFreeParserCtxt"></a>htmlFreeParserCtxt ()</h3><pre class="programlisting">void htmlFreeParserCtxt (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p>
340Free all the memory used by a parser context. However the parsed
341document in ctxt-&gt;myDoc is not freed.</p><p>
342
William M. Brackc6e07552003-08-16 12:44:47 +0000343</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>ctxt</tt></i> :</span></td><td> an HTML parser context
Daniel Veillardd4330462003-04-29 12:40:16 +0000344</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlCreatePushParserCtxt"></a>htmlCreatePushParserCtxt ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreatePushParserCtxt (<a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax,
345 void *user_data,
346 const char *chunk,
347 int size,
348 const char *filename,
349 <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p>
350Create a parser context for using the HTML parser in push mode.
William M. Brackc6e07552003-08-16 12:44:47 +0000351The value of <i class="parameter"><tt>filename</tt></i> is used for fetching external entities
Daniel Veillardd4330462003-04-29 12:40:16 +0000352and error/warning reports.</p><p>
353
William M. Brackc6e07552003-08-16 12:44:47 +0000354</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>sax</tt></i> :</span></td><td> a SAX handler
355</td></tr><tr><td><span class="term"><i class="parameter"><tt>user_data</tt></i> :</span></td><td> The user data returned on SAX callbacks
356</td></tr><tr><td><span class="term"><i class="parameter"><tt>chunk</tt></i> :</span></td><td> a pointer to an array of chars
357</td></tr><tr><td><span class="term"><i class="parameter"><tt>size</tt></i> :</span></td><td> number of chars in the array
358</td></tr><tr><td><span class="term"><i class="parameter"><tt>filename</tt></i> :</span></td><td> an optional file name or URI
359</td></tr><tr><td><span class="term"><i class="parameter"><tt>enc</tt></i> :</span></td><td> an optional encoding
Daniel Veillardd7cec922003-06-13 12:30:10 +0000360</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>the new parser context or NULL
Daniel Veillardd4330462003-04-29 12:40:16 +0000361</td></tr></tbody></table></div></div><hr xmlns="http://www.w3.org/TR/xhtml1/transitional"></hr><div class="refsect2" lang="en"><h3><a name="htmlParseChunk"></a>htmlParseChunk ()</h3><pre class="programlisting">int htmlParseChunk (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt,
362 const char *chunk,
363 int size,
364 int terminate);</pre><p>
365Parse a Chunk of memory</p><p>
366
William M. Brackc6e07552003-08-16 12:44:47 +0000367</p><div class="variablelist"><table border="0"><col align="left" valign="top"><tbody><tr><td><span class="term"><i class="parameter"><tt>ctxt</tt></i> :</span></td><td> an HTML parser context
368</td></tr><tr><td><span class="term"><i class="parameter"><tt>chunk</tt></i> :</span></td><td> a char array
369</td></tr><tr><td><span class="term"><i class="parameter"><tt>size</tt></i> :</span></td><td> the size in bytes of the chunk
370</td></tr><tr><td><span class="term"><i class="parameter"><tt>terminate</tt></i> :</span></td><td> last chunk indicator
Daniel Veillardd7cec922003-06-13 12:30:10 +0000371</td></tr><tr><td><span class="term"><span class="emphasis"><em>Returns</em></span> :</span></td><td>zero if no error, the xmlParserErrors otherwise.
William M. Brackc6e07552003-08-16 12:44:47 +0000372</td></tr></tbody></table></div></div></div></div><table xmlns="http://www.w3.org/TR/xhtml1/transitional" class="navigation" width="100%" summary="Navigation footer" cellpadding="2" cellspacing="0"><tr valign="middle"><td align="left"><a accesskey="p" href="libxml-entities.html"><b>&lt;&lt; entities</b></a></td><td align="right"><a accesskey="n" href="libxml-valid.html"><b>valid &gt;&gt;</b></a></td></tr></table></body></html>