William M. Brack | 2ad1dff | 2003-11-15 10:35:20 +0000 | [diff] [blame] | 1 | <html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>HTMLparser</title><meta name="generator" content="DocBook XSL Stylesheets V1.48"><meta name="generator" content="GTK-Doc V1.1 (XML mode)"><style type="text/css"> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 2 | .synopsis, .classsynopsis { |
| 3 | background: #eeeeee; |
| 4 | border: solid 1px #aaaaaa; |
| 5 | padding: 0.5em; |
| 6 | } |
| 7 | .programlisting { |
| 8 | background: #eeeeff; |
| 9 | border: solid 1px #aaaaff; |
| 10 | padding: 0.5em; |
| 11 | } |
| 12 | .variablelist { |
| 13 | padding: 4px; |
| 14 | margin-left: 3em; |
| 15 | } |
| 16 | .navigation { |
| 17 | background: #ffeeee; |
| 18 | border: solid 1px #ffaaaa; |
| 19 | margin-top: 0.5em; |
| 20 | margin-bottom: 0.5em; |
| 21 | } |
| 22 | .navigation a { |
| 23 | color: #770000; |
| 24 | } |
| 25 | .navigation a:visited { |
| 26 | color: #550000; |
| 27 | } |
| 28 | .navigation .title { |
| 29 | font-size: 200%; |
| 30 | } |
William M. Brack | 60f394e | 2003-11-16 06:25:42 +0000 | [diff] [blame] | 31 | </style><link rel="home" href="index.html" title="Gnome XML Library Reference Manual"><link rel="up" href="ch01.html" title="Libxml Programming Notes"><link rel="previous" href="libxml-entities.html" title="entities"><link rel="next" href="libxml-valid.html" title="valid"></head><body text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><table class="navigation" width="100%" summary="Navigation header" cellpadding="2" cellspacing="2"><tr valign="middle"><td><a accesskey="p" href="libxml-entities.html"><img src="left.png" width="24" height="24" border="0" alt="Prev"></a></td><td><a accesskey="u" href="ch01.html"><img src="up.png" width="24" height="24" border="0" alt="Up"></a></td><td><a accesskey="h" href="index.html"><img src="home.png" width="24" height="24" border="0" alt="Home"></a></td><th width="100%" align="center">Gnome XML Library Reference Manual</th><td><a accesskey="n" href="libxml-valid.html"><img src="right.png" width="24" height="24" border="0" alt="Next"></a></td></tr></table><div class="refentry"><a name="libxml-HTMLparser"></a><div class="titlepage"></div><div class="refnamediv"><h2>HTMLparser</h2><p>HTMLparser — </p></div><div class="refsynopsisdiv"><h2><h1 class="title"><a name="id2762484"></a>Synopsis</h1></h2><pre class="synopsis"> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 32 | |
| 33 | |
| 34 | |
| 35 | typedef <a href="libxml-HTMLparser.html#htmlParserCtxt">htmlParserCtxt</a>; |
| 36 | typedef <a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a>; |
| 37 | typedef <a href="libxml-HTMLparser.html#htmlParserNodeInfo">htmlParserNodeInfo</a>; |
| 38 | typedef <a href="libxml-HTMLparser.html#htmlSAXHandler">htmlSAXHandler</a>; |
| 39 | typedef <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a>; |
| 40 | typedef <a href="libxml-HTMLparser.html#htmlParserInput">htmlParserInput</a>; |
| 41 | typedef <a href="libxml-HTMLparser.html#htmlParserInputPtr">htmlParserInputPtr</a>; |
| 42 | typedef <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a>; |
| 43 | typedef <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a>; |
| 44 | struct <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>; |
| 45 | typedef <a href="libxml-HTMLparser.html#htmlElemDescPtr">htmlElemDescPtr</a>; |
| 46 | struct <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>; |
| 47 | typedef <a href="libxml-HTMLparser.html#htmlEntityDescPtr">htmlEntityDescPtr</a>; |
William M. Brack | 60f394e | 2003-11-16 06:25:42 +0000 | [diff] [blame] | 48 | const <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>* <a href="libxml-HTMLparser.html#htmlTagLookup">htmlTagLookup</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *tag); |
| 49 | const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlEntityLookup">htmlEntityLookup</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name); |
| 50 | const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlEntityValueLookup">htmlEntityValueLookup</a> (unsigned int value); |
| 51 | int <a href="libxml-HTMLparser.html#htmlIsAutoClosed">htmlIsAutoClosed</a> (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, |
| 52 | <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem); |
| 53 | int <a href="libxml-HTMLparser.html#htmlAutoCloseTag">htmlAutoCloseTag</a> (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, |
| 54 | const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name, |
| 55 | <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem); |
| 56 | const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* <a href="libxml-HTMLparser.html#htmlParseEntityRef">htmlParseEntityRef</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, |
| 57 | const <a href="libxml-tree.html#xmlChar">xmlChar</a> **str); |
| 58 | int <a href="libxml-HTMLparser.html#htmlParseCharRef">htmlParseCharRef</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); |
| 59 | void <a href="libxml-HTMLparser.html#htmlParseElement">htmlParseElement</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); |
| 60 | <a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="libxml-HTMLparser.html#htmlCreateMemoryParserCtxt">htmlCreateMemoryParserCtxt</a> |
| 61 | (const char *buffer, |
| 62 | int size); |
| 63 | int <a href="libxml-HTMLparser.html#htmlParseDocument">htmlParseDocument</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); |
| 64 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlSAXParseDoc">htmlSAXParseDoc</a> (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur, |
| 65 | const char *encoding, |
| 66 | <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, |
| 67 | void *userData); |
| 68 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlParseDoc">htmlParseDoc</a> (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur, |
| 69 | const char *encoding); |
| 70 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlSAXParseFile">htmlSAXParseFile</a> (const char *filename, |
| 71 | const char *encoding, |
| 72 | <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, |
| 73 | void *userData); |
| 74 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlParseFile">htmlParseFile</a> (const char *filename, |
| 75 | const char *encoding); |
| 76 | int <a href="libxml-HTMLparser.html#UTF8ToHtml">UTF8ToHtml</a> (unsigned char *out, |
| 77 | int *outlen, |
| 78 | unsigned char *in, |
| 79 | int *inlen); |
| 80 | int <a href="libxml-HTMLparser.html#htmlEncodeEntities">htmlEncodeEntities</a> (unsigned char *out, |
| 81 | int *outlen, |
| 82 | unsigned char *in, |
| 83 | int *inlen, |
| 84 | int quoteChar); |
| 85 | int <a href="libxml-HTMLparser.html#htmlIsScriptAttribute">htmlIsScriptAttribute</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name); |
| 86 | int <a href="libxml-HTMLparser.html#htmlHandleOmittedElem">htmlHandleOmittedElem</a> (int val); |
| 87 | <a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> <a href="libxml-HTMLparser.html#htmlCreatePushParserCtxt">htmlCreatePushParserCtxt</a> (<a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, |
| 88 | void *user_data, |
| 89 | const char *chunk, |
| 90 | int size, |
| 91 | const char *filename, |
| 92 | <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc); |
| 93 | int <a href="libxml-HTMLparser.html#htmlParseChunk">htmlParseChunk</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, |
| 94 | const char *chunk, |
| 95 | int size, |
| 96 | int terminate); |
| 97 | void <a href="libxml-HTMLparser.html#htmlFreeParserCtxt">htmlFreeParserCtxt</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); |
William M. Brack | 2ad1dff | 2003-11-15 10:35:20 +0000 | [diff] [blame] | 98 | enum <a href="libxml-HTMLparser.html#htmlParserOption">htmlParserOption</a>; |
William M. Brack | 60f394e | 2003-11-16 06:25:42 +0000 | [diff] [blame] | 99 | void <a href="libxml-HTMLparser.html#htmlCtxtReset">htmlCtxtReset</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt); |
| 100 | int <a href="libxml-HTMLparser.html#htmlCtxtUseOptions">htmlCtxtUseOptions</a> (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, |
| 101 | int options); |
| 102 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlReadDoc">htmlReadDoc</a> (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *cur, |
| 103 | const char *URL, |
| 104 | const char *encoding, |
| 105 | int options); |
| 106 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlReadFile">htmlReadFile</a> (const char *URL, |
| 107 | const char *encoding, |
| 108 | int options); |
| 109 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlReadMemory">htmlReadMemory</a> (const char *buffer, |
| 110 | int size, |
| 111 | const char *URL, |
| 112 | const char *encoding, |
| 113 | int options); |
| 114 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlReadFd">htmlReadFd</a> (int fd, |
| 115 | const char *URL, |
| 116 | const char *encoding, |
| 117 | int options); |
| 118 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlReadIO">htmlReadIO</a> (<a href="libxml-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback</a> ioread, |
| 119 | <a href="libxml-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback</a> ioclose, |
| 120 | void *ioctx, |
| 121 | const char *URL, |
| 122 | const char *encoding, |
| 123 | int options); |
| 124 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlCtxtReadDoc">htmlCtxtReadDoc</a> (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt, |
| 125 | const <a href="libxml-tree.html#xmlChar">xmlChar</a> *cur, |
| 126 | const char *URL, |
| 127 | const char *encoding, |
| 128 | int options); |
| 129 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlCtxtReadFile">htmlCtxtReadFile</a> (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt, |
| 130 | const char *filename, |
| 131 | const char *encoding, |
| 132 | int options); |
| 133 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlCtxtReadMemory">htmlCtxtReadMemory</a> (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt, |
| 134 | const char *buffer, |
| 135 | int size, |
| 136 | const char *URL, |
| 137 | const char *encoding, |
| 138 | int options); |
| 139 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlCtxtReadFd">htmlCtxtReadFd</a> (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt, |
| 140 | int fd, |
| 141 | const char *URL, |
| 142 | const char *encoding, |
| 143 | int options); |
| 144 | <a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> <a href="libxml-HTMLparser.html#htmlCtxtReadIO">htmlCtxtReadIO</a> (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt, |
| 145 | <a href="libxml-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback</a> ioread, |
| 146 | <a href="libxml-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback</a> ioclose, |
| 147 | void *ioctx, |
| 148 | const char *URL, |
| 149 | const char *encoding, |
| 150 | int options); |
William M. Brack | 2ad1dff | 2003-11-15 10:35:20 +0000 | [diff] [blame] | 151 | </pre></div><div class="refsect1"><h2>Description</h2><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 152 | |
William M. Brack | 2ad1dff | 2003-11-15 10:35:20 +0000 | [diff] [blame] | 153 | </p></div><div class="refsect1"><h2>Details</h2><div class="refsect2"><h3><a name="htmlParserCtxt"></a>htmlParserCtxt</h3><pre class="programlisting">typedef xmlParserCtxt htmlParserCtxt; |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 154 | </pre><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 155 | |
William M. Brack | 2ad1dff | 2003-11-15 10:35:20 +0000 | [diff] [blame] | 156 | </p></div><hr><div class="refsect2"><h3><a name="htmlParserCtxtPtr"></a>htmlParserCtxtPtr</h3><pre class="programlisting">typedef xmlParserCtxtPtr htmlParserCtxtPtr; |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 157 | </pre><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 158 | |
William M. Brack | 2ad1dff | 2003-11-15 10:35:20 +0000 | [diff] [blame] | 159 | </p></div><hr><div class="refsect2"><h3><a name="htmlParserNodeInfo"></a>htmlParserNodeInfo</h3><pre class="programlisting">typedef xmlParserNodeInfo htmlParserNodeInfo; |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 160 | </pre><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 161 | |
William M. Brack | 2ad1dff | 2003-11-15 10:35:20 +0000 | [diff] [blame] | 162 | </p></div><hr><div class="refsect2"><h3><a name="htmlSAXHandler"></a>htmlSAXHandler</h3><pre class="programlisting">typedef xmlSAXHandler htmlSAXHandler; |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 163 | </pre><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 164 | |
William M. Brack | 2ad1dff | 2003-11-15 10:35:20 +0000 | [diff] [blame] | 165 | </p></div><hr><div class="refsect2"><h3><a name="htmlSAXHandlerPtr"></a>htmlSAXHandlerPtr</h3><pre class="programlisting">typedef xmlSAXHandlerPtr htmlSAXHandlerPtr; |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 166 | </pre><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 167 | |
William M. Brack | 2ad1dff | 2003-11-15 10:35:20 +0000 | [diff] [blame] | 168 | </p></div><hr><div class="refsect2"><h3><a name="htmlParserInput"></a>htmlParserInput</h3><pre class="programlisting">typedef xmlParserInput htmlParserInput; |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 169 | </pre><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 170 | |
William M. Brack | 2ad1dff | 2003-11-15 10:35:20 +0000 | [diff] [blame] | 171 | </p></div><hr><div class="refsect2"><h3><a name="htmlParserInputPtr"></a>htmlParserInputPtr</h3><pre class="programlisting">typedef xmlParserInputPtr htmlParserInputPtr; |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 172 | </pre><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 173 | |
William M. Brack | 2ad1dff | 2003-11-15 10:35:20 +0000 | [diff] [blame] | 174 | </p></div><hr><div class="refsect2"><h3><a name="htmlDocPtr"></a>htmlDocPtr</h3><pre class="programlisting">typedef xmlDocPtr htmlDocPtr; |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 175 | </pre><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 176 | |
William M. Brack | 2ad1dff | 2003-11-15 10:35:20 +0000 | [diff] [blame] | 177 | </p></div><hr><div class="refsect2"><h3><a name="htmlNodePtr"></a>htmlNodePtr</h3><pre class="programlisting">typedef xmlNodePtr htmlNodePtr; |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 178 | </pre><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 179 | |
William M. Brack | 2ad1dff | 2003-11-15 10:35:20 +0000 | [diff] [blame] | 180 | </p></div><hr><div class="refsect2"><h3><a name="htmlElemDesc"></a>struct htmlElemDesc</h3><pre class="programlisting">struct htmlElemDesc { |
| 181 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 182 | const char *name; /* The tag name */ |
| 183 | char startTag; /* Whether the start tag can be implied */ |
| 184 | char endTag; /* Whether the end tag can be implied */ |
| 185 | char saveEndTag; /* Whether the end tag should be saved */ |
| 186 | char empty; /* Is this an empty element ? */ |
| 187 | char depr; /* Is this a deprecated element ? */ |
| 188 | char dtd; /* 1: only in Loose DTD, 2: only Frameset one */ |
| 189 | char isinline; /* is this a block 0 or inline 1 element */ |
| 190 | const char *desc; /* the description */ |
| 191 | |
| 192 | /* NRK Jan.2003 |
| 193 | * New fields encapsulating HTML structure |
| 194 | * |
| 195 | * Bugs: |
| 196 | * This is a very limited representation. It fails to tell us when |
| 197 | * an element *requires* subelements (we only have whether they're |
| 198 | * allowed or not), and it doesn't tell us where CDATA and PCDATA |
| 199 | * are allowed. Some element relationships are not fully represented: |
| 200 | * these are flagged with the word MODIFIER |
| 201 | */ |
| 202 | const char** subelts; /* allowed sub-elements of this element */ |
| 203 | const char* defaultsubelt; /* subelement for suggested auto-repair |
| 204 | if necessary or NULL */ |
| 205 | const char** attrs_opt; /* Optional Attributes */ |
| 206 | const char** attrs_depr; /* Additional deprecated attributes */ |
| 207 | const char** attrs_req; /* Required attributes */ |
| 208 | }; |
| 209 | </pre><p> |
| 210 | |
William M. Brack | 2ad1dff | 2003-11-15 10:35:20 +0000 | [diff] [blame] | 211 | </p></div><hr><div class="refsect2"><h3><a name="htmlElemDescPtr"></a>htmlElemDescPtr</h3><pre class="programlisting">typedef htmlElemDesc *htmlElemDescPtr; |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 212 | </pre><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 213 | |
William M. Brack | 2ad1dff | 2003-11-15 10:35:20 +0000 | [diff] [blame] | 214 | </p></div><hr><div class="refsect2"><h3><a name="htmlEntityDesc"></a>struct htmlEntityDesc</h3><pre class="programlisting">struct htmlEntityDesc { |
| 215 | |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 216 | unsigned int value; /* the UNICODE value for the character */ |
| 217 | const char *name; /* The entity name */ |
| 218 | const char *desc; /* the description */ |
| 219 | }; |
| 220 | </pre><p> |
| 221 | |
William M. Brack | 2ad1dff | 2003-11-15 10:35:20 +0000 | [diff] [blame] | 222 | </p></div><hr><div class="refsect2"><h3><a name="htmlEntityDescPtr"></a>htmlEntityDescPtr</h3><pre class="programlisting">typedef htmlEntityDesc *htmlEntityDescPtr; |
Daniel Veillard | d7cec92 | 2003-06-13 12:30:10 +0000 | [diff] [blame] | 223 | </pre><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 224 | |
William M. Brack | 60f394e | 2003-11-16 06:25:42 +0000 | [diff] [blame] | 225 | </p></div><hr><div class="refsect2"><h3><a name="htmlTagLookup"></a>htmlTagLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlElemDesc">htmlElemDesc</a>* htmlTagLookup (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *tag);</pre><p> |
| 226 | Lookup the HTML tag in the ElementTable</p><p> |
| 227 | |
| 228 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>tag</tt></i>:</span></td><td> The tag name in lowercase |
| 229 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the related htmlElemDescPtr or NULL if not found. |
| 230 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlEntityLookup"></a>htmlEntityLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlEntityLookup (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);</pre><p> |
| 231 | Lookup the given entity in EntitiesTable |
| 232 | </p><p> |
| 233 | TODO: the linear scan is really ugly, a hash table is really needed.</p><p> |
| 234 | |
| 235 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>name</tt></i>:</span></td><td> the entity name |
| 236 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the associated htmlEntityDescPtr if found, NULL otherwise. |
| 237 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlEntityValueLookup"></a>htmlEntityValueLookup ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlEntityValueLookup (unsigned int value);</pre><p> |
| 238 | Lookup the given entity in EntitiesTable |
| 239 | </p><p> |
| 240 | TODO: the linear scan is really ugly, a hash table is really needed.</p><p> |
| 241 | |
| 242 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>value</tt></i>:</span></td><td> the entity's unicode value |
| 243 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the associated htmlEntityDescPtr if found, NULL otherwise. |
| 244 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlIsAutoClosed"></a>htmlIsAutoClosed ()</h3><pre class="programlisting">int htmlIsAutoClosed (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, |
| 245 | <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);</pre><p> |
| 246 | The HTML DTD allows a tag to implicitly close other tags. |
| 247 | The list is kept in htmlStartClose array. This function checks |
| 248 | if a tag is autoclosed by one of its children</p><p> |
| 249 | |
| 250 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>doc</tt></i>:</span></td><td> the HTML document |
| 251 | </td></tr><tr><td><span class="term"><i><tt>elem</tt></i>:</span></td><td> the HTML element |
| 252 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>1 if autoclosed, 0 otherwise |
| 253 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlAutoCloseTag"></a>htmlAutoCloseTag ()</h3><pre class="programlisting">int htmlAutoCloseTag (<a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> doc, |
| 254 | const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name, |
| 255 | <a href="libxml-HTMLparser.html#htmlNodePtr">htmlNodePtr</a> elem);</pre><p> |
| 256 | The HTML DTD allows a tag to implicitly close other tags. |
| 257 | The list is kept in htmlStartClose array. This function checks |
| 258 | if the element or one of its children would autoclose the |
| 259 | given tag.</p><p> |
| 260 | |
| 261 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>doc</tt></i>:</span></td><td> the HTML document |
| 262 | </td></tr><tr><td><span class="term"><i><tt>name</tt></i>:</span></td><td> The tag name |
| 263 | </td></tr><tr><td><span class="term"><i><tt>elem</tt></i>:</span></td><td> the HTML element |
| 264 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>1 if autoclose, 0 otherwise |
| 265 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlParseEntityRef"></a>htmlParseEntityRef ()</h3><pre class="programlisting">const <a href="libxml-HTMLparser.html#htmlEntityDesc">htmlEntityDesc</a>* htmlParseEntityRef (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, |
| 266 | const <a href="libxml-tree.html#xmlChar">xmlChar</a> **str);</pre><p> |
| 267 | parse an HTML entity reference |
| 268 | </p><p> |
| 269 | [68] EntityRef ::= '&' Name ';'</p><p> |
| 270 | |
| 271 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context |
| 272 | </td></tr><tr><td><span class="term"><i><tt>str</tt></i>:</span></td><td> location to store the entity name |
| 273 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the associated htmlEntityDescPtr if found, or NULL otherwise, |
| 274 | if non-NULL *str will have to be freed by the caller. |
| 275 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlParseCharRef"></a>htmlParseCharRef ()</h3><pre class="programlisting">int htmlParseCharRef (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p> |
| 276 | parse Reference declarations |
| 277 | </p><p> |
| 278 | [66] CharRef ::= '&#' [0-9]+ ';' | |
| 279 | '&amp;#x' [0-9a-fA-F]+ ';'</p><p> |
| 280 | |
| 281 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context |
| 282 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the value parsed (as an int) |
| 283 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlParseElement"></a>htmlParseElement ()</h3><pre class="programlisting">void htmlParseElement (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p> |
| 284 | parse an HTML element, this is highly recursive |
| 285 | </p><p> |
| 286 | [39] element ::= EmptyElemTag | STag content ETag |
| 287 | </p><p> |
| 288 | [41] Attribute ::= Name Eq AttValue</p><p> |
| 289 | |
| 290 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context |
| 291 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlCreateMemoryParserCtxt"></a>htmlCreateMemoryParserCtxt ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreateMemoryParserCtxt |
| 292 | (const char *buffer, |
| 293 | int size);</pre><p> |
| 294 | Create a parser context for an HTML in-memory document.</p><p> |
| 295 | |
| 296 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>buffer</tt></i>:</span></td><td> a pointer to a char array |
| 297 | </td></tr><tr><td><span class="term"><i><tt>size</tt></i>:</span></td><td> the size of the array |
| 298 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the new parser context or NULL |
| 299 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlParseDocument"></a>htmlParseDocument ()</h3><pre class="programlisting">int htmlParseDocument (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p> |
| 300 | parse an HTML document (and build a tree if using the standard SAX |
| 301 | interface).</p><p> |
| 302 | |
| 303 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context |
| 304 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>0 on success, -1 in case of error. The parser context is augmented |
| 305 | as a result of the parsing. |
| 306 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlSAXParseDoc"></a>htmlSAXParseDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlSAXParseDoc (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur, |
| 307 | const char *encoding, |
| 308 | <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, |
| 309 | void *userData);</pre><p> |
| 310 | Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks |
| 311 | to handle parse events. If sax is NULL, fall back to the default DOM |
| 312 | behavior and return a tree.</p><p> |
| 313 | |
| 314 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>cur</tt></i>:</span></td><td> a pointer to an array of xmlChar |
| 315 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> a free form C string describing the HTML document encoding, or NULL |
| 316 | </td></tr><tr><td><span class="term"><i><tt>sax</tt></i>:</span></td><td> the SAX handler block |
| 317 | </td></tr><tr><td><span class="term"><i><tt>userData</tt></i>:</span></td><td> if using SAX, this pointer will be provided on callbacks. |
| 318 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree unless SAX is NULL or the document is |
| 319 | not well formed. |
| 320 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlParseDoc"></a>htmlParseDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlParseDoc (<a href="libxml-tree.html#xmlChar">xmlChar</a> *cur, |
| 321 | const char *encoding);</pre><p> |
| 322 | parse an HTML in-memory document and build a tree.</p><p> |
| 323 | |
| 324 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>cur</tt></i>:</span></td><td> a pointer to an array of xmlChar |
| 325 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> a free form C string describing the HTML document encoding, or NULL |
| 326 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree |
| 327 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlSAXParseFile"></a>htmlSAXParseFile ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlSAXParseFile (const char *filename, |
| 328 | const char *encoding, |
| 329 | <a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, |
| 330 | void *userData);</pre><p> |
| 331 | parse an HTML file and build a tree. Automatic support for ZLIB/Compress |
| 332 | compressed document is provided by default if found at compile-time. |
| 333 | It uses the given SAX function block to handle the parsing callbacks. |
| 334 | If sax is NULL, fall back to the default DOM tree building routines.</p><p> |
| 335 | |
| 336 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>filename</tt></i>:</span></td><td> the filename |
| 337 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> a free form C string describing the HTML document encoding, or NULL |
| 338 | </td></tr><tr><td><span class="term"><i><tt>sax</tt></i>:</span></td><td> the SAX handler block |
| 339 | </td></tr><tr><td><span class="term"><i><tt>userData</tt></i>:</span></td><td> if using SAX, this pointer will be provided on callbacks. |
| 340 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree unless SAX is NULL or the document is |
| 341 | not well formed. |
| 342 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlParseFile"></a>htmlParseFile ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlParseFile (const char *filename, |
| 343 | const char *encoding);</pre><p> |
| 344 | parse an HTML file and build a tree. Automatic support for ZLIB/Compress |
| 345 | compressed document is provided by default if found at compile-time.</p><p> |
| 346 | |
| 347 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>filename</tt></i>:</span></td><td> the filename |
| 348 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> a free form C string describing the HTML document encoding, or NULL |
| 349 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree |
| 350 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="UTF8ToHtml"></a>UTF8ToHtml ()</h3><pre class="programlisting">int UTF8ToHtml (unsigned char *out, |
| 351 | int *outlen, |
| 352 | unsigned char *in, |
| 353 | int *inlen);</pre><p> |
| 354 | Take a block of UTF-8 chars in and try to convert it to an ASCII |
| 355 | plus HTML entities block of chars out.</p><p> |
| 356 | |
| 357 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>out</tt></i>:</span></td><td> a pointer to an array of bytes to store the result |
| 358 | </td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>:</span></td><td> the length of <i><tt>out</tt></i> |
| 359 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i>:</span></td><td> a pointer to an array of UTF-8 chars |
| 360 | </td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>:</span></td><td> the length of <i><tt>in</tt></i> |
| 361 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise |
| 362 | The value of <i><tt>inlen</tt></i> after return is the number of octets consumed |
| 363 | if the return value is 0, else unpredictable. |
| 364 | The value of <i><tt>outlen</tt></i> after return is the number of octets produced. |
| 365 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlEncodeEntities"></a>htmlEncodeEntities ()</h3><pre class="programlisting">int htmlEncodeEntities (unsigned char *out, |
| 366 | int *outlen, |
| 367 | unsigned char *in, |
| 368 | int *inlen, |
| 369 | int quoteChar);</pre><p> |
| 370 | Take a block of UTF-8 chars in and try to convert it to an ASCII |
| 371 | plus HTML entities block of chars out.</p><p> |
| 372 | |
| 373 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>out</tt></i>:</span></td><td> a pointer to an array of bytes to store the result |
| 374 | </td></tr><tr><td><span class="term"><i><tt>outlen</tt></i>:</span></td><td> the length of <i><tt>out</tt></i> |
| 375 | </td></tr><tr><td><span class="term"><i><tt>in</tt></i>:</span></td><td> a pointer to an array of UTF-8 chars |
| 376 | </td></tr><tr><td><span class="term"><i><tt>inlen</tt></i>:</span></td><td> the length of <i><tt>in</tt></i> |
| 377 | </td></tr><tr><td><span class="term"><i><tt>quoteChar</tt></i>:</span></td><td> the quote character to escape (' or ") or zero. |
| 378 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>0 if success, -2 if the transcoding fails, or -1 otherwise |
| 379 | The value of <i><tt>inlen</tt></i> after return is the number of octets consumed |
| 380 | if the return value is 0, else unpredictable. |
| 381 | The value of <i><tt>outlen</tt></i> after return is the number of octets produced. |
| 382 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlIsScriptAttribute"></a>htmlIsScriptAttribute ()</h3><pre class="programlisting">int htmlIsScriptAttribute (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *name);</pre><p> |
| 383 | Check if an attribute is of content type Script</p><p> |
| 384 | |
| 385 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>name</tt></i>:</span></td><td> an attribute name |
| 386 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>1 if the attribute is a script, 0 otherwise |
| 387 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlHandleOmittedElem"></a>htmlHandleOmittedElem ()</h3><pre class="programlisting">int htmlHandleOmittedElem (int val);</pre><p> |
| 388 | Set and return the previous value for handling HTML omitted tags.</p><p> |
| 389 | |
| 390 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>val</tt></i>:</span></td><td> int 0 or 1 |
| 391 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the previous value: 0 for no handling, 1 for auto insertion. |
| 392 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlCreatePushParserCtxt"></a>htmlCreatePushParserCtxt ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> htmlCreatePushParserCtxt (<a href="libxml-HTMLparser.html#htmlSAXHandlerPtr">htmlSAXHandlerPtr</a> sax, |
| 393 | void *user_data, |
| 394 | const char *chunk, |
| 395 | int size, |
| 396 | const char *filename, |
| 397 | <a href="libxml-encoding.html#xmlCharEncoding">xmlCharEncoding</a> enc);</pre><p> |
| 398 | Create a parser context for using the HTML parser in push mode. |
| 399 | The value of <i><tt>filename</tt></i> is used for fetching external entities |
| 400 | and error/warning reports.</p><p> |
| 401 | |
| 402 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>sax</tt></i>:</span></td><td> a SAX handler |
| 403 | </td></tr><tr><td><span class="term"><i><tt>user_data</tt></i>:</span></td><td> The user data returned on SAX callbacks |
| 404 | </td></tr><tr><td><span class="term"><i><tt>chunk</tt></i>:</span></td><td> a pointer to an array of chars |
| 405 | </td></tr><tr><td><span class="term"><i><tt>size</tt></i>:</span></td><td> number of chars in the array |
| 406 | </td></tr><tr><td><span class="term"><i><tt>filename</tt></i>:</span></td><td> an optional file name or URI |
| 407 | </td></tr><tr><td><span class="term"><i><tt>enc</tt></i>:</span></td><td> an optional encoding |
| 408 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the new parser context or NULL |
| 409 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlParseChunk"></a>htmlParseChunk ()</h3><pre class="programlisting">int htmlParseChunk (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, |
| 410 | const char *chunk, |
| 411 | int size, |
| 412 | int terminate);</pre><p> |
| 413 | Parse a Chunk of memory</p><p> |
| 414 | |
| 415 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context |
| 416 | </td></tr><tr><td><span class="term"><i><tt>chunk</tt></i>:</span></td><td> a char array |
| 417 | </td></tr><tr><td><span class="term"><i><tt>size</tt></i>:</span></td><td> the size in bytes of the chunk |
| 418 | </td></tr><tr><td><span class="term"><i><tt>terminate</tt></i>:</span></td><td> last chunk indicator |
| 419 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>zero if no error, the xmlParserErrors otherwise. |
| 420 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlFreeParserCtxt"></a>htmlFreeParserCtxt ()</h3><pre class="programlisting">void htmlFreeParserCtxt (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p> |
| 421 | Free all the memory used by a parser context. However the parsed |
| 422 | document in ctxt->myDoc is not freed.</p><p> |
| 423 | |
| 424 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context |
| 425 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlParserOption"></a>enum htmlParserOption</h3><pre class="programlisting">typedef enum { |
William M. Brack | 2ad1dff | 2003-11-15 10:35:20 +0000 | [diff] [blame] | 426 | HTML_PARSE_NOERROR = 1&lt;&lt;5, /* suppress error reports */ |
| 427 | HTML_PARSE_NOWARNING= 1&lt;&lt;6, /* suppress warning reports */ |
| 428 | HTML_PARSE_PEDANTIC = 1&lt;&lt;7, /* pedantic error reporting */ |
| 429 | HTML_PARSE_NOBLANKS = 1&lt;&lt;8, /* remove blank nodes */ |
| 430 | HTML_PARSE_NONET = 1&lt;&lt;11 /* Forbid network access */ |
| 431 | } htmlParserOption; |
| 432 | </pre><p> |
Daniel Veillard | d433046 | 2003-04-29 12:40:16 +0000 | [diff] [blame] | 433 | |
William M. Brack | 60f394e | 2003-11-16 06:25:42 +0000 | [diff] [blame] | 434 | </p></div><hr><div class="refsect2"><h3><a name="htmlCtxtReset"></a>htmlCtxtReset ()</h3><pre class="programlisting">void htmlCtxtReset (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt);</pre><p> |
| 435 | Reset a parser context</p><p> |
| 436 | |
| 437 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context |
| 438 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlCtxtUseOptions"></a>htmlCtxtUseOptions ()</h3><pre class="programlisting">int htmlCtxtUseOptions (<a href="libxml-HTMLparser.html#htmlParserCtxtPtr">htmlParserCtxtPtr</a> ctxt, |
| 439 | int options);</pre><p> |
| 440 | Applies the options to the parser context</p><p> |
| 441 | |
| 442 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context |
| 443 | </td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s) |
| 444 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>0 in case of success, the set of unknown or unimplemented options |
| 445 | in case of error. |
| 446 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlReadDoc"></a>htmlReadDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadDoc (const <a href="libxml-tree.html#xmlChar">xmlChar</a> *cur, |
| 447 | const char *URL, |
| 448 | const char *encoding, |
| 449 | int options);</pre><p> |
| 450 | parse an HTML in-memory document and build a tree.</p><p> |
| 451 | |
| 452 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>cur</tt></i>:</span></td><td> a pointer to a zero terminated string |
| 453 | </td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> the base URL to use for the document |
| 454 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL |
| 455 | </td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s) |
| 456 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree |
| 457 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlReadFile"></a>htmlReadFile ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadFile (const char *URL, |
| 458 | const char *encoding, |
| 459 | int options);</pre><p> |
| 460 | parse an HTML file from the filesystem or the network.</p><p> |
| 461 | |
| 462 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> a file or URL |
| 463 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL |
| 464 | </td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s) |
| 465 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree |
| 466 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlReadMemory"></a>htmlReadMemory ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadMemory (const char *buffer, |
| 467 | int size, |
| 468 | const char *URL, |
| 469 | const char *encoding, |
| 470 | int options);</pre><p> |
| 471 | parse an HTML in-memory document and build a tree.</p><p> |
| 472 | |
| 473 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>buffer</tt></i>:</span></td><td> a pointer to a char array |
| 474 | </td></tr><tr><td><span class="term"><i><tt>size</tt></i>:</span></td><td> the size of the array |
| 475 | </td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> the base URL to use for the document |
| 476 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL |
| 477 | </td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s) |
| 478 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree |
| 479 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlReadFd"></a>htmlReadFd ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadFd (int fd, |
| 480 | const char *URL, |
| 481 | const char *encoding, |
| 482 | int options);</pre><p> |
| 483 | parse an HTML document from a file descriptor and build a tree.</p><p> |
| 484 | |
| 485 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>fd</tt></i>:</span></td><td> an open file descriptor |
| 486 | </td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> the base URL to use for the document |
| 487 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL |
| 488 | </td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s) |
| 489 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree |
| 490 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlReadIO"></a>htmlReadIO ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlReadIO (<a href="libxml-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback</a> ioread, |
| 491 | <a href="libxml-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback</a> ioclose, |
| 492 | void *ioctx, |
| 493 | const char *URL, |
| 494 | const char *encoding, |
| 495 | int options);</pre><p> |
| 496 | parse an HTML document from I/O functions and source and build a tree.</p><p> |
| 497 | |
| 498 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ioread</tt></i>:</span></td><td> an I/O read function |
| 499 | </td></tr><tr><td><span class="term"><i><tt>ioclose</tt></i>:</span></td><td> an I/O close function |
| 500 | </td></tr><tr><td><span class="term"><i><tt>ioctx</tt></i>:</span></td><td> an I/O handler |
| 501 | </td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> the base URL to use for the document |
| 502 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL |
| 503 | </td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s) |
| 504 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree |
| 505 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlCtxtReadDoc"></a>htmlCtxtReadDoc ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadDoc (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt, |
| 506 | const <a href="libxml-tree.html#xmlChar">xmlChar</a> *cur, |
| 507 | const char *URL, |
| 508 | const char *encoding, |
| 509 | int options);</pre><p> |
| 510 | parse an HTML in-memory document and build a tree. |
| 511 | This reuses the existing <i><tt>ctxt</tt></i> parser context</p><p> |
| 512 | |
| 513 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context |
| 514 | </td></tr><tr><td><span class="term"><i><tt>cur</tt></i>:</span></td><td> a pointer to a zero terminated string |
| 515 | </td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> the base URL to use for the document |
| 516 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL |
| 517 | </td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s) |
| 518 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree |
| 519 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlCtxtReadFile"></a>htmlCtxtReadFile ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadFile (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt, |
| 520 | const char *filename, |
| 521 | const char *encoding, |
| 522 | int options);</pre><p> |
| 523 | parse an HTML file from the filesystem or the network. |
| 524 | This reuses the existing <i><tt>ctxt</tt></i> parser context</p><p> |
| 525 | |
| 526 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context |
| 527 | </td></tr><tr><td><span class="term"><i><tt>filename</tt></i>:</span></td><td> a file or URL |
| 528 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL |
| 529 | </td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s) |
| 530 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree |
| 531 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlCtxtReadMemory"></a>htmlCtxtReadMemory ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadMemory (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt, |
| 532 | const char *buffer, |
| 533 | int size, |
| 534 | const char *URL, |
| 535 | const char *encoding, |
| 536 | int options);</pre><p> |
| 537 | parse an HTML in-memory document and build a tree. |
| 538 | This reuses the existing <i><tt>ctxt</tt></i> parser context</p><p> |
| 539 | |
| 540 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context |
| 541 | </td></tr><tr><td><span class="term"><i><tt>buffer</tt></i>:</span></td><td> a pointer to a char array |
| 542 | </td></tr><tr><td><span class="term"><i><tt>size</tt></i>:</span></td><td> the size of the array |
| 543 | </td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> the base URL to use for the document |
| 544 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL |
| 545 | </td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s) |
| 546 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree |
| 547 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlCtxtReadFd"></a>htmlCtxtReadFd ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadFd (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt, |
| 548 | int fd, |
| 549 | const char *URL, |
| 550 | const char *encoding, |
| 551 | int options);</pre><p> |
| 552 | parse an HTML document from a file descriptor and build a tree. |
| 553 | This reuses the existing <i><tt>ctxt</tt></i> parser context</p><p> |
| 554 | |
| 555 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context |
| 556 | </td></tr><tr><td><span class="term"><i><tt>fd</tt></i>:</span></td><td> an open file descriptor |
| 557 | </td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> the base URL to use for the document |
| 558 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL |
| 559 | </td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s) |
| 560 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree |
| 561 | </td></tr></tbody></table></div></div><hr><div class="refsect2"><h3><a name="htmlCtxtReadIO"></a>htmlCtxtReadIO ()</h3><pre class="programlisting"><a href="libxml-HTMLparser.html#htmlDocPtr">htmlDocPtr</a> htmlCtxtReadIO (<a href="libxml-tree.html#xmlParserCtxtPtr">xmlParserCtxtPtr</a> ctxt, |
| 562 | <a href="libxml-xmlIO.html#xmlInputReadCallback">xmlInputReadCallback</a> ioread, |
| 563 | <a href="libxml-xmlIO.html#xmlInputCloseCallback">xmlInputCloseCallback</a> ioclose, |
| 564 | void *ioctx, |
| 565 | const char *URL, |
| 566 | const char *encoding, |
| 567 | int options);</pre><p> |
| 568 | parse an HTML document from I/O functions and source and build a tree. |
| 569 | This reuses the existing <i><tt>ctxt</tt></i> parser context</p><p> |
| 570 | |
| 571 | </p><div class="variablelist"><table border="0"><col align="left"><tbody><tr><td><span class="term"><i><tt>ctxt</tt></i>:</span></td><td> an HTML parser context |
| 572 | </td></tr><tr><td><span class="term"><i><tt>ioread</tt></i>:</span></td><td> an I/O read function |
| 573 | </td></tr><tr><td><span class="term"><i><tt>ioclose</tt></i>:</span></td><td> an I/O close function |
| 574 | </td></tr><tr><td><span class="term"><i><tt>ioctx</tt></i>:</span></td><td> an I/O handler |
| 575 | </td></tr><tr><td><span class="term"><i><tt>URL</tt></i>:</span></td><td> the base URL to use for the document |
| 576 | </td></tr><tr><td><span class="term"><i><tt>encoding</tt></i>:</span></td><td> the document encoding, or NULL |
| 577 | </td></tr><tr><td><span class="term"><i><tt>options</tt></i>:</span></td><td> a combination of htmlParserOption(s) |
| 578 | </td></tr><tr><td><span class="term"><span class="emphasis"><i>Returns</i></span> :</span></td><td>the resulting document tree |
| 579 | </td></tr></tbody></table></div></div></div></div><table class="navigation" width="100%" summary="Navigation footer" cellpadding="2" cellspacing="0"><tr valign="middle"><td align="left"><a accesskey="p" href="libxml-entities.html"><b>&lt;&lt; entities</b></a></td><td align="right"><a accesskey="n" href="libxml-valid.html"><b>valid &gt;&gt;</b></a></td></tr></table></body></html> |