blob: b04e3b099f9e4a8614bb482bcac9911075f54cfd [file] [log] [blame]
/*
 * HTMLparser.h : interface for an HTML 4.0 non-verifying parser
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
9#ifndef __HTML_PARSER_H__
10#define __HTML_PARSER_H__
Daniel Veillard361d8452000-04-03 19:48:13 +000011#include <libxml/parser.h>
Daniel Veillardbe70ff71999-07-05 16:50:46 +000012
Daniel Veillardf600e251999-12-18 15:32:46 +000013#ifdef __cplusplus
Daniel Veillard5cb5ab81999-12-21 15:35:29 +000014extern "C" {
Daniel Veillardf600e251999-12-18 15:32:46 +000015#endif
16
Daniel Veillard5233ffc1999-07-06 22:25:25 +000017/*
18 * Most of the back-end structures from XML and HTML are shared
19 */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000020typedef xmlParserCtxt htmlParserCtxt;
21typedef xmlParserCtxtPtr htmlParserCtxtPtr;
22typedef xmlParserNodeInfo htmlParserNodeInfo;
23typedef xmlSAXHandler htmlSAXHandler;
24typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
25typedef xmlParserInput htmlParserInput;
26typedef xmlParserInputPtr htmlParserInputPtr;
27typedef xmlDocPtr htmlDocPtr;
28typedef xmlNodePtr htmlNodePtr;
29
/*
 * Internal description of an HTML element.
 *
 * htmlElemDesc is the record type; htmlElemDescPtr is a pointer to it.
 * Field order is part of the layout and must not be changed.
 */
typedef struct _htmlElemDesc htmlElemDesc;
typedef htmlElemDesc *htmlElemDescPtr;
struct _htmlElemDesc {
    const char *name;   /* The tag name */
    int startTag;       /* Whether the start tag can be implied */
    int endTag;         /* Whether the end tag can be implied */
    int empty;          /* Is this an empty element ? */
    int depr;           /* Is this a deprecated element ? */
    int dtd;            /* 1: only in Loose DTD, 2: only Frameset one */
    const char *desc;   /* The description */
};
Daniel Veillard5233ffc1999-07-06 22:25:25 +000044
/*
 * Internal description of an HTML entity.
 *
 * htmlEntityDesc is the record type; htmlEntityDescPtr is a pointer to it.
 * Field order is part of the layout and must not be changed.
 */
typedef struct _htmlEntityDesc htmlEntityDesc;
typedef htmlEntityDesc *htmlEntityDescPtr;
struct _htmlEntityDesc {
    int value;          /* The UNICODE value for the character */
    const char *name;   /* The entity name */
    const char *desc;   /* The description */
};
Daniel Veillard5233ffc1999-07-06 22:25:25 +000055
56/*
57 * There is only few public functions.
58 */
Daniel Veillard5cb5ab81999-12-21 15:35:29 +000059htmlElemDescPtr htmlTagLookup (const xmlChar *tag);
60htmlEntityDescPtr htmlEntityLookup(const xmlChar *name);
Daniel Veillard82150d81999-07-07 07:32:15 +000061
Daniel Veillard5cb5ab81999-12-21 15:35:29 +000062int htmlIsAutoClosed(htmlDocPtr doc,
63 htmlNodePtr elem);
64int htmlAutoCloseTag(htmlDocPtr doc,
65 const xmlChar *name,
66 htmlNodePtr elem);
67htmlEntityDescPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt,
68 xmlChar **str);
69int htmlParseCharRef(htmlParserCtxtPtr ctxt);
70void htmlParseElement(htmlParserCtxtPtr ctxt);
Daniel Veillardbe70ff71999-07-05 16:50:46 +000071
Daniel Veillard5cb5ab81999-12-21 15:35:29 +000072htmlDocPtr htmlSAXParseDoc (xmlChar *cur,
73 const char *encoding,
74 htmlSAXHandlerPtr sax,
75 void *userData);
76htmlDocPtr htmlParseDoc (xmlChar *cur,
77 const char *encoding);
78htmlDocPtr htmlSAXParseFile(const char *filename,
79 const char *encoding,
80 htmlSAXHandlerPtr sax,
81 void *userData);
82htmlDocPtr htmlParseFile (const char *filename,
83 const char *encoding);
Daniel Veillard32bc74e2000-07-14 14:49:25 +000084int UTF8ToHtml (unsigned char* out,
85 int *outlen,
86 const unsigned char* in,
87 int *inlen);
Daniel Veillardbe70ff71999-07-05 16:50:46 +000088
Daniel Veillard5e5c6231999-12-29 12:49:06 +000089/**
90 * Interfaces for the Push mode
91 */
92void htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
93htmlParserCtxtPtr htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
94 void *user_data,
95 const char *chunk,
96 int size,
97 const char *filename,
98 xmlCharEncoding enc);
99int htmlParseChunk (htmlParserCtxtPtr ctxt,
100 const char *chunk,
101 int size,
102 int terminate);
Daniel Veillardf600e251999-12-18 15:32:46 +0000103#ifdef __cplusplus
104}
105#endif
106
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000107#endif /* __HTML_PARSER_H__ */