blob: 22fe6147bd022d806add0cd60e6c5d03c71387c4 [file] [log] [blame]
/*
 * HTMLparser.h : interface for an HTML 4.0 non-verifying parser
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
9#ifndef __HTML_PARSER_H__
10#define __HTML_PARSER_H__
11#include "parser.h"
12
Daniel Veillardf600e251999-12-18 15:32:46 +000013#ifdef __cplusplus
Daniel Veillard5cb5ab81999-12-21 15:35:29 +000014extern "C" {
Daniel Veillardf600e251999-12-18 15:32:46 +000015#endif
16
Daniel Veillard5233ffc1999-07-06 22:25:25 +000017/*
18 * Most of the back-end structures from XML and HTML are shared
19 */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000020typedef xmlParserCtxt htmlParserCtxt;
21typedef xmlParserCtxtPtr htmlParserCtxtPtr;
22typedef xmlParserNodeInfo htmlParserNodeInfo;
23typedef xmlSAXHandler htmlSAXHandler;
24typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
25typedef xmlParserInput htmlParserInput;
26typedef xmlParserInputPtr htmlParserInputPtr;
27typedef xmlDocPtr htmlDocPtr;
28typedef xmlNodePtr htmlNodePtr;
29
Daniel Veillard5233ffc1999-07-06 22:25:25 +000030/*
31 * Internal description of an HTML element
32 */
Daniel Veillard71b656e2000-01-05 14:46:17 +000033typedef struct _htmlElemDesc htmlElemDesc;
34typedef htmlElemDesc *htmlElemDescPtr;
35struct _htmlElemDesc {
Daniel Veillardb96e6431999-08-29 21:02:19 +000036 const char *name; /* The tag name */
Daniel Veillard5233ffc1999-07-06 22:25:25 +000037 int startTag; /* Whether the start tag can be implied */
38 int endTag; /* Whether the end tag can be implied */
39 int empty; /* Is this an empty element ? */
40 int depr; /* Is this a deprecated element ? */
41 int dtd; /* 1: only in Loose DTD, 2: only Frameset one */
42 const char *desc; /* the description */
Daniel Veillard71b656e2000-01-05 14:46:17 +000043};
Daniel Veillard5233ffc1999-07-06 22:25:25 +000044
45/*
46 * Internal description of an HTML entity
47 */
Daniel Veillard71b656e2000-01-05 14:46:17 +000048typedef struct _htmlEntityDesc htmlEntityDesc;
49typedef htmlEntityDesc *htmlEntityDescPtr;
50struct _htmlEntityDesc {
Daniel Veillard5233ffc1999-07-06 22:25:25 +000051 int value; /* the UNICODE value for the character */
Daniel Veillardb96e6431999-08-29 21:02:19 +000052 const char *name; /* The entity name */
Daniel Veillard5233ffc1999-07-06 22:25:25 +000053 const char *desc; /* the description */
Daniel Veillard71b656e2000-01-05 14:46:17 +000054};
Daniel Veillard5233ffc1999-07-06 22:25:25 +000055
56/*
57 * There is only few public functions.
58 */
Daniel Veillard5cb5ab81999-12-21 15:35:29 +000059htmlElemDescPtr htmlTagLookup (const xmlChar *tag);
60htmlEntityDescPtr htmlEntityLookup(const xmlChar *name);
Daniel Veillard82150d81999-07-07 07:32:15 +000061
Daniel Veillard5cb5ab81999-12-21 15:35:29 +000062int htmlIsAutoClosed(htmlDocPtr doc,
63 htmlNodePtr elem);
64int htmlAutoCloseTag(htmlDocPtr doc,
65 const xmlChar *name,
66 htmlNodePtr elem);
67htmlEntityDescPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt,
68 xmlChar **str);
69int htmlParseCharRef(htmlParserCtxtPtr ctxt);
70void htmlParseElement(htmlParserCtxtPtr ctxt);
Daniel Veillardbe70ff71999-07-05 16:50:46 +000071
Daniel Veillard5cb5ab81999-12-21 15:35:29 +000072htmlDocPtr htmlSAXParseDoc (xmlChar *cur,
73 const char *encoding,
74 htmlSAXHandlerPtr sax,
75 void *userData);
76htmlDocPtr htmlParseDoc (xmlChar *cur,
77 const char *encoding);
78htmlDocPtr htmlSAXParseFile(const char *filename,
79 const char *encoding,
80 htmlSAXHandlerPtr sax,
81 void *userData);
82htmlDocPtr htmlParseFile (const char *filename,
83 const char *encoding);
Daniel Veillardbe70ff71999-07-05 16:50:46 +000084
Daniel Veillard5e5c6231999-12-29 12:49:06 +000085/**
86 * Interfaces for the Push mode
87 */
88void htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
89htmlParserCtxtPtr htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
90 void *user_data,
91 const char *chunk,
92 int size,
93 const char *filename,
94 xmlCharEncoding enc);
95int htmlParseChunk (htmlParserCtxtPtr ctxt,
96 const char *chunk,
97 int size,
98 int terminate);
Daniel Veillardf600e251999-12-18 15:32:46 +000099#ifdef __cplusplus
100}
101#endif
102
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000103#endif /* __HTML_PARSER_H__ */