blob: c79ad09c1f839d295637116a2c418ea4e079c9f2 [file] [log] [blame]
/*
 * HTMLparser.h : interface for an HTML 4.0 non-verifying parser
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
9#ifndef __HTML_PARSER_H__
10#define __HTML_PARSER_H__
Daniel Veillard361d8452000-04-03 19:48:13 +000011#include <libxml/parser.h>
Daniel Veillardbe70ff71999-07-05 16:50:46 +000012
Daniel Veillardf600e251999-12-18 15:32:46 +000013#ifdef __cplusplus
Daniel Veillard5cb5ab81999-12-21 15:35:29 +000014extern "C" {
Daniel Veillardf600e251999-12-18 15:32:46 +000015#endif
16
Daniel Veillard5233ffc1999-07-06 22:25:25 +000017/*
18 * Most of the back-end structures from XML and HTML are shared
19 */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000020typedef xmlParserCtxt htmlParserCtxt;
21typedef xmlParserCtxtPtr htmlParserCtxtPtr;
22typedef xmlParserNodeInfo htmlParserNodeInfo;
23typedef xmlSAXHandler htmlSAXHandler;
24typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
25typedef xmlParserInput htmlParserInput;
26typedef xmlParserInputPtr htmlParserInputPtr;
27typedef xmlDocPtr htmlDocPtr;
28typedef xmlNodePtr htmlNodePtr;
29
/*
 * Internal description of an HTML element.
 *
 * htmlElemDesc is the record type, htmlElemDescPtr a pointer to it.
 * The flag fields are stored as plain char values.
 */
typedef struct _htmlElemDesc htmlElemDesc;
typedef htmlElemDesc *htmlElemDescPtr;
struct _htmlElemDesc {
    const char *name;   /* The tag name */
    char startTag;      /* Whether the start tag can be implied */
    char endTag;        /* Whether the end tag can be implied */
    char saveEndTag;    /* Whether the end tag should be saved */
    char empty;         /* Is this an empty element ? */
    char depr;          /* Is this a deprecated element ? */
    char dtd;           /* 1: only in Loose DTD, 2: only Frameset one
                         * (presumably 0 means no such restriction --
                         * TODO confirm against the element table) */
    const char *desc;   /* the description */
};

/*
 * Internal description of an HTML entity.
 *
 * htmlEntityDesc is the record type, htmlEntityDescPtr a pointer to it.
 */
typedef struct _htmlEntityDesc htmlEntityDesc;
typedef htmlEntityDesc *htmlEntityDescPtr;
struct _htmlEntityDesc {
    int value;          /* the UNICODE value for the character */
    const char *name;   /* The entity name */
    const char *desc;   /* the description */
};

57/*
58 * There is only few public functions.
59 */
Daniel Veillard5cb5ab81999-12-21 15:35:29 +000060htmlElemDescPtr htmlTagLookup (const xmlChar *tag);
61htmlEntityDescPtr htmlEntityLookup(const xmlChar *name);
Daniel Veillard47f3f312000-08-27 22:40:15 +000062htmlEntityDescPtr htmlEntityValueLookup(int value);
Daniel Veillard82150d81999-07-07 07:32:15 +000063
Daniel Veillard5cb5ab81999-12-21 15:35:29 +000064int htmlIsAutoClosed(htmlDocPtr doc,
65 htmlNodePtr elem);
66int htmlAutoCloseTag(htmlDocPtr doc,
67 const xmlChar *name,
68 htmlNodePtr elem);
69htmlEntityDescPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt,
70 xmlChar **str);
71int htmlParseCharRef(htmlParserCtxtPtr ctxt);
72void htmlParseElement(htmlParserCtxtPtr ctxt);
Daniel Veillardbe70ff71999-07-05 16:50:46 +000073
Daniel Veillard5cb5ab81999-12-21 15:35:29 +000074htmlDocPtr htmlSAXParseDoc (xmlChar *cur,
75 const char *encoding,
76 htmlSAXHandlerPtr sax,
77 void *userData);
78htmlDocPtr htmlParseDoc (xmlChar *cur,
79 const char *encoding);
80htmlDocPtr htmlSAXParseFile(const char *filename,
81 const char *encoding,
82 htmlSAXHandlerPtr sax,
83 void *userData);
84htmlDocPtr htmlParseFile (const char *filename,
85 const char *encoding);
Daniel Veillard32bc74e2000-07-14 14:49:25 +000086int UTF8ToHtml (unsigned char* out,
87 int *outlen,
88 const unsigned char* in,
89 int *inlen);
Daniel Veillarde010c172000-08-28 10:04:51 +000090int htmlEncodeEntities(unsigned char* out,
91 int *outlen,
92 const unsigned char* in,
93 int *inlen, int quoteChar);
Daniel Veillard47e12f22000-10-15 14:24:25 +000094int htmlIsScriptAttribute(const xmlChar *name);
Daniel Veillarda6d8eb62000-12-27 10:46:47 +000095int htmlHandleOmittedElem(int val);
Daniel Veillardbe70ff71999-07-05 16:50:46 +000096
Daniel Veillard5e5c6231999-12-29 12:49:06 +000097/**
98 * Interfaces for the Push mode
99 */
100void htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
101htmlParserCtxtPtr htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
102 void *user_data,
103 const char *chunk,
104 int size,
105 const char *filename,
106 xmlCharEncoding enc);
107int htmlParseChunk (htmlParserCtxtPtr ctxt,
108 const char *chunk,
109 int size,
110 int terminate);
Daniel Veillardf600e251999-12-18 15:32:46 +0000111#ifdef __cplusplus
112}
113#endif
114
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000115#endif /* __HTML_PARSER_H__ */