blob: b70c0a120391d6ae1c9ca244fd2d72f7257a508a [file] [log] [blame]
/*
 * HTMLparser.h : interface for an HTML 4.0 non-verifying parser
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
9#ifndef __HTML_PARSER_H__
10#define __HTML_PARSER_H__
Daniel Veillard361d8452000-04-03 19:48:13 +000011#include <libxml/parser.h>
Daniel Veillardbe70ff71999-07-05 16:50:46 +000012
Daniel Veillardf600e251999-12-18 15:32:46 +000013#ifdef __cplusplus
Daniel Veillard5cb5ab81999-12-21 15:35:29 +000014extern "C" {
Daniel Veillardf600e251999-12-18 15:32:46 +000015#endif
16
Daniel Veillard5233ffc1999-07-06 22:25:25 +000017/*
18 * Most of the back-end structures from XML and HTML are shared
19 */
Daniel Veillardbe70ff71999-07-05 16:50:46 +000020typedef xmlParserCtxt htmlParserCtxt;
21typedef xmlParserCtxtPtr htmlParserCtxtPtr;
22typedef xmlParserNodeInfo htmlParserNodeInfo;
23typedef xmlSAXHandler htmlSAXHandler;
24typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
25typedef xmlParserInput htmlParserInput;
26typedef xmlParserInputPtr htmlParserInputPtr;
27typedef xmlDocPtr htmlDocPtr;
28typedef xmlNodePtr htmlNodePtr;
29
/*
 * Internal description of an HTML element.
 *
 * htmlElemDesc is the record type, htmlElemDescPtr a pointer to it.
 */
typedef struct _htmlElemDesc htmlElemDesc;
typedef htmlElemDesc *htmlElemDescPtr;
struct _htmlElemDesc {
    const char *name;   /* The tag name */
    int startTag;       /* Whether the start tag can be implied */
    int endTag;         /* Whether the end tag can be implied */
    int empty;          /* Is this an empty element ? */
    int depr;           /* Is this a deprecated element ? */
    int dtd;            /* 1: only in Loose DTD, 2: only Frameset one */
    const char *desc;   /* the description */
};
Daniel Veillard5233ffc1999-07-06 22:25:25 +000044
/*
 * Internal description of an HTML entity.
 *
 * htmlEntityDesc is the record type, htmlEntityDescPtr a pointer to it.
 */
typedef struct _htmlEntityDesc htmlEntityDesc;
typedef htmlEntityDesc *htmlEntityDescPtr;
struct _htmlEntityDesc {
    int value;          /* the UNICODE value for the character */
    const char *name;   /* The entity name */
    const char *desc;   /* the description */
};
Daniel Veillard5233ffc1999-07-06 22:25:25 +000055
56/*
57 * There is only few public functions.
58 */
Daniel Veillard5cb5ab81999-12-21 15:35:29 +000059htmlElemDescPtr htmlTagLookup (const xmlChar *tag);
60htmlEntityDescPtr htmlEntityLookup(const xmlChar *name);
Daniel Veillard47f3f312000-08-27 22:40:15 +000061htmlEntityDescPtr htmlEntityValueLookup(int value);
Daniel Veillard82150d81999-07-07 07:32:15 +000062
Daniel Veillard5cb5ab81999-12-21 15:35:29 +000063int htmlIsAutoClosed(htmlDocPtr doc,
64 htmlNodePtr elem);
65int htmlAutoCloseTag(htmlDocPtr doc,
66 const xmlChar *name,
67 htmlNodePtr elem);
68htmlEntityDescPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt,
69 xmlChar **str);
70int htmlParseCharRef(htmlParserCtxtPtr ctxt);
71void htmlParseElement(htmlParserCtxtPtr ctxt);
Daniel Veillardbe70ff71999-07-05 16:50:46 +000072
Daniel Veillard5cb5ab81999-12-21 15:35:29 +000073htmlDocPtr htmlSAXParseDoc (xmlChar *cur,
74 const char *encoding,
75 htmlSAXHandlerPtr sax,
76 void *userData);
77htmlDocPtr htmlParseDoc (xmlChar *cur,
78 const char *encoding);
79htmlDocPtr htmlSAXParseFile(const char *filename,
80 const char *encoding,
81 htmlSAXHandlerPtr sax,
82 void *userData);
83htmlDocPtr htmlParseFile (const char *filename,
84 const char *encoding);
/*
 * Byte-buffer conversion helpers: `in`/`inlen` describe the input
 * buffer, `out`/`outlen` the output buffer. Both lengths are passed by
 * pointer.
 * NOTE(review): presumably the lengths are updated on return and the
 * int result signals success or failure; confirm against the
 * implementation, along with the meaning of `quoteChar`.
 */
int UTF8ToHtml(unsigned char *out,
               int *outlen,
               const unsigned char *in,
               int *inlen);
int htmlEncodeEntities(unsigned char *out,
                       int *outlen,
                       const unsigned char *in,
                       int *inlen, int quoteChar);
Daniel Veillard47e12f22000-10-15 14:24:25 +000093int htmlIsScriptAttribute(const xmlChar *name);
/*
 * NOTE(review): presumably toggles handling of omitted elements and
 * returns the previous setting; confirm against the implementation.
 */
int htmlHandleOmittedElem(int val);
Daniel Veillardbe70ff71999-07-05 16:50:46 +000095
Daniel Veillard5e5c6231999-12-29 12:49:06 +000096/**
97 * Interfaces for the Push mode
98 */
99void htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
100htmlParserCtxtPtr htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
101 void *user_data,
102 const char *chunk,
103 int size,
104 const char *filename,
105 xmlCharEncoding enc);
106int htmlParseChunk (htmlParserCtxtPtr ctxt,
107 const char *chunk,
108 int size,
109 int terminate);
Daniel Veillardf600e251999-12-18 15:32:46 +0000110#ifdef __cplusplus
111}
112#endif
113
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000114#endif /* __HTML_PARSER_H__ */