blob: b129175a08b305678aa48f7ca94fe23cba0732b1 [file] [log] [blame]
/*
 * HTMLparser.h : interface for an HTML 4.0 non-verifying parser
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
9#ifndef __HTML_PARSER_H__
10#define __HTML_PARSER_H__
11#include <libxml/parser.h>
12
13#ifdef __cplusplus
14extern "C" {
15#endif
16
17/*
18 * Most of the back-end structures from XML and HTML are shared
19 */
20typedef xmlParserCtxt htmlParserCtxt;
21typedef xmlParserCtxtPtr htmlParserCtxtPtr;
22typedef xmlParserNodeInfo htmlParserNodeInfo;
23typedef xmlSAXHandler htmlSAXHandler;
24typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
25typedef xmlParserInput htmlParserInput;
26typedef xmlParserInputPtr htmlParserInputPtr;
27typedef xmlDocPtr htmlDocPtr;
28typedef xmlNodePtr htmlNodePtr;
29
/*
 * Internal description of an HTML element.
 *
 * htmlElemDesc is the record type; htmlElemDescPtr is a pointer to it.
 * The flag fields are stored as plain chars.
 */
typedef struct _htmlElemDesc htmlElemDesc;
typedef htmlElemDesc *htmlElemDescPtr;
struct _htmlElemDesc {
    const char *name;	/* The tag name */
    char startTag;	/* Whether the start tag can be implied */
    char endTag;	/* Whether the end tag can be implied */
    char saveEndTag;	/* Whether the end tag should be saved */
    char empty;		/* Is this an empty element ? */
    char depr;		/* Is this a deprecated element ? */
    char dtd;		/* 1: only in Loose DTD, 2: only Frameset one */
    const char *desc;	/* the description */
};
45
/*
 * Internal description of an HTML entity.
 *
 * htmlEntityDesc is the record type; htmlEntityDescPtr is a pointer to it.
 */
typedef struct _htmlEntityDesc htmlEntityDesc;
typedef htmlEntityDesc *htmlEntityDescPtr;
struct _htmlEntityDesc {
    unsigned int value;	/* the UNICODE value for the character */
    const char *name;	/* The entity name */
    const char *desc;	/* the description */
};
56
57/*
58 * There is only few public functions.
59 */
60htmlElemDescPtr htmlTagLookup (const xmlChar *tag);
61htmlEntityDescPtr htmlEntityLookup(const xmlChar *name);
Daniel Veillard56a4cb82001-03-24 17:00:36 +000062htmlEntityDescPtr htmlEntityValueLookup(unsigned int value);
Owen Taylor3473f882001-02-23 17:55:21 +000063
64int htmlIsAutoClosed(htmlDocPtr doc,
65 htmlNodePtr elem);
66int htmlAutoCloseTag(htmlDocPtr doc,
67 const xmlChar *name,
68 htmlNodePtr elem);
69htmlEntityDescPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt,
70 xmlChar **str);
71int htmlParseCharRef(htmlParserCtxtPtr ctxt);
72void htmlParseElement(htmlParserCtxtPtr ctxt);
73
74htmlDocPtr htmlSAXParseDoc (xmlChar *cur,
75 const char *encoding,
76 htmlSAXHandlerPtr sax,
77 void *userData);
78htmlDocPtr htmlParseDoc (xmlChar *cur,
79 const char *encoding);
80htmlDocPtr htmlSAXParseFile(const char *filename,
81 const char *encoding,
82 htmlSAXHandlerPtr sax,
83 void *userData);
84htmlDocPtr htmlParseFile (const char *filename,
85 const char *encoding);
86int UTF8ToHtml (unsigned char* out,
87 int *outlen,
88 const unsigned char* in,
89 int *inlen);
90int htmlEncodeEntities(unsigned char* out,
91 int *outlen,
92 const unsigned char* in,
93 int *inlen, int quoteChar);
94int htmlIsScriptAttribute(const xmlChar *name);
95int htmlHandleOmittedElem(int val);
96
97/**
98 * Interfaces for the Push mode
99 */
100void htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
101htmlParserCtxtPtr htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
102 void *user_data,
103 const char *chunk,
104 int size,
105 const char *filename,
106 xmlCharEncoding enc);
107int htmlParseChunk (htmlParserCtxtPtr ctxt,
108 const char *chunk,
109 int size,
110 int terminate);
111#ifdef __cplusplus
112}
113#endif
114
115#endif /* __HTML_PARSER_H__ */