blob: e7949189beebbd062cc5a586373b6efcd817024b [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * HTMLparser.h : interface for an HTML 4.0 non-verifying parser
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
9#ifndef __HTML_PARSER_H__
10#define __HTML_PARSER_H__
11#include <libxml/parser.h>
12
13#ifdef __cplusplus
14extern "C" {
15#endif
16
17/*
18 * Most of the back-end structures from XML and HTML are shared
19 */
/*
 * HTML-prefixed aliases for the xml* parser, SAX handler, input, document
 * and node types. Each typedef below only renames an existing xml* type;
 * no new structure is introduced here.
 */
20typedef xmlParserCtxt htmlParserCtxt;
21typedef xmlParserCtxtPtr htmlParserCtxtPtr;
22typedef xmlParserNodeInfo htmlParserNodeInfo;
23typedef xmlSAXHandler htmlSAXHandler;
24typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
25typedef xmlParserInput htmlParserInput;
26typedef xmlParserInputPtr htmlParserInputPtr;
27typedef xmlDocPtr htmlDocPtr;
28typedef xmlNodePtr htmlNodePtr;
29
30/*
31 * Internal description of an HTML element
32 */
/*
 * htmlElemDesc: one record describing a single HTML element.
 * htmlElemDescPtr is a pointer to such a record.
 * All flag members are stored as plain char.
 */
33typedef struct _htmlElemDesc htmlElemDesc;
34typedef htmlElemDesc *htmlElemDescPtr;
35struct _htmlElemDesc {
36 const char *name; /* the tag name */
Daniel Veillard02bb1702001-06-13 21:11:59 +000037 char startTag; /* whether the start tag can be implied */
38 char endTag; /* whether the end tag can be implied */
39 char saveEndTag; /* whether the end tag should be saved */
40 char empty; /* whether this is an empty element */
41 char depr; /* whether this is a deprecated element */
42 char dtd; /* 1: only in the Loose DTD, 2: only in the Frameset DTD; NOTE(review): other values presumably mean "no restriction" - confirm */
43 char isinline; /* 0: block element, 1: inline element */
Owen Taylor3473f882001-02-23 17:55:21 +000044 const char *desc; /* the human-readable description */
45};
46
47/*
48 * Internal description of an HTML entity
49 */
/*
 * htmlEntityDesc: one record describing a single HTML entity, pairing a
 * Unicode value with an entity name and a description string.
 * htmlEntityDescPtr is a pointer to such a record.
 */
50typedef struct _htmlEntityDesc htmlEntityDesc;
51typedef htmlEntityDesc *htmlEntityDescPtr;
52struct _htmlEntityDesc {
Daniel Veillard56a4cb82001-03-24 17:00:36 +000053 unsigned int value; /* the Unicode value of the character */
Owen Taylor3473f882001-02-23 17:55:21 +000054 const char *name; /* the entity name */
55 const char *desc; /* the description */
56};
57
58/*
59 * There are only a few public functions.
60 */
/*
 * Descriptor lookups: by tag name, by entity name, or by entity value.
 * Each returns a pointer to the matching descriptor record.
 * NOTE(review): the not-found result is not visible in this header
 * (presumably NULL) - confirm against the implementation.
 */
61htmlElemDescPtr htmlTagLookup (const xmlChar *tag);
62htmlEntityDescPtr htmlEntityLookup(const xmlChar *name);
Daniel Veillard56a4cb82001-03-24 17:00:36 +000063htmlEntityDescPtr htmlEntityValueLookup(unsigned int value);
Owen Taylor3473f882001-02-23 17:55:21 +000064
/*
 * Auto-close helpers taking a document and a node (plus a tag name for
 * htmlAutoCloseTag). Both return int.
 * NOTE(review): the meaning of the int result is not stated here - confirm.
 */
65int htmlIsAutoClosed(htmlDocPtr doc,
66 htmlNodePtr elem);
67int htmlAutoCloseTag(htmlDocPtr doc,
68 const xmlChar *name,
69 htmlNodePtr elem);
/*
 * Parsing routines that operate on an existing parser context.
 * htmlParseEntityRef additionally takes an xmlChar ** out-parameter (str).
 */
70htmlEntityDescPtr htmlParseEntityRef(htmlParserCtxtPtr ctxt,
71 xmlChar **str);
72int htmlParseCharRef(htmlParserCtxtPtr ctxt);
73void htmlParseElement(htmlParserCtxtPtr ctxt);
74
/*
 * Whole-document entry points returning an htmlDocPtr. The *Doc variants
 * take an xmlChar string (cur), the *File variants take a filename; all
 * take an encoding name. The SAX variants also take a SAX handler and a
 * caller-supplied userData pointer.
 */
75htmlDocPtr htmlSAXParseDoc (xmlChar *cur,
76 const char *encoding,
77 htmlSAXHandlerPtr sax,
78 void *userData);
79htmlDocPtr htmlParseDoc (xmlChar *cur,
80 const char *encoding);
81htmlDocPtr htmlSAXParseFile(const char *filename,
82 const char *encoding,
83 htmlSAXHandlerPtr sax,
84 void *userData);
85htmlDocPtr htmlParseFile (const char *filename,
86 const char *encoding);
/*
 * Buffer-to-buffer conversion routines. Input and output are unsigned char
 * buffers whose lengths are passed through int pointers.
 * NOTE(review): presumably *outlen and *inlen are updated to the number of
 * bytes written and consumed - confirm against the implementation.
 */
87int UTF8ToHtml (unsigned char* out,
88 int *outlen,
89 const unsigned char* in,
90 int *inlen);
91int htmlEncodeEntities(unsigned char* out,
92 int *outlen,
93 const unsigned char* in,
94 int *inlen, int quoteChar);
/*
 * Miscellaneous helpers: a predicate on an attribute name, and a setter-like
 * call taking an int value.
 * NOTE(review): the return value of htmlHandleOmittedElem is not described
 * here (possibly the previous setting) - confirm.
 */
95int htmlIsScriptAttribute(const xmlChar *name);
96int htmlHandleOmittedElem(int val);
97
98/**
99 * Interfaces for the Push mode
100 */
/*
 * Push-mode API: htmlCreatePushParserCtxt builds a parser context from a
 * SAX handler, user data, a first chunk of `size` bytes, a filename and a
 * character encoding; htmlParseChunk feeds a further chunk of `size` bytes
 * to that context; htmlFreeParserCtxt releases the context.
 * NOTE(review): `terminate` presumably marks the final chunk, and sax /
 * chunk / filename may accept NULL - confirm against the implementation.
 */
101void htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
102htmlParserCtxtPtr htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
103 void *user_data,
104 const char *chunk,
105 int size,
106 const char *filename,
107 xmlCharEncoding enc);
108int htmlParseChunk (htmlParserCtxtPtr ctxt,
109 const char *chunk,
110 int size,
111 int terminate);
112#ifdef __cplusplus
113}
114#endif
115
116#endif /* __HTML_PARSER_H__ */