/*
 * parserInternals.h : internal routines exported by the parser.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 *
 * 14 Nov 2000 ht - truncated declaration of xmlParseElementChildrenContentDecl
 *                  for VMS
 *
 */
12
13#ifndef __XML_PARSER_INTERNALS_H__
14#define __XML_PARSER_INTERNALS_H__
15
16#include <libxml/parser.h>
Daniel Veillard56a4cb82001-03-24 17:00:36 +000017#include <libxml/HTMLparser.h>
Owen Taylor3473f882001-02-23 17:55:21 +000018
19#ifdef __cplusplus
20extern "C" {
21#endif
22
/*
 * Identifiers can be longer, but this will be more costly
 * at runtime.
 */
#define XML_MAX_NAMELEN 100
28
/*
 * The parser tries to always keep this amount of input available;
 * one of the reasons is to provide context when reporting errors.
 */
#define INPUT_CHUNK 250
34
/************************************************************************
 *                                                                      *
 * UNICODE version of the macros.                                       *
 *                                                                      *
 ************************************************************************/
/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * The argument is evaluated several times, so it must not have side
 * effects.
 */
#define IS_CHAR(c)                                                      \
    ((((c) >= 0x20) && ((c) <= 0xD7FF)) ||                              \
     ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||                 \
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||                            \
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
50
/*
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * Tests a single character against the four white space code points.
 * The argument is evaluated several times, so it must not have side
 * effects.
 */
#define IS_BLANK(c)                                                     \
    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D))
56
/*
 * Character class macros.  Each one forwards to the xmlIs* function of
 * the same name, except IS_LETTER which combines two of the macros.
 */

/*
 * [85] BaseChar ::= ... long list see REC ...
 */
#define IS_BASECHAR(c) xmlIsBaseChar(c)

/*
 * [88] Digit ::= ... long list see REC ...
 */
#define IS_DIGIT(c) xmlIsDigit(c)

/*
 * [87] CombiningChar ::= ... long list see REC ...
 */
#define IS_COMBINING(c) xmlIsCombining(c)

/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 */
#define IS_EXTENDER(c) xmlIsExtender(c)

/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 */
#define IS_IDEOGRAPHIC(c) xmlIsIdeographic(c)

/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * The argument may be evaluated twice.
 */
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))

/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 */
#define IS_PUBIDCHAR(c) xmlIsPubidChar(c)
94
/*
 * SKIP_EOL: advance the pointer p past an end-of-line sequence.
 *
 * A CR (0x0D) optionally followed by an LF (0x0A) is skipped first;
 * then an LF optionally followed by a CR is skipped.  The code points
 * are the ones used for #xD and #xA elsewhere in this header.
 *
 * p must be a modifiable lvalue and is evaluated several times.  The
 * body is wrapped in do { } while (0) so the macro behaves as a single
 * statement; the caller supplies the trailing semicolon.
 */
#define SKIP_EOL(p)                                                     \
    do {                                                                \
        if (*(p) == 0x0D) { (p)++; if (*(p) == 0x0A) (p)++; }           \
        if (*(p) == 0x0A) { (p)++; if (*(p) == 0x0D) (p)++; }           \
    } while (0)
98
/*
 * MOVETO_ENDTAG: advance p until it points at '>' or at a 0 byte,
 * whichever comes first.  p must be a modifiable lvalue and is
 * evaluated several times.  The caller supplies the trailing semicolon.
 */
#define MOVETO_ENDTAG(p)                                                \
    while ((*(p)) && (*(p) != '>')) (p)++
101
/*
 * MOVETO_STARTTAG: advance p until it points at '<' or at a 0 byte,
 * whichever comes first.  p must be a modifiable lvalue and is
 * evaluated several times.  The caller supplies the trailing semicolon.
 */
#define MOVETO_STARTTAG(p)                                              \
    while ((*(p)) && (*(p) != '<')) (p)++
104
105/**
106 * Global vaiables affecting the default parser behaviour.
107 */
108
109LIBXML_DLL_IMPORT extern int xmlParserDebugEntities;
110LIBXML_DLL_IMPORT extern int xmlGetWarningsDefaultValue;
111LIBXML_DLL_IMPORT extern int xmlParserDebugEntities;
112LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultValue;
113LIBXML_DLL_IMPORT extern int xmlDoValidityCheckingDefaultValue;
114LIBXML_DLL_IMPORT extern int xmlLoadExtDtdDefaultValue;
115LIBXML_DLL_IMPORT extern int xmlPedanticParserDefaultValue;
116LIBXML_DLL_IMPORT extern int xmlKeepBlanksDefaultValue;
117LIBXML_DLL_IMPORT extern xmlChar xmlStringText[];
118LIBXML_DLL_IMPORT extern xmlChar xmlStringTextNoenc[];
119LIBXML_DLL_IMPORT extern xmlChar xmlStringComment[];
120
/*
 * Functions to finish the work of the macros where needed.
 *
 * Each takes a character code as an int and returns an int.
 * Each name is declared exactly once.
 */
int xmlIsBaseChar(int c);
int xmlIsBlank(int c);
int xmlIsPubidChar(int c);
int xmlIsLetter(int c);
int xmlIsDigit(int c);
int xmlIsIdeographic(int c);
int xmlIsCombining(int c);
int xmlIsExtender(int c);
int xmlIsChar(int c);
134
135/**
136 * Parser context
137 */
138xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
139xmlParserCtxtPtr xmlCreateFileParserCtxt (const char *filename);
140xmlParserCtxtPtr xmlCreateMemoryParserCtxt(char *buffer,
141 int size);
142xmlParserCtxtPtr xmlNewParserCtxt (void);
143xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL,
144 const xmlChar *ID,
145 const xmlChar *base);
146int xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
147 xmlCharEncoding enc);
148int xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,
149 xmlCharEncodingHandlerPtr handler);
150void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
151
152/**
153 * Entities
154 */
155void xmlHandleEntity (xmlParserCtxtPtr ctxt,
156 xmlEntityPtr entity);
157
158/**
159 * Input Streams
160 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000161xmlParserInputPtr xmlNewStringInputStream (xmlParserCtxtPtr ctxt,
162 const xmlChar *buffer);
Owen Taylor3473f882001-02-23 17:55:21 +0000163xmlParserInputPtr xmlNewEntityInputStream (xmlParserCtxtPtr ctxt,
164 xmlEntityPtr entity);
165void xmlPushInput (xmlParserCtxtPtr ctxt,
166 xmlParserInputPtr input);
167xmlChar xmlPopInput (xmlParserCtxtPtr ctxt);
168void xmlFreeInputStream (xmlParserInputPtr input);
169xmlParserInputPtr xmlNewInputFromFile (xmlParserCtxtPtr ctxt,
170 const char *filename);
171xmlParserInputPtr xmlNewInputStream (xmlParserCtxtPtr ctxt);
172
173/**
174 * Namespaces.
175 */
176xmlChar * xmlSplitQName (xmlParserCtxtPtr ctxt,
177 const xmlChar *name,
178 xmlChar **prefix);
179xmlChar * xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt);
180xmlChar * xmlNamespaceParseQName (xmlParserCtxtPtr ctxt,
181 xmlChar **prefix);
182xmlChar * xmlNamespaceParseNSDef (xmlParserCtxtPtr ctxt);
183xmlChar * xmlParseQuotedString (xmlParserCtxtPtr ctxt);
184void xmlParseNamespace (xmlParserCtxtPtr ctxt);
185
186/**
187 * Generic production rules
188 */
189xmlChar * xmlScanName (xmlParserCtxtPtr ctxt);
190xmlChar * xmlParseName (xmlParserCtxtPtr ctxt);
191xmlChar * xmlParseNmtoken (xmlParserCtxtPtr ctxt);
192xmlChar * xmlParseEntityValue (xmlParserCtxtPtr ctxt,
193 xmlChar **orig);
194xmlChar * xmlParseAttValue (xmlParserCtxtPtr ctxt);
195xmlChar * xmlParseSystemLiteral (xmlParserCtxtPtr ctxt);
196xmlChar * xmlParsePubidLiteral (xmlParserCtxtPtr ctxt);
197void xmlParseCharData (xmlParserCtxtPtr ctxt,
198 int cdata);
199xmlChar * xmlParseExternalID (xmlParserCtxtPtr ctxt,
200 xmlChar **publicID,
201 int strict);
202void xmlParseComment (xmlParserCtxtPtr ctxt);
203xmlChar * xmlParsePITarget (xmlParserCtxtPtr ctxt);
204void xmlParsePI (xmlParserCtxtPtr ctxt);
205void xmlParseNotationDecl (xmlParserCtxtPtr ctxt);
206void xmlParseEntityDecl (xmlParserCtxtPtr ctxt);
207int xmlParseDefaultDecl (xmlParserCtxtPtr ctxt,
208 xmlChar **value);
209xmlEnumerationPtr xmlParseNotationType (xmlParserCtxtPtr ctxt);
210xmlEnumerationPtr xmlParseEnumerationType (xmlParserCtxtPtr ctxt);
211int xmlParseEnumeratedType (xmlParserCtxtPtr ctxt,
212 xmlEnumerationPtr *tree);
213int xmlParseAttributeType (xmlParserCtxtPtr ctxt,
214 xmlEnumerationPtr *tree);
215void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt);
216xmlElementContentPtr xmlParseElementMixedContentDecl
217 (xmlParserCtxtPtr ctxt);
218#ifdef VMS
219xmlElementContentPtr xmlParseElementChildrenContentD
220 (xmlParserCtxtPtr ctxt);
221#define xmlParseElementChildrenContentDecl xmlParseElementChildrenContentD
222#else
223xmlElementContentPtr xmlParseElementChildrenContentDecl
224 (xmlParserCtxtPtr ctxt);
225#endif
226int xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,
227 xmlChar *name,
228 xmlElementContentPtr *result);
229int xmlParseElementDecl (xmlParserCtxtPtr ctxt);
230void xmlParseMarkupDecl (xmlParserCtxtPtr ctxt);
231int xmlParseCharRef (xmlParserCtxtPtr ctxt);
232xmlEntityPtr xmlParseEntityRef (xmlParserCtxtPtr ctxt);
233void xmlParseReference (xmlParserCtxtPtr ctxt);
234void xmlParsePEReference (xmlParserCtxtPtr ctxt);
235void xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt);
236xmlChar * xmlParseAttribute (xmlParserCtxtPtr ctxt,
237 xmlChar **value);
238xmlChar * xmlParseStartTag (xmlParserCtxtPtr ctxt);
239void xmlParseEndTag (xmlParserCtxtPtr ctxt);
240void xmlParseCDSect (xmlParserCtxtPtr ctxt);
241void xmlParseContent (xmlParserCtxtPtr ctxt);
242void xmlParseElement (xmlParserCtxtPtr ctxt);
243xmlChar * xmlParseVersionNum (xmlParserCtxtPtr ctxt);
244xmlChar * xmlParseVersionInfo (xmlParserCtxtPtr ctxt);
245xmlChar * xmlParseEncName (xmlParserCtxtPtr ctxt);
246xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);
247int xmlParseSDDecl (xmlParserCtxtPtr ctxt);
248void xmlParseXMLDecl (xmlParserCtxtPtr ctxt);
249void xmlParseTextDecl (xmlParserCtxtPtr ctxt);
250void xmlParseMisc (xmlParserCtxtPtr ctxt);
251void xmlParseExternalSubset (xmlParserCtxtPtr ctxt,
252 const xmlChar *ExternalID,
253 const xmlChar *SystemID);
/*
 * Entities substitution
 *
 * XML_SUBSTITUTE_BOTH equals XML_SUBSTITUTE_REF | XML_SUBSTITUTE_PEREF,
 * so the values can be combined and tested as bit flags.
 */
#define XML_SUBSTITUTE_NONE  0
#define XML_SUBSTITUTE_REF   1
#define XML_SUBSTITUTE_PEREF 2
#define XML_SUBSTITUTE_BOTH  3
261
262xmlChar * xmlDecodeEntities (xmlParserCtxtPtr ctxt,
263 int len,
264 int what,
265 xmlChar end,
266 xmlChar end2,
267 xmlChar end3);
268xmlChar * xmlStringDecodeEntities (xmlParserCtxtPtr ctxt,
269 const xmlChar *str,
270 int what,
271 xmlChar end,
272 xmlChar end2,
273 xmlChar end3);
274
275/*
276 * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP
277 */
278int nodePush (xmlParserCtxtPtr ctxt,
279 xmlNodePtr value);
280xmlNodePtr nodePop (xmlParserCtxtPtr ctxt);
281int inputPush (xmlParserCtxtPtr ctxt,
282 xmlParserInputPtr value);
283xmlParserInputPtr inputPop (xmlParserCtxtPtr ctxt);
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000284xmlChar *namePop (xmlParserCtxtPtr ctxt);
285int namePush (xmlParserCtxtPtr ctxt,
286 xmlChar *value);
Owen Taylor3473f882001-02-23 17:55:21 +0000287
288/*
289 * other comodities shared between parser.c and parserInternals
290 */
291int xmlSkipBlankChars (xmlParserCtxtPtr ctxt);
292int xmlStringCurrentChar (xmlParserCtxtPtr ctxt,
293 const xmlChar *cur,
294 int *len);
295void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
296void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +0000297int xmlCheckLanguageID (const xmlChar *lang);
298
299/*
300 * Really core function shared with HTML parser
301 */
302int xmlCurrentChar (xmlParserCtxtPtr ctxt,
303 int *len);
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000304int xmlCopyCharMultiByte (xmlChar *out,
305 int val);
Owen Taylor3473f882001-02-23 17:55:21 +0000306int xmlCopyChar (int len,
307 xmlChar *out,
308 int val);
309void xmlNextChar (xmlParserCtxtPtr ctxt);
310void xmlParserInputShrink (xmlParserInputPtr in);
311
#ifdef LIBXML_HTML_ENABLED
/*
 * Actually comes from the HTML parser but launched from the init stuff.
 * Only declared when LIBXML_HTML_ENABLED is defined.
 */
void htmlInitAutoClose(void);
htmlParserCtxtPtr htmlCreateFileParserCtxt(const char *filename,
                                           const char *encoding);
#endif
320#ifdef __cplusplus
321}
322#endif
323#endif /* __XML_PARSER_INTERNALS_H__ */