blob: 6588bd6cfebfc8217d820ffc15426b8ac08bbef1 [file] [log] [blame]
/*
 * parserInternals.h : internal routines exported by the parser.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 *
 * 14 Nov 2000 ht - truncated declaration of xmlParseElementChildrenContentDecl
 *   for VMS
 *
 */
12
13#ifndef __XML_PARSER_INTERNALS_H__
14#define __XML_PARSER_INTERNALS_H__
15
Daniel Veillard361d8452000-04-03 19:48:13 +000016#include <libxml/parser.h>
Daniel Veillard1e346af1999-02-22 10:33:01 +000017
18#ifdef __cplusplus
19extern "C" {
20#endif
21
/*
 * XML_MAX_NAMELEN:
 *
 * Identifiers can be longer than this, but handling them will be
 * more costly at runtime.
 */
#define XML_MAX_NAMELEN 100
Daniel Veillardb96e6431999-08-29 21:02:19 +000027
/*
 * INPUT_CHUNK:
 *
 * The parser tries to always have this amount of input ready;
 * one of the reasons is to provide context when reporting errors.
 */
#define INPUT_CHUNK 250
33
/************************************************************************
 *                                                                      *
 * UNICODE version of the macros.                                       *
 *                                                                      *
 ************************************************************************/
/*
 * IS_CHAR:
 *
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * Evaluates to non-zero when the code point c is in one of the ranges
 * above.  The argument is expanded several times, so it must not have
 * side effects.
 */
#define IS_CHAR(c)                                                      \
    (((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||                 \
     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||                              \
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||                            \
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
Daniel Veillard64068b31999-03-24 20:42:16 +000049
/*
 * IS_BLANK:
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * Evaluates to non-zero when c is one of the four white space
 * characters of production [3].  The argument is expanded several
 * times, so it must not have side effects.
 */
#define IS_BLANK(c)                                                     \
    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D))
55
/*
 * Character-class macros.  Except for IS_LETTER, each one simply
 * forwards its argument to the xmlIsXxx() function of the same name.
 */

/*
 * [85] BaseChar ::= ... long list see REC ...
 */
#define IS_BASECHAR(c) xmlIsBaseChar(c)

/*
 * [88] Digit ::= ... long list see REC ...
 */
#define IS_DIGIT(c) xmlIsDigit(c)

/*
 * [87] CombiningChar ::= ... long list see REC ...
 */
#define IS_COMBINING(c) xmlIsCombining(c)

/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 */
#define IS_EXTENDER(c) xmlIsExtender(c)

/*
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 */
#define IS_IDEOGRAPHIC(c) xmlIsIdeographic(c)

/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * The argument is expanded twice, so it must not have side effects.
 */
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))


/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 */
#define IS_PUBIDCHAR(c) xmlIsPubidChar(c)
Daniel Veillard64068b31999-03-24 20:42:16 +000093
/*
 * SKIP_EOL:
 *
 * Advance p over an end-of-line sequence: first a CR (0x0D) optionally
 * followed by an LF (0x0A), then an LF optionally followed by a CR.
 * p is left unchanged when it does not point at CR or LF.
 *
 * CR and LF are decimal 13 and 10, i.e. 0x0D and 0x0A; the hexadecimal
 * values 0x13 and 0x10 are unrelated control characters and are not
 * treated as line ends.
 *
 * p must be a modifiable pointer lvalue and is expanded several times.
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (e.g. as the body of an unbraced if/else); invoke it
 * with a trailing semicolon.
 */
#define SKIP_EOL(p)                                                     \
    do {                                                                \
        if (*(p) == 0x0D) { (p)++; if (*(p) == 0x0A) (p)++; }           \
        if (*(p) == 0x0A) { (p)++; if (*(p) == 0x0D) (p)++; }           \
    } while (0)
97
/*
 * MOVETO_ENDTAG:
 *
 * Advance p until it points at a '>' or at the terminating 0 byte.
 * p must be a modifiable pointer lvalue; it is expanded several times.
 */
#define MOVETO_ENDTAG(p)                                                \
    while ((*(p)) && (*(p) != '>')) (p)++

/*
 * MOVETO_STARTTAG:
 *
 * Advance p until it points at a '<' or at the terminating 0 byte.
 * p must be a modifiable pointer lvalue; it is expanded several times.
 */
#define MOVETO_STARTTAG(p)                                              \
    while ((*(p)) && (*(p) != '<')) (p)++
Daniel Veillard011b63c1999-06-02 17:44:04 +0000103
Daniel Veillardb1059e22000-09-16 14:02:43 +0000104/**
105 * Global vaiables affecting the default parser behaviour.
106 */
107
Daniel Veillardc2def842000-11-07 14:21:01 +0000108LIBXML_DLL_IMPORT extern int xmlParserDebugEntities;
109LIBXML_DLL_IMPORT extern int xmlGetWarningsDefaultValue;
110LIBXML_DLL_IMPORT extern int xmlParserDebugEntities;
111LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultValue;
112LIBXML_DLL_IMPORT extern int xmlDoValidityCheckingDefaultValue;
113LIBXML_DLL_IMPORT extern int xmlPedanticParserDefaultValue;
114LIBXML_DLL_IMPORT extern int xmlKeepBlanksDefaultValue;
Daniel Veillardb1059e22000-09-16 14:02:43 +0000115
/*
 * Functions that finish the work of the IS_xxx macros where needed.
 * Each takes a character code and returns an int.
 */
int xmlIsBaseChar(int c);
int xmlIsBlank(int c);
int xmlIsPubidChar(int c);
int xmlIsLetter(int c);
int xmlIsDigit(int c);
int xmlIsIdeographic(int c);
int xmlIsCombining(int c);
int xmlIsExtender(int c);
int xmlIsChar(int c);
129
Daniel Veillard011b63c1999-06-02 17:44:04 +0000130/**
Daniel Veillardd692aa41999-02-28 21:54:31 +0000131 * Parser context
132 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000133xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000134xmlParserCtxtPtr xmlCreateFileParserCtxt (const char *filename);
135xmlParserCtxtPtr xmlCreateMemoryParserCtxt(char *buffer,
136 int size);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000137xmlParserCtxtPtr xmlNewParserCtxt (void);
Daniel Veillardcf461992000-03-14 18:30:20 +0000138xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL,
139 const xmlChar *ID,
140 const xmlChar *base);
Daniel Veillard496a1cf2000-05-03 14:20:55 +0000141int xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
Daniel Veillardb96e6431999-08-29 21:02:19 +0000142 xmlCharEncoding enc);
Daniel Veillard496a1cf2000-05-03 14:20:55 +0000143int xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,
144 xmlCharEncodingHandlerPtr handler);
Daniel Veillardcf461992000-03-14 18:30:20 +0000145void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
Daniel Veillardd692aa41999-02-28 21:54:31 +0000146
Daniel Veillardbc50b591999-03-01 12:28:53 +0000147/**
Daniel Veillard1e346af1999-02-22 10:33:01 +0000148 * Entities
149 */
Daniel Veillardb96e6431999-08-29 21:02:19 +0000150void xmlHandleEntity (xmlParserCtxtPtr ctxt,
151 xmlEntityPtr entity);
Daniel Veillard1e346af1999-02-22 10:33:01 +0000152
Daniel Veillardbc50b591999-03-01 12:28:53 +0000153/**
154 * Input Streams
155 */
Daniel Veillardb96e6431999-08-29 21:02:19 +0000156xmlParserInputPtr xmlNewEntityInputStream (xmlParserCtxtPtr ctxt,
157 xmlEntityPtr entity);
158void xmlPushInput (xmlParserCtxtPtr ctxt,
159 xmlParserInputPtr input);
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000160xmlChar xmlPopInput (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000161void xmlFreeInputStream (xmlParserInputPtr input);
162xmlParserInputPtr xmlNewInputFromFile (xmlParserCtxtPtr ctxt,
163 const char *filename);
Daniel Veillardb1059e22000-09-16 14:02:43 +0000164xmlParserInputPtr xmlNewInputStream (xmlParserCtxtPtr ctxt);
Daniel Veillardbc50b591999-03-01 12:28:53 +0000165
166/**
Daniel Veillard1e346af1999-02-22 10:33:01 +0000167 * Namespaces.
168 */
Daniel Veillardcf461992000-03-14 18:30:20 +0000169xmlChar * xmlSplitQName (xmlParserCtxtPtr ctxt,
170 const xmlChar *name,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000171 xmlChar **prefix);
Daniel Veillardcf461992000-03-14 18:30:20 +0000172xmlChar * xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt);
173xmlChar * xmlNamespaceParseQName (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000174 xmlChar **prefix);
Daniel Veillardcf461992000-03-14 18:30:20 +0000175xmlChar * xmlNamespaceParseNSDef (xmlParserCtxtPtr ctxt);
176xmlChar * xmlParseQuotedString (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000177void xmlParseNamespace (xmlParserCtxtPtr ctxt);
Daniel Veillard1e346af1999-02-22 10:33:01 +0000178
Daniel Veillardbc50b591999-03-01 12:28:53 +0000179/**
Daniel Veillard1e346af1999-02-22 10:33:01 +0000180 * Generic production rules
181 */
Daniel Veillardcf461992000-03-14 18:30:20 +0000182xmlChar * xmlScanName (xmlParserCtxtPtr ctxt);
183xmlChar * xmlParseName (xmlParserCtxtPtr ctxt);
184xmlChar * xmlParseNmtoken (xmlParserCtxtPtr ctxt);
185xmlChar * xmlParseEntityValue (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000186 xmlChar **orig);
Daniel Veillardcf461992000-03-14 18:30:20 +0000187xmlChar * xmlParseAttValue (xmlParserCtxtPtr ctxt);
188xmlChar * xmlParseSystemLiteral (xmlParserCtxtPtr ctxt);
189xmlChar * xmlParsePubidLiteral (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000190void xmlParseCharData (xmlParserCtxtPtr ctxt,
191 int cdata);
Daniel Veillardcf461992000-03-14 18:30:20 +0000192xmlChar * xmlParseExternalID (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000193 xmlChar **publicID,
Daniel Veillardb96e6431999-08-29 21:02:19 +0000194 int strict);
195void xmlParseComment (xmlParserCtxtPtr ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +0000196xmlChar * xmlParsePITarget (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000197void xmlParsePI (xmlParserCtxtPtr ctxt);
198void xmlParseNotationDecl (xmlParserCtxtPtr ctxt);
199void xmlParseEntityDecl (xmlParserCtxtPtr ctxt);
200int xmlParseDefaultDecl (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000201 xmlChar **value);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000202xmlEnumerationPtr xmlParseNotationType (xmlParserCtxtPtr ctxt);
203xmlEnumerationPtr xmlParseEnumerationType (xmlParserCtxtPtr ctxt);
204int xmlParseEnumeratedType (xmlParserCtxtPtr ctxt,
205 xmlEnumerationPtr *tree);
206int xmlParseAttributeType (xmlParserCtxtPtr ctxt,
207 xmlEnumerationPtr *tree);
208void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt);
209xmlElementContentPtr xmlParseElementMixedContentDecl
210 (xmlParserCtxtPtr ctxt);
Daniel Veillardce6e98d2000-11-25 09:54:49 +0000211#ifdef VMS
212xmlElementContentPtr xmlParseElementChildrenContentD
213 (xmlParserCtxtPtr ctxt);
214#define xmlParseElementChildrenContentDecl xmlParseElementChildrenContentD
215#else
Daniel Veillardb96e6431999-08-29 21:02:19 +0000216xmlElementContentPtr xmlParseElementChildrenContentDecl
217 (xmlParserCtxtPtr ctxt);
Daniel Veillardce6e98d2000-11-25 09:54:49 +0000218#endif
Daniel Veillardb96e6431999-08-29 21:02:19 +0000219int xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000220 xmlChar *name,
Daniel Veillardb96e6431999-08-29 21:02:19 +0000221 xmlElementContentPtr *result);
222int xmlParseElementDecl (xmlParserCtxtPtr ctxt);
223void xmlParseMarkupDecl (xmlParserCtxtPtr ctxt);
224int xmlParseCharRef (xmlParserCtxtPtr ctxt);
225xmlEntityPtr xmlParseEntityRef (xmlParserCtxtPtr ctxt);
226void xmlParseReference (xmlParserCtxtPtr ctxt);
227void xmlParsePEReference (xmlParserCtxtPtr ctxt);
228void xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +0000229xmlChar * xmlParseAttribute (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000230 xmlChar **value);
Daniel Veillardcf461992000-03-14 18:30:20 +0000231xmlChar * xmlParseStartTag (xmlParserCtxtPtr ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000232void xmlParseEndTag (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000233void xmlParseCDSect (xmlParserCtxtPtr ctxt);
234void xmlParseContent (xmlParserCtxtPtr ctxt);
235void xmlParseElement (xmlParserCtxtPtr ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +0000236xmlChar * xmlParseVersionNum (xmlParserCtxtPtr ctxt);
237xmlChar * xmlParseVersionInfo (xmlParserCtxtPtr ctxt);
238xmlChar * xmlParseEncName (xmlParserCtxtPtr ctxt);
239xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000240int xmlParseSDDecl (xmlParserCtxtPtr ctxt);
241void xmlParseXMLDecl (xmlParserCtxtPtr ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +0000242void xmlParseTextDecl (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000243void xmlParseMisc (xmlParserCtxtPtr ctxt);
244void xmlParseExternalSubset (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000245 const xmlChar *ExternalID,
246 const xmlChar *SystemID);
/*
 * Entities substitution
 *
 * The values combine as bits: XML_SUBSTITUTE_BOTH is equal to
 * XML_SUBSTITUTE_REF | XML_SUBSTITUTE_PEREF.
 */
#define XML_SUBSTITUTE_NONE  0
#define XML_SUBSTITUTE_REF   1
#define XML_SUBSTITUTE_PEREF 2
#define XML_SUBSTITUTE_BOTH  3
254
Daniel Veillardcf461992000-03-14 18:30:20 +0000255xmlChar * xmlDecodeEntities (xmlParserCtxtPtr ctxt,
Daniel Veillardb96e6431999-08-29 21:02:19 +0000256 int len,
257 int what,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000258 xmlChar end,
259 xmlChar end2,
260 xmlChar end3);
Daniel Veillardcf461992000-03-14 18:30:20 +0000261xmlChar * xmlStringDecodeEntities (xmlParserCtxtPtr ctxt,
262 const xmlChar *str,
263 int what,
264 xmlChar end,
265 xmlChar end2,
266 xmlChar end3);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000267
268/*
Daniel Veillard517752b1999-04-05 12:20:10 +0000269 * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP
270 */
Daniel Veillardb96e6431999-08-29 21:02:19 +0000271int nodePush (xmlParserCtxtPtr ctxt,
272 xmlNodePtr value);
273xmlNodePtr nodePop (xmlParserCtxtPtr ctxt);
274int inputPush (xmlParserCtxtPtr ctxt,
275 xmlParserInputPtr value);
276xmlParserInputPtr inputPop (xmlParserCtxtPtr ctxt);
Daniel Veillard1e346af1999-02-22 10:33:01 +0000277
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000278/*
Daniel Veillardb1059e22000-09-16 14:02:43 +0000279 * other comodities shared between parser.c and parserInternals
280 */
281int xmlSkipBlankChars (xmlParserCtxtPtr ctxt);
282int xmlStringCurrentChar (xmlParserCtxtPtr ctxt,
283 const xmlChar *cur,
284 int *len);
285void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
286void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
287xmlChar *namePop (xmlParserCtxtPtr ctxt);
288int xmlCheckLanguageID (const xmlChar *lang);
289
290/*
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000291 * Really core function shared with HTML parser
292 */
293int xmlCurrentChar (xmlParserCtxtPtr ctxt,
294 int *len);
295int xmlCopyChar (int len,
296 xmlChar *out,
297 int val);
298void xmlNextChar (xmlParserCtxtPtr ctxt);
299void xmlParserInputShrink (xmlParserInputPtr in);
Daniel Veillardbc765302000-10-01 18:23:35 +0000300
#ifdef LIBXML_HTML_ENABLED
/*
 * Actually comes from the HTML parser but launched from the init stuff.
 * Only declared when LIBXML_HTML_ENABLED is defined.
 */
void htmlInitAutoClose(void);
#endif
Daniel Veillardc08a2c61999-09-08 21:35:25 +0000307#ifdef __cplusplus
308}
309#endif
Daniel Veillard1e346af1999-02-22 10:33:01 +0000310#endif /* __XML_PARSER_INTERNALS_H__ */