blob: c62b298c0d140ba8e4af7a62e538460d499423f1 [file] [log] [blame]
Daniel Veillard1e346af1999-02-22 10:33:01 +00001/*
2 * parserInternals.h : internal routines exported by the parser.
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
Daniel Veillardce6e98d2000-11-25 09:54:49 +00007 *
8 * 14 Nov 2000 ht - truncated declaration of xmlParseElementChildrenContentDecl
9 * for VMS
10 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000011 */
12
13#ifndef __XML_PARSER_INTERNALS_H__
14#define __XML_PARSER_INTERNALS_H__
15
Daniel Veillard361d8452000-04-03 19:48:13 +000016#include <libxml/parser.h>
Daniel Veillard1e346af1999-02-22 10:33:01 +000017
18#ifdef __cplusplus
19extern "C" {
20#endif
21
Daniel Veillardb1059e22000-09-16 14:02:43 +000022 /*
23 * Identifiers can be longer, but this will be more costly
24 * at runtime.
 *
 * XML_MAX_NAMELEN is therefore a threshold (100), not a hard limit on
 * identifier length.
 * NOTE(review): presumably the size of a fixed name buffer used on the
 * fast path in the parser; confirm in the implementation.
25 */
Daniel Veillarde0854c32000-08-27 21:12:29 +000026#define XML_MAX_NAMELEN 100
Daniel Veillardb96e6431999-08-29 21:02:19 +000027
Daniel Veillardb1059e22000-09-16 14:02:43 +000028/*
29 * The parser tries to always have that amount of input ready;
30 * one of the goals is to provide context when reporting errors.
 *
 * INPUT_CHUNK is that amount (250).
 * NOTE(review): the unit (bytes vs. characters) is not visible in this
 * header; confirm against the code that uses it.
31 */
32#define INPUT_CHUNK 250
33
Daniel Veillard64068b31999-03-24 20:42:16 +000034/************************************************************************
35 * *
36 * UNICODE version of the macros. *
37 * *
38 ************************************************************************/
39/*
40 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
41 * | [#x10000-#x10FFFF]
42 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * IS_CHAR(c) is non-zero when the code point c matches that production
 * and zero otherwise. The tests are pure integer comparisons, so c is
 * expected to be an already decoded code point, not a raw byte.
 * The argument is expanded (and may be evaluated) several times: it must
 * not have side effects.
43 */
44#define IS_CHAR(c) \
Daniel Veillard496a1cf2000-05-03 14:20:55 +000045 (((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || \
46 (((c) >= 0x20) && ((c) <= 0xD7FF)) || \
47 (((c) >= 0xE000) && ((c) <= 0xFFFD)) || \
48 (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
Daniel Veillard64068b31999-03-24 20:42:16 +000049
50/*
51 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * IS_BLANK(c) tests one character against the four characters allowed
 * in S (space, tab, line feed, carriage return); the repetition
 * expressed by the + in the production is left to the caller.
 * The argument may be evaluated up to four times: it must not have
 * side effects.
52 */
Daniel Veillardfc708e22000-04-08 13:17:27 +000053#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xA) || \
Daniel Veillard64068b31999-03-24 20:42:16 +000054 ((c) == 0x0D))
55
56/*
57 * [85] BaseChar ::= ... long list see REC ...
 *
 * IS_BASECHAR(c) expands to a single call to xmlIsBaseChar(c), so the
 * argument is evaluated exactly once.
Daniel Veillard64068b31999-03-24 20:42:16 +000058 */
Daniel Veillarddd477ce2000-09-10 13:23:08 +000059#define IS_BASECHAR(c) xmlIsBaseChar(c)
Daniel Veillard64068b31999-03-24 20:42:16 +000060
61/*
62 * [88] Digit ::= ... long list see REC ...
 *
 * IS_DIGIT(c) expands to a single call to xmlIsDigit(c), so the
 * argument is evaluated exactly once.
63 */
Daniel Veillarddd477ce2000-09-10 13:23:08 +000064#define IS_DIGIT(c) xmlIsDigit(c)
Daniel Veillard64068b31999-03-24 20:42:16 +000065
66/*
67 * [87] CombiningChar ::= ... long list see REC ...
 *
 * IS_COMBINING(c) expands to a single call to xmlIsCombining(c), so the
 * argument is evaluated exactly once.
68 */
Daniel Veillarddd477ce2000-09-10 13:23:08 +000069#define IS_COMBINING(c) xmlIsCombining(c)
Daniel Veillard64068b31999-03-24 20:42:16 +000070
71/*
72 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
73 * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
74 * [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * IS_EXTENDER(c) expands to a single call to xmlIsExtender(c), so the
 * argument is evaluated exactly once.
75 */
Daniel Veillarddd477ce2000-09-10 13:23:08 +000076#define IS_EXTENDER(c) xmlIsExtender(c)
Daniel Veillard64068b31999-03-24 20:42:16 +000077
78/*
79 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * IS_IDEOGRAPHIC(c) expands to a single call to xmlIsIdeographic(c), so
 * the argument is evaluated exactly once.
80 */
Daniel Veillarddd477ce2000-09-10 13:23:08 +000081#define IS_IDEOGRAPHIC(c) xmlIsIdeographic(c)
Daniel Veillard64068b31999-03-24 20:42:16 +000082
83/*
84 * [84] Letter ::= BaseChar | Ideographic
 *
 * IS_LETTER(c) combines IS_BASECHAR and IS_IDEOGRAPHIC with a
 * short-circuit ||: the ideographic test only runs when the base
 * character test yields zero. The argument is expanded into both tests,
 * so it may be evaluated twice and must not have side effects.
85 */
86#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
87
Daniel Veillard64068b31999-03-24 20:42:16 +000088
89/*
90 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * IS_PUBIDCHAR(c) expands to a single call to xmlIsPubidChar(c), so the
 * argument is evaluated exactly once.
91 */
Daniel Veillarddd477ce2000-09-10 13:23:08 +000092#define IS_PUBIDCHAR(c) xmlIsPubidChar(c)
Daniel Veillard64068b31999-03-24 20:42:16 +000093
/*
 * SKIP_EOL:
 * @p: a modifiable pointer (lvalue) into the characters being scanned
 *
 * Advance @p past ONE end-of-line sequence if it currently points at one:
 * CR, CR LF, LF or LF CR. @p is left untouched otherwise.
 *
 * Carriage return is 0x0D and line feed is 0x0A (13 and 10 in decimal);
 * 0x13 and 0x10 are the DC3 and DLE control characters and are not line
 * terminators.
 *
 * The two alternatives are chained with else so that a CR LF immediately
 * followed by another LF does not swallow the second line terminator.
 * The body is wrapped in do { } while (0) so the macro acts as a single
 * statement, including under an unbraced if/else. @p is evaluated
 * several times: it must not have side effects.
 */
#define SKIP_EOL(p)                                                     \
    do {                                                                \
        if (*(p) == 0x0D) {                                             \
            (p)++;                                                      \
            if (*(p) == 0x0A)                                           \
                (p)++;                                                  \
        } else if (*(p) == 0x0A) {                                      \
            (p)++;                                                      \
            if (*(p) == 0x0D)                                           \
                (p)++;                                                  \
        }                                                               \
    } while (0)
97
/*
 * MOVETO_ENDTAG:
 * @p: a modifiable pointer (lvalue) into a zero-terminated string
 *
 * Advance @p until it points at the next '>' or at the terminating zero,
 * whichever comes first. @p is not moved if it already points at either.
 * The parameter is parenthesized at every use; it is evaluated several
 * times and must not have side effects.
 */
#define MOVETO_ENDTAG(p)                                                \
    while ((*(p)) && (*(p) != '>')) (p)++
Daniel Veillard64068b31999-03-24 20:42:16 +0000100
/*
 * MOVETO_STARTTAG:
 * @p: a modifiable pointer (lvalue) into a zero-terminated string
 *
 * Advance @p until it points at the next '<' or at the terminating zero,
 * whichever comes first. @p is not moved if it already points at either.
 * The parameter is parenthesized at every use; it is evaluated several
 * times and must not have side effects.
 */
#define MOVETO_STARTTAG(p)                                              \
    while ((*(p)) && (*(p) != '<')) (p)++
Daniel Veillard011b63c1999-06-02 17:44:04 +0000103
Daniel Veillardb1059e22000-09-16 14:02:43 +0000104/**
105 * Global vaiables affecting the default parser behaviour.
106 */
107
Daniel Veillardc2def842000-11-07 14:21:01 +0000108LIBXML_DLL_IMPORT extern int xmlParserDebugEntities;
109LIBXML_DLL_IMPORT extern int xmlGetWarningsDefaultValue;
110LIBXML_DLL_IMPORT extern int xmlParserDebugEntities;
111LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultValue;
112LIBXML_DLL_IMPORT extern int xmlDoValidityCheckingDefaultValue;
Daniel Veillard0f2a53c2001-02-05 17:57:33 +0000113LIBXML_DLL_IMPORT extern int xmlLoadExtDtdDefaultValue;
Daniel Veillardc2def842000-11-07 14:21:01 +0000114LIBXML_DLL_IMPORT extern int xmlPedanticParserDefaultValue;
115LIBXML_DLL_IMPORT extern int xmlKeepBlanksDefaultValue;
Daniel Veillardf6eea272001-01-18 12:17:12 +0000116LIBXML_DLL_IMPORT extern xmlChar xmlStringText[];
117LIBXML_DLL_IMPORT extern xmlChar xmlStringTextNoenc[];
118LIBXML_DLL_IMPORT extern xmlChar xmlStringComment[];
Daniel Veillardb1059e22000-09-16 14:02:43 +0000119
/*
 * Functions to finish the work of the macros where needed.
 *
 * Each one takes a character code as an int and returns an int; the
 * IS_BASECHAR, IS_DIGIT, IS_COMBINING, IS_EXTENDER, IS_IDEOGRAPHIC and
 * IS_PUBIDCHAR macros expand directly to the matching function.
 * Each function is declared exactly once.
 * NOTE(review): the result is presumably non-zero when c belongs to the
 * character class and zero otherwise; confirm in the implementation.
 */
int	xmlIsBaseChar	(int c);
int	xmlIsBlank	(int c);
int	xmlIsPubidChar	(int c);
int	xmlIsLetter	(int c);
int	xmlIsDigit	(int c);
int	xmlIsIdeographic(int c);
int	xmlIsCombining	(int c);
int	xmlIsExtender	(int c);
int	xmlIsChar	(int c);
133
Daniel Veillard011b63c1999-06-02 17:44:04 +0000134/**
Daniel Veillardd692aa41999-02-28 21:54:31 +0000135 * Parser context
 *
 * Prototypes only; the definitions are not part of this header.
 * Five functions return an xmlParserCtxtPtr. Going by the parameters,
 * the context is built from a document string (cur), a file name, a
 * memory buffer plus its size, nothing at all (xmlNewParserCtxt), or an
 * entity described by URL, ID and base.
 * xmlSwitchEncoding and xmlSwitchToEncoding take a context plus an
 * encoding (an xmlCharEncoding value or an encoding handler) and return
 * an int.
 * xmlFreeParserCtxt takes a context and returns nothing.
 * NOTE(review): NULL-on-failure and the meaning of the int results are
 * not visible here; confirm in the implementation.
136 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000137xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000138xmlParserCtxtPtr xmlCreateFileParserCtxt (const char *filename);
139xmlParserCtxtPtr xmlCreateMemoryParserCtxt(char *buffer,
140 int size);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000141xmlParserCtxtPtr xmlNewParserCtxt (void);
Daniel Veillardcf461992000-03-14 18:30:20 +0000142xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL,
143 const xmlChar *ID,
144 const xmlChar *base);
Daniel Veillard496a1cf2000-05-03 14:20:55 +0000145int xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
Daniel Veillardb96e6431999-08-29 21:02:19 +0000146 xmlCharEncoding enc);
Daniel Veillard496a1cf2000-05-03 14:20:55 +0000147int xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,
148 xmlCharEncodingHandlerPtr handler);
Daniel Veillardcf461992000-03-14 18:30:20 +0000149void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
Daniel Veillardd692aa41999-02-28 21:54:31 +0000150
Daniel Veillardbc50b591999-03-01 12:28:53 +0000151/**
Daniel Veillard1e346af1999-02-22 10:33:01 +0000152 * Entities
 *
 * xmlHandleEntity takes the parser context and an entity and returns
 * nothing; only the prototype is visible in this header.
 * NOTE(review): what handling an entity involves (for example pushing
 * it as a new input) is not visible here; confirm in the implementation.
153 */
Daniel Veillardb96e6431999-08-29 21:02:19 +0000154void xmlHandleEntity (xmlParserCtxtPtr ctxt,
155 xmlEntityPtr entity);
Daniel Veillard1e346af1999-02-22 10:33:01 +0000156
Daniel Veillardbc50b591999-03-01 12:28:53 +0000157/**
158 * Input Streams
 *
 * Prototypes only; the definitions are not part of this header.
 * Three functions return an xmlParserInputPtr for a context: from an
 * entity, from a file name, or from the context alone
 * (xmlNewInputStream).
 * xmlPushInput takes a context and an input and returns nothing.
 * xmlPopInput returns an xmlChar by value, not a pointer.
 * xmlFreeInputStream takes an input and returns nothing.
 * NOTE(review): presumably xmlPopInput returns the current character of
 * the input that becomes active after the pop; confirm in the
 * implementation.
159 */
Daniel Veillardb96e6431999-08-29 21:02:19 +0000160xmlParserInputPtr xmlNewEntityInputStream (xmlParserCtxtPtr ctxt,
161 xmlEntityPtr entity);
162void xmlPushInput (xmlParserCtxtPtr ctxt,
163 xmlParserInputPtr input);
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000164xmlChar xmlPopInput (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000165void xmlFreeInputStream (xmlParserInputPtr input);
166xmlParserInputPtr xmlNewInputFromFile (xmlParserCtxtPtr ctxt,
167 const char *filename);
Daniel Veillardb1059e22000-09-16 14:02:43 +0000168xmlParserInputPtr xmlNewInputStream (xmlParserCtxtPtr ctxt);
Daniel Veillardbc50b591999-03-01 12:28:53 +0000169
170/**
Daniel Veillard1e346af1999-02-22 10:33:01 +0000171 * Namespaces.
 *
 * Prototypes only; the definitions are not part of this header.
 * All functions take the parser context. Those returning xmlChar *
 * hand back a string pointer; xmlSplitQName and xmlNamespaceParseQName
 * additionally take an xmlChar ** output parameter named prefix.
 * xmlSplitQName works on the name passed in (const xmlChar *name).
 * xmlParseNamespace returns nothing.
 * NOTE(review): ownership of the returned strings and of *prefix
 * (presumably allocated, caller frees) is not visible here; confirm in
 * the implementation.
172 */
Daniel Veillardcf461992000-03-14 18:30:20 +0000173xmlChar * xmlSplitQName (xmlParserCtxtPtr ctxt,
174 const xmlChar *name,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000175 xmlChar **prefix);
Daniel Veillardcf461992000-03-14 18:30:20 +0000176xmlChar * xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt);
177xmlChar * xmlNamespaceParseQName (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000178 xmlChar **prefix);
Daniel Veillardcf461992000-03-14 18:30:20 +0000179xmlChar * xmlNamespaceParseNSDef (xmlParserCtxtPtr ctxt);
180xmlChar * xmlParseQuotedString (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000181void xmlParseNamespace (xmlParserCtxtPtr ctxt);
Daniel Veillard1e346af1999-02-22 10:33:01 +0000182
Daniel Veillardbc50b591999-03-01 12:28:53 +0000183/**
Daniel Veillard1e346af1999-02-22 10:33:01 +0000184 * Generic production rules
 *
 * Prototypes only; the definitions are not part of this header.
 * Every function takes the parser context as its first argument.
 * Return types fall into four groups:
 *  - xmlChar *: a string pointer (names, tokens, literals, values);
 *  - void: nothing is returned to the caller;
 *  - int: a code; the *Type/*Decl variants also take an output pointer
 *    (xmlChar **value or xmlEnumerationPtr *tree);
 *  - xmlEnumerationPtr / xmlElementContentPtr: a structure pointer.
 * xmlParseEntityValue and xmlParseExternalID have an extra xmlChar **
 * output parameter (orig, publicID). xmlParseCharData and
 * xmlParseExternalID take an int flag (cdata, strict).
 * NOTE(review): the function names mirror XML grammar productions, but
 * the exact production, result ownership and int return values are not
 * visible here; confirm in the implementation.
185 */
Daniel Veillardcf461992000-03-14 18:30:20 +0000186xmlChar * xmlScanName (xmlParserCtxtPtr ctxt);
187xmlChar * xmlParseName (xmlParserCtxtPtr ctxt);
188xmlChar * xmlParseNmtoken (xmlParserCtxtPtr ctxt);
189xmlChar * xmlParseEntityValue (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000190 xmlChar **orig);
Daniel Veillardcf461992000-03-14 18:30:20 +0000191xmlChar * xmlParseAttValue (xmlParserCtxtPtr ctxt);
192xmlChar * xmlParseSystemLiteral (xmlParserCtxtPtr ctxt);
193xmlChar * xmlParsePubidLiteral (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000194void xmlParseCharData (xmlParserCtxtPtr ctxt,
195 int cdata);
Daniel Veillardcf461992000-03-14 18:30:20 +0000196xmlChar * xmlParseExternalID (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000197 xmlChar **publicID,
Daniel Veillardb96e6431999-08-29 21:02:19 +0000198 int strict);
199void xmlParseComment (xmlParserCtxtPtr ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +0000200xmlChar * xmlParsePITarget (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000201void xmlParsePI (xmlParserCtxtPtr ctxt);
202void xmlParseNotationDecl (xmlParserCtxtPtr ctxt);
203void xmlParseEntityDecl (xmlParserCtxtPtr ctxt);
204int xmlParseDefaultDecl (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000205 xmlChar **value);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000206xmlEnumerationPtr xmlParseNotationType (xmlParserCtxtPtr ctxt);
207xmlEnumerationPtr xmlParseEnumerationType (xmlParserCtxtPtr ctxt);
208int xmlParseEnumeratedType (xmlParserCtxtPtr ctxt,
209 xmlEnumerationPtr *tree);
210int xmlParseAttributeType (xmlParserCtxtPtr ctxt,
211 xmlEnumerationPtr *tree);
212void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt);
213xmlElementContentPtr xmlParseElementMixedContentDecl
214 (xmlParserCtxtPtr ctxt);
/*
 * xmlParseElementChildrenContentDecl takes the parser context and
 * returns an xmlElementContentPtr.
 * When VMS is defined the function is declared under the truncated,
 * 31 character name xmlParseElementChildrenContentD, and a macro maps
 * the full name onto it so callers can keep using the full name.
 * Otherwise the full name is declared directly.
 * NOTE(review): the truncation presumably works around a symbol length
 * limit of the VMS toolchain; confirm.
 */
Daniel Veillardce6e98d2000-11-25 09:54:49 +0000215#ifdef VMS
216xmlElementContentPtr xmlParseElementChildrenContentD
217 (xmlParserCtxtPtr ctxt);
218#define xmlParseElementChildrenContentDecl xmlParseElementChildrenContentD
219#else
Daniel Veillardb96e6431999-08-29 21:02:19 +0000220xmlElementContentPtr xmlParseElementChildrenContentDecl
221 (xmlParserCtxtPtr ctxt);
Daniel Veillardce6e98d2000-11-25 09:54:49 +0000222#endif
/*
 * Remaining production rule prototypes; the definitions are not part of
 * this header. Every function takes the parser context first.
 *  - xmlParseElementContentDecl takes an element name and an output
 *    pointer (xmlElementContentPtr *result) and returns an int.
 *  - xmlParseAttribute takes an xmlChar ** output parameter (value) and
 *    returns an xmlChar *.
 *  - xmlParseExternalSubset takes two const strings, ExternalID and
 *    SystemID, and returns nothing.
 *  - xmlParseEntityRef returns an xmlEntityPtr; xmlParseCharRef,
 *    xmlParseElementDecl and xmlParseSDDecl return an int.
 * NOTE(review): result ownership and the meaning of the int return
 * values are not visible here; confirm in the implementation.
 */
Daniel Veillardb96e6431999-08-29 21:02:19 +0000223int xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000224 xmlChar *name,
Daniel Veillardb96e6431999-08-29 21:02:19 +0000225 xmlElementContentPtr *result);
226int xmlParseElementDecl (xmlParserCtxtPtr ctxt);
227void xmlParseMarkupDecl (xmlParserCtxtPtr ctxt);
228int xmlParseCharRef (xmlParserCtxtPtr ctxt);
229xmlEntityPtr xmlParseEntityRef (xmlParserCtxtPtr ctxt);
230void xmlParseReference (xmlParserCtxtPtr ctxt);
231void xmlParsePEReference (xmlParserCtxtPtr ctxt);
232void xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +0000233xmlChar * xmlParseAttribute (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000234 xmlChar **value);
Daniel Veillardcf461992000-03-14 18:30:20 +0000235xmlChar * xmlParseStartTag (xmlParserCtxtPtr ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000236void xmlParseEndTag (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000237void xmlParseCDSect (xmlParserCtxtPtr ctxt);
238void xmlParseContent (xmlParserCtxtPtr ctxt);
239void xmlParseElement (xmlParserCtxtPtr ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +0000240xmlChar * xmlParseVersionNum (xmlParserCtxtPtr ctxt);
241xmlChar * xmlParseVersionInfo (xmlParserCtxtPtr ctxt);
242xmlChar * xmlParseEncName (xmlParserCtxtPtr ctxt);
243xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000244int xmlParseSDDecl (xmlParserCtxtPtr ctxt);
245void xmlParseXMLDecl (xmlParserCtxtPtr ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +0000246void xmlParseTextDecl (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000247void xmlParseMisc (xmlParserCtxtPtr ctxt);
248void xmlParseExternalSubset (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000249 const xmlChar *ExternalID,
250 const xmlChar *SystemID);
Daniel Veillard517752b1999-04-05 12:20:10 +0000251/*
Daniel Veillard011b63c1999-06-02 17:44:04 +0000252 * Entities substitution
 *
 * The XML_SUBSTITUTE_* constants are the values 0 to 3, and
 * XML_SUBSTITUTE_BOTH (3) equals XML_SUBSTITUTE_REF (1) combined with
 * XML_SUBSTITUTE_PEREF (2) as a bit mask.
 * NOTE(review): presumably these are the values accepted by the what
 * parameter of the two functions below; confirm in the implementation.
 *
 * xmlDecodeEntities takes the context, a length, the what selector and
 * three xmlChar values (end, end2, end3). xmlStringDecodeEntities takes
 * the same arguments, with an explicit input string (str) in place of
 * the length. Both return an xmlChar *.
 * NOTE(review): end, end2 and end3 are presumably stop characters, and
 * the result presumably belongs to the caller; confirm in the
 * implementation.
253 */
254#define XML_SUBSTITUTE_NONE 0
255#define XML_SUBSTITUTE_REF 1
256#define XML_SUBSTITUTE_PEREF 2
257#define XML_SUBSTITUTE_BOTH 3
258
Daniel Veillardcf461992000-03-14 18:30:20 +0000259xmlChar * xmlDecodeEntities (xmlParserCtxtPtr ctxt,
Daniel Veillardb96e6431999-08-29 21:02:19 +0000260 int len,
261 int what,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000262 xmlChar end,
263 xmlChar end2,
264 xmlChar end3);
Daniel Veillardcf461992000-03-14 18:30:20 +0000265xmlChar * xmlStringDecodeEntities (xmlParserCtxtPtr ctxt,
266 const xmlChar *str,
267 int what,
268 xmlChar end,
269 xmlChar end2,
270 xmlChar end3);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000271
272/*
Daniel Veillard517752b1999-04-05 12:20:10 +0000273 * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP
 *
 * Prototypes for two push/pop pairs operating on the parser context:
 * one for xmlNodePtr values and one for xmlParserInputPtr values.
 * The push functions take the value and return an int; the pop
 * functions return a value of the corresponding pointer type.
 * NOTE(review): the meaning of the int returned by the push functions
 * and the result of popping an empty stack are not visible here;
 * confirm in parser.c.
274 */
Daniel Veillardb96e6431999-08-29 21:02:19 +0000275int nodePush (xmlParserCtxtPtr ctxt,
276 xmlNodePtr value);
277xmlNodePtr nodePop (xmlParserCtxtPtr ctxt);
278int inputPush (xmlParserCtxtPtr ctxt,
279 xmlParserInputPtr value);
280xmlParserInputPtr inputPop (xmlParserCtxtPtr ctxt);
Daniel Veillard1e346af1999-02-22 10:33:01 +0000281
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000282/*
Daniel Veillardb1059e22000-09-16 14:02:43 +0000283 * other commodities shared between parser.c and parserInternals
 *
 * Prototypes only; the definitions are not part of this header.
 *  - xmlSkipBlankChars takes the context and returns an int.
 *  - xmlStringCurrentChar takes the context, a string position (cur)
 *    and an int output parameter (len), and returns an int.
 *  - xmlParserHandlePEReference and xmlParserHandleReference take the
 *    context and return nothing.
 *  - namePop takes the context and returns an xmlChar *.
 *  - xmlCheckLanguageID takes a language string and returns an int; it
 *    is the only function here that does not need a parser context.
 * NOTE(review): presumably xmlSkipBlankChars returns the number of
 * blanks skipped, and xmlStringCurrentChar returns a code point and
 * stores its encoded length in *len; confirm in the implementation.
284 */
285int xmlSkipBlankChars (xmlParserCtxtPtr ctxt);
286int xmlStringCurrentChar (xmlParserCtxtPtr ctxt,
287 const xmlChar *cur,
288 int *len);
289void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
290void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
291xmlChar *namePop (xmlParserCtxtPtr ctxt);
292int xmlCheckLanguageID (const xmlChar *lang);
293
294/*
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000295 * Really core function shared with HTML parser
 *
 * Prototypes only; the definitions are not part of this header.
 *  - xmlCurrentChar takes the context and an int output parameter (len)
 *    and returns an int.
 *  - xmlCopyChar takes a length, an output buffer (out) and an int
 *    value (val), and returns an int.
 *  - xmlNextChar takes the context and returns nothing.
 *  - xmlParserInputShrink takes a parser input and returns nothing.
 * NOTE(review): presumably xmlCurrentChar returns the current code
 * point with its encoded length in *len, and xmlCopyChar returns the
 * number of xmlChar written to out; confirm in the implementation.
296 */
297int xmlCurrentChar (xmlParserCtxtPtr ctxt,
298 int *len);
299int xmlCopyChar (int len,
300 xmlChar *out,
301 int val);
302void xmlNextChar (xmlParserCtxtPtr ctxt);
303void xmlParserInputShrink (xmlParserInputPtr in);
Daniel Veillardbc765302000-10-01 18:23:35 +0000304
305#ifdef LIBXML_HTML_ENABLED
306/*
307 * Actually comes from the HTML parser but launched from the init stuff
 *
 * htmlInitAutoClose takes no argument and returns nothing. It is only
 * declared when LIBXML_HTML_ENABLED is defined, so callers must guard
 * their calls with the same condition.
308 */
309void htmlInitAutoClose (void);
310#endif
Daniel Veillardc08a2c61999-09-08 21:35:25 +0000311#ifdef __cplusplus
312}
313#endif
Daniel Veillard1e346af1999-02-22 10:33:01 +0000314#endif /* __XML_PARSER_INTERNALS_H__ */