blob: 7b85b8aa7bfe222819820957f84b58e2ee570b39 [file] [log] [blame]
Daniel Veillard1e346af1999-02-22 10:33:01 +00001/*
2 * parserInternals.h : internals routines exported by the parser.
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
9#ifndef __XML_PARSER_INTERNALS_H__
10#define __XML_PARSER_INTERNALS_H__
11
Daniel Veillard361d8452000-04-03 19:48:13 +000012#include <libxml/parser.h>
Daniel Veillard1e346af1999-02-22 10:33:01 +000013
14#ifdef __cplusplus
15extern "C" {
16#endif
17
/*
 * Identifiers may exceed this length, but handling the longer ones
 * is more costly at runtime.
 */
#define XML_MAX_NAMELEN 100

/*
 * Amount of input the parser tries to keep available at all times.
 * One purpose is to provide context when reporting errors.
 */
#define INPUT_CHUNK	250
29
Daniel Veillard64068b31999-03-24 20:42:16 +000030/************************************************************************
31 * *
32 * UNICODE version of the macros. *
33 * *
34 ************************************************************************/
/*
 * IS_CHAR:
 * @c:  a character code point
 *
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 *
 * Non-zero for any Unicode character except the surrogate blocks,
 * FFFE and FFFF.  @c is expanded several times, so it must be free
 * of side effects.
 */
#define IS_CHAR(c)                                                      \
    (((c) == 0x09) ||                                                   \
     ((c) == 0x0A) ||                                                   \
     ((c) == 0x0D) ||                                                   \
     ((c) >= 0x20 && (c) <= 0xD7FF) ||                                  \
     ((c) >= 0xE000 && (c) <= 0xFFFD) ||                                \
     ((c) >= 0x10000 && (c) <= 0x10FFFF))
Daniel Veillard64068b31999-03-24 20:42:16 +000045
/*
 * IS_BLANK:
 * @c:  a character code point
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * Non-zero when @c is one of the four white space characters above.
 * @c is expanded several times, so it must be free of side effects.
 */
#define IS_BLANK(c)                                                     \
    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D))
51
/*
 * Character-class tests for the productions below.  Except for
 * IS_LETTER, each macro simply forwards to the function of the
 * matching name.
 */

/* [85] BaseChar ::= ... long list, see REC ... */
#define IS_BASECHAR(c)		xmlIsBaseChar(c)

/* [88] Digit ::= ... long list, see REC ... */
#define IS_DIGIT(c)		xmlIsDigit(c)

/* [87] CombiningChar ::= ... long list, see REC ... */
#define IS_COMBINING(c)		xmlIsCombining(c)

/*
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 */
#define IS_EXTENDER(c)		xmlIsExtender(c)

/* [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] */
#define IS_IDEOGRAPHIC(c)	xmlIsIdeographic(c)

/*
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Built from the two macros above, so @c may be expanded twice.
 */
#define IS_LETTER(c)		(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))

/*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 */
#define IS_PUBIDCHAR(c)		xmlIsPubidChar(c)
Daniel Veillard64068b31999-03-24 20:42:16 +000089
/*
 * SKIP_EOL:
 * @p:  a modifiable pointer (lvalue) into a character buffer
 *
 * Advance @p past one end-of-line sequence if it points at one:
 * CR, LF, CR LF or LF CR.  @p is left unchanged otherwise.
 *
 * The character following a CR or LF is read as well, so the buffer
 * must stay readable one position past the terminator (e.g. be
 * zero-terminated).  @p is expanded several times and must be free
 * of side effects.
 *
 * CR is 0x0D and LF is 0x0A; the decimal values 13 and 10 must not be
 * written as 0x13 / 0x10, which are the DC3 and DLE control codes.
 */
#define SKIP_EOL(p)                                                     \
    do {                                                                \
        if (*(p) == 0x0D) {                                             \
            (p)++;                                                      \
            if (*(p) == 0x0A)                                           \
                (p)++;                                                  \
        } else if (*(p) == 0x0A) {                                      \
            (p)++;                                                      \
            if (*(p) == 0x0D)                                           \
                (p)++;                                                  \
        }                                                               \
    } while (0)
93
/*
 * MOVETO_ENDTAG:
 * @p:  a modifiable pointer (lvalue) into a zero-terminated buffer
 *
 * Advance @p until it points at '>' or at the terminating zero.
 * Expands to a statement without its final semicolon.
 */
#define MOVETO_ENDTAG(p)                                                \
    while ((*(p) != 0) && (*(p) != '>')) (p)++
Daniel Veillard64068b31999-03-24 20:42:16 +000096
/*
 * MOVETO_STARTTAG:
 * @p:  a modifiable pointer (lvalue) into a zero-terminated buffer
 *
 * Advance @p until it points at '<' or at the terminating zero.
 * Expands to a statement without its final semicolon.
 */
#define MOVETO_STARTTAG(p)                                              \
    while ((*(p) != 0) && (*(p) != '<')) (p)++
Daniel Veillard011b63c1999-06-02 17:44:04 +000099
/**
 * Global variables affecting the default parser behaviour.
 *
 * Each one is declared exactly once here; the definitions live in
 * the library sources.
 */

extern int xmlParserDebugEntities;
extern int xmlGetWarningsDefaultValue;
extern int xmlSubstituteEntitiesDefaultValue;
extern int xmlDoValidityCheckingDefaultValue;
extern int xmlPedanticParserDefaultValue;
extern int xmlKeepBlanksDefaultValue;
111
/*
 * Functions finishing the work of the IS_xxx macros where needed.
 *
 * Each takes a character code @c and returns an int.  Each is
 * declared exactly once.
 */
int	xmlIsBaseChar	(int c);
int	xmlIsBlank	(int c);
int	xmlIsPubidChar	(int c);
int	xmlIsLetter	(int c);
int	xmlIsDigit	(int c);
int	xmlIsIdeographic(int c);
int	xmlIsCombining	(int c);
int	xmlIsExtender	(int c);
int	xmlIsChar	(int c);
125
Daniel Veillard011b63c1999-06-02 17:44:04 +0000126/**
Daniel Veillardd692aa41999-02-28 21:54:31 +0000127 * Parser context
128 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000129xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000130xmlParserCtxtPtr xmlCreateFileParserCtxt (const char *filename);
131xmlParserCtxtPtr xmlCreateMemoryParserCtxt(char *buffer,
132 int size);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000133xmlParserCtxtPtr xmlNewParserCtxt (void);
Daniel Veillardcf461992000-03-14 18:30:20 +0000134xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL,
135 const xmlChar *ID,
136 const xmlChar *base);
Daniel Veillard496a1cf2000-05-03 14:20:55 +0000137int xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
Daniel Veillardb96e6431999-08-29 21:02:19 +0000138 xmlCharEncoding enc);
Daniel Veillard496a1cf2000-05-03 14:20:55 +0000139int xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,
140 xmlCharEncodingHandlerPtr handler);
Daniel Veillardcf461992000-03-14 18:30:20 +0000141void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
Daniel Veillardd692aa41999-02-28 21:54:31 +0000142
Daniel Veillardbc50b591999-03-01 12:28:53 +0000143/**
Daniel Veillard1e346af1999-02-22 10:33:01 +0000144 * Entities
145 */
Daniel Veillardb96e6431999-08-29 21:02:19 +0000146void xmlHandleEntity (xmlParserCtxtPtr ctxt,
147 xmlEntityPtr entity);
Daniel Veillard1e346af1999-02-22 10:33:01 +0000148
Daniel Veillardbc50b591999-03-01 12:28:53 +0000149/**
150 * Input Streams
151 */
Daniel Veillardb96e6431999-08-29 21:02:19 +0000152xmlParserInputPtr xmlNewEntityInputStream (xmlParserCtxtPtr ctxt,
153 xmlEntityPtr entity);
154void xmlPushInput (xmlParserCtxtPtr ctxt,
155 xmlParserInputPtr input);
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000156xmlChar xmlPopInput (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000157void xmlFreeInputStream (xmlParserInputPtr input);
158xmlParserInputPtr xmlNewInputFromFile (xmlParserCtxtPtr ctxt,
159 const char *filename);
Daniel Veillardb1059e22000-09-16 14:02:43 +0000160xmlParserInputPtr xmlNewInputStream (xmlParserCtxtPtr ctxt);
Daniel Veillardbc50b591999-03-01 12:28:53 +0000161
162/**
Daniel Veillard1e346af1999-02-22 10:33:01 +0000163 * Namespaces.
164 */
Daniel Veillardcf461992000-03-14 18:30:20 +0000165xmlChar * xmlSplitQName (xmlParserCtxtPtr ctxt,
166 const xmlChar *name,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000167 xmlChar **prefix);
Daniel Veillardcf461992000-03-14 18:30:20 +0000168xmlChar * xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt);
169xmlChar * xmlNamespaceParseQName (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000170 xmlChar **prefix);
Daniel Veillardcf461992000-03-14 18:30:20 +0000171xmlChar * xmlNamespaceParseNSDef (xmlParserCtxtPtr ctxt);
172xmlChar * xmlParseQuotedString (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000173void xmlParseNamespace (xmlParserCtxtPtr ctxt);
Daniel Veillard1e346af1999-02-22 10:33:01 +0000174
Daniel Veillardbc50b591999-03-01 12:28:53 +0000175/**
Daniel Veillard1e346af1999-02-22 10:33:01 +0000176 * Generic production rules
177 */
Daniel Veillardcf461992000-03-14 18:30:20 +0000178xmlChar * xmlScanName (xmlParserCtxtPtr ctxt);
179xmlChar * xmlParseName (xmlParserCtxtPtr ctxt);
180xmlChar * xmlParseNmtoken (xmlParserCtxtPtr ctxt);
181xmlChar * xmlParseEntityValue (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000182 xmlChar **orig);
Daniel Veillardcf461992000-03-14 18:30:20 +0000183xmlChar * xmlParseAttValue (xmlParserCtxtPtr ctxt);
184xmlChar * xmlParseSystemLiteral (xmlParserCtxtPtr ctxt);
185xmlChar * xmlParsePubidLiteral (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000186void xmlParseCharData (xmlParserCtxtPtr ctxt,
187 int cdata);
Daniel Veillardcf461992000-03-14 18:30:20 +0000188xmlChar * xmlParseExternalID (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000189 xmlChar **publicID,
Daniel Veillardb96e6431999-08-29 21:02:19 +0000190 int strict);
191void xmlParseComment (xmlParserCtxtPtr ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +0000192xmlChar * xmlParsePITarget (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000193void xmlParsePI (xmlParserCtxtPtr ctxt);
194void xmlParseNotationDecl (xmlParserCtxtPtr ctxt);
195void xmlParseEntityDecl (xmlParserCtxtPtr ctxt);
196int xmlParseDefaultDecl (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000197 xmlChar **value);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000198xmlEnumerationPtr xmlParseNotationType (xmlParserCtxtPtr ctxt);
199xmlEnumerationPtr xmlParseEnumerationType (xmlParserCtxtPtr ctxt);
200int xmlParseEnumeratedType (xmlParserCtxtPtr ctxt,
201 xmlEnumerationPtr *tree);
202int xmlParseAttributeType (xmlParserCtxtPtr ctxt,
203 xmlEnumerationPtr *tree);
204void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt);
205xmlElementContentPtr xmlParseElementMixedContentDecl
206 (xmlParserCtxtPtr ctxt);
207xmlElementContentPtr xmlParseElementChildrenContentDecl
208 (xmlParserCtxtPtr ctxt);
209int xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000210 xmlChar *name,
Daniel Veillardb96e6431999-08-29 21:02:19 +0000211 xmlElementContentPtr *result);
212int xmlParseElementDecl (xmlParserCtxtPtr ctxt);
213void xmlParseMarkupDecl (xmlParserCtxtPtr ctxt);
214int xmlParseCharRef (xmlParserCtxtPtr ctxt);
215xmlEntityPtr xmlParseEntityRef (xmlParserCtxtPtr ctxt);
216void xmlParseReference (xmlParserCtxtPtr ctxt);
217void xmlParsePEReference (xmlParserCtxtPtr ctxt);
218void xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +0000219xmlChar * xmlParseAttribute (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000220 xmlChar **value);
Daniel Veillardcf461992000-03-14 18:30:20 +0000221xmlChar * xmlParseStartTag (xmlParserCtxtPtr ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000222void xmlParseEndTag (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000223void xmlParseCDSect (xmlParserCtxtPtr ctxt);
224void xmlParseContent (xmlParserCtxtPtr ctxt);
225void xmlParseElement (xmlParserCtxtPtr ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +0000226xmlChar * xmlParseVersionNum (xmlParserCtxtPtr ctxt);
227xmlChar * xmlParseVersionInfo (xmlParserCtxtPtr ctxt);
228xmlChar * xmlParseEncName (xmlParserCtxtPtr ctxt);
229xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000230int xmlParseSDDecl (xmlParserCtxtPtr ctxt);
231void xmlParseXMLDecl (xmlParserCtxtPtr ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +0000232void xmlParseTextDecl (xmlParserCtxtPtr ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +0000233void xmlParseMisc (xmlParserCtxtPtr ctxt);
234void xmlParseExternalSubset (xmlParserCtxtPtr ctxt,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000235 const xmlChar *ExternalID,
236 const xmlChar *SystemID);
Daniel Veillard517752b1999-04-05 12:20:10 +0000237/*
Daniel Veillard011b63c1999-06-02 17:44:04 +0000238 * Entities substitution
239 */
240#define XML_SUBSTITUTE_NONE 0
241#define XML_SUBSTITUTE_REF 1
242#define XML_SUBSTITUTE_PEREF 2
243#define XML_SUBSTITUTE_BOTH 3
244
Daniel Veillardcf461992000-03-14 18:30:20 +0000245xmlChar * xmlDecodeEntities (xmlParserCtxtPtr ctxt,
Daniel Veillardb96e6431999-08-29 21:02:19 +0000246 int len,
247 int what,
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000248 xmlChar end,
249 xmlChar end2,
250 xmlChar end3);
Daniel Veillardcf461992000-03-14 18:30:20 +0000251xmlChar * xmlStringDecodeEntities (xmlParserCtxtPtr ctxt,
252 const xmlChar *str,
253 int what,
254 xmlChar end,
255 xmlChar end2,
256 xmlChar end3);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000257
258/*
Daniel Veillard517752b1999-04-05 12:20:10 +0000259 * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP
260 */
Daniel Veillardb96e6431999-08-29 21:02:19 +0000261int nodePush (xmlParserCtxtPtr ctxt,
262 xmlNodePtr value);
263xmlNodePtr nodePop (xmlParserCtxtPtr ctxt);
264int inputPush (xmlParserCtxtPtr ctxt,
265 xmlParserInputPtr value);
266xmlParserInputPtr inputPop (xmlParserCtxtPtr ctxt);
Daniel Veillard1e346af1999-02-22 10:33:01 +0000267
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000268/*
Daniel Veillardb1059e22000-09-16 14:02:43 +0000269 * other comodities shared between parser.c and parserInternals
270 */
271int xmlSkipBlankChars (xmlParserCtxtPtr ctxt);
272int xmlStringCurrentChar (xmlParserCtxtPtr ctxt,
273 const xmlChar *cur,
274 int *len);
275void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
276void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
277xmlChar *namePop (xmlParserCtxtPtr ctxt);
278int xmlCheckLanguageID (const xmlChar *lang);
279
280/*
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000281 * Really core function shared with HTML parser
282 */
283int xmlCurrentChar (xmlParserCtxtPtr ctxt,
284 int *len);
285int xmlCopyChar (int len,
286 xmlChar *out,
287 int val);
288void xmlNextChar (xmlParserCtxtPtr ctxt);
289void xmlParserInputShrink (xmlParserInputPtr in);
Daniel Veillardbc765302000-10-01 18:23:35 +0000290
#ifdef LIBXML_HTML_ENABLED
/*
 * Belongs to the HTML parser, but is declared here because it is
 * launched from the initialisation code.
 */
void htmlInitAutoClose(void);
#endif /* LIBXML_HTML_ENABLED */
Daniel Veillardc08a2c61999-09-08 21:35:25 +0000297#ifdef __cplusplus
298}
299#endif
Daniel Veillard1e346af1999-02-22 10:33:01 +0000300#endif /* __XML_PARSER_INTERNALS_H__ */