/*
 * parserInternals.h : internal routines exported by the parser.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 *
 * 14 Nov 2000 ht - truncated declaration of xmlParseElementChildrenContentDecl
 *                  for VMS
 *
 */
12
13#ifndef __XML_PARSER_INTERNALS_H__
14#define __XML_PARSER_INTERNALS_H__
15
16#include <libxml/parser.h>
17
18#ifdef __cplusplus
19extern "C" {
20#endif
21
/*
 * XML_MAX_NAMELEN:
 *
 * Identifiers can be longer than this, but handling them is more
 * costly at runtime.
 */
#define XML_MAX_NAMELEN 100
27
/*
 * INPUT_CHUNK:
 *
 * The parser tries to always keep this amount of input ready; one of
 * the reasons is to provide context when reporting errors.
 */
#define INPUT_CHUNK 250
33
/************************************************************************
 *									*
 *		UNICODE version of the macros.				*
 *									*
 ************************************************************************/
/*
 * IS_CHAR:
 * @c:  a character code; it is evaluated several times, so it must
 *      not have side effects
 *
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *              | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * Expands to an expression that is non-zero when c lies in one of the
 * ranges above and zero otherwise.
 */
#define IS_CHAR(c) \
    ((((c) >= 0x20) && ((c) <= 0xD7FF)) || \
     ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || \
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) || \
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
49
/*
 * IS_BLANK:
 * @c:  a character code; it is evaluated several times, so it must
 *      not have side effects
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * Expands to an expression that is non-zero when c is one of the four
 * white space characters of the production and zero otherwise.
 */
#define IS_BLANK(c) \
    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xA) || \
     ((c) == 0x0D))
55
/*
 * IS_BASECHAR:
 * @c:  a character code
 *
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Expands to a call to xmlIsBaseChar(), which does the actual check.
 */
#define IS_BASECHAR(c) xmlIsBaseChar(c)
60
/*
 * IS_DIGIT:
 * @c:  a character code
 *
 * [88] Digit ::= ... long list see REC ...
 *
 * Expands to a call to xmlIsDigit(), which does the actual check.
 */
#define IS_DIGIT(c) xmlIsDigit(c)
65
/*
 * IS_COMBINING:
 * @c:  a character code
 *
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Expands to a call to xmlIsCombining(), which does the actual check.
 */
#define IS_COMBINING(c) xmlIsCombining(c)
70
/*
 * IS_EXTENDER:
 * @c:  a character code
 *
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Expands to a call to xmlIsExtender(), which does the actual check.
 */
#define IS_EXTENDER(c) xmlIsExtender(c)
77
/*
 * IS_IDEOGRAPHIC:
 * @c:  a character code
 *
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Expands to a call to xmlIsIdeographic(), which does the actual check.
 */
#define IS_IDEOGRAPHIC(c) xmlIsIdeographic(c)
82
/*
 * IS_LETTER:
 * @c:  a character code; it may be evaluated twice (once per test),
 *      so it must not have side effects
 *
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Combines IS_BASECHAR() and IS_IDEOGRAPHIC(); the ideographic test is
 * only evaluated when the base character test yields zero.
 */
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
87
88
/*
 * IS_PUBIDCHAR:
 * @c:  a character code
 *
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Expands to a call to xmlIsPubidChar(), which does the actual check.
 */
#define IS_PUBIDCHAR(c) xmlIsPubidChar(c)
93
/*
 * SKIP_EOL:
 * @p:  a modifiable pointer to the current input character; it is
 *      evaluated several times, so it must not have side effects
 *
 * Advance p past one end-of-line sequence when it points at one:
 * CR, LF, CR LF or LF CR. p is left unchanged otherwise.
 *
 * CR is 0x0D and LF is 0x0A (decimal 13 and 10), the same values that
 * IS_BLANK tests for. The two cases are exclusive, so a sequence such
 * as CR LF LF only consumes its first line end.
 *
 * The expansion is a single statement: terminate the invocation with
 * a semicolon, as for MOVETO_ENDTAG and MOVETO_STARTTAG.
 */
#define SKIP_EOL(p) \
    do { \
        if (*(p) == 0x0D) { \
            (p)++; \
            if (*(p) == 0x0A) (p)++; \
        } else if (*(p) == 0x0A) { \
            (p)++; \
            if (*(p) == 0x0D) (p)++; \
        } \
    } while (0)
97
/*
 * MOVETO_ENDTAG:
 * @p:  a modifiable pointer into a zero terminated string; it is
 *      evaluated several times, so it must not have side effects
 *
 * Advance p until it points at a '>' character or at the terminating
 * zero, whichever comes first. p is left unchanged when it already
 * points at one of them.
 */
#define MOVETO_ENDTAG(p) \
    while ((*(p)) && (*(p) != '>')) (p)++
100
/*
 * MOVETO_STARTTAG:
 * @p:  a modifiable pointer into a zero terminated string; it is
 *      evaluated several times, so it must not have side effects
 *
 * Advance p until it points at a '<' character or at the terminating
 * zero, whichever comes first. p is left unchanged when it already
 * points at one of them.
 */
#define MOVETO_STARTTAG(p) \
    while ((*(p)) && (*(p) != '<')) (p)++
103
104/**
105 * Global vaiables affecting the default parser behaviour.
106 */
107
108LIBXML_DLL_IMPORT extern int xmlParserDebugEntities;
109LIBXML_DLL_IMPORT extern int xmlGetWarningsDefaultValue;
110LIBXML_DLL_IMPORT extern int xmlParserDebugEntities;
111LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultValue;
112LIBXML_DLL_IMPORT extern int xmlDoValidityCheckingDefaultValue;
113LIBXML_DLL_IMPORT extern int xmlLoadExtDtdDefaultValue;
114LIBXML_DLL_IMPORT extern int xmlPedanticParserDefaultValue;
115LIBXML_DLL_IMPORT extern int xmlKeepBlanksDefaultValue;
116LIBXML_DLL_IMPORT extern xmlChar xmlStringText[];
117LIBXML_DLL_IMPORT extern xmlChar xmlStringTextNoenc[];
118LIBXML_DLL_IMPORT extern xmlChar xmlStringComment[];
119
/*
 * Functions finishing the work of the macros where needed.
 *
 * IS_BASECHAR, IS_DIGIT, IS_COMBINING, IS_EXTENDER, IS_IDEOGRAPHIC and
 * IS_PUBIDCHAR expand to calls to the matching function below.
 * IS_CHAR, IS_BLANK and IS_LETTER are expanded inline and do not call
 * xmlIsChar(), xmlIsBlank() or xmlIsLetter().
 *
 * Each function takes the character code as an int and returns an int;
 * the macros use the result as a truth value.
 */
int	xmlIsBaseChar		(int c);
int	xmlIsBlank		(int c);
int	xmlIsPubidChar		(int c);
int	xmlIsLetter		(int c);
int	xmlIsDigit		(int c);
int	xmlIsIdeographic	(int c);
int	xmlIsCombining		(int c);
int	xmlIsExtender		(int c);
int	xmlIsChar		(int c);
133
134/**
135 * Parser context
136 */
137xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
138xmlParserCtxtPtr xmlCreateFileParserCtxt (const char *filename);
139xmlParserCtxtPtr xmlCreateMemoryParserCtxt(char *buffer,
140 int size);
141xmlParserCtxtPtr xmlNewParserCtxt (void);
142xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL,
143 const xmlChar *ID,
144 const xmlChar *base);
145int xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
146 xmlCharEncoding enc);
147int xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,
148 xmlCharEncodingHandlerPtr handler);
149void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
150
151/**
152 * Entities
153 */
154void xmlHandleEntity (xmlParserCtxtPtr ctxt,
155 xmlEntityPtr entity);
156
157/**
158 * Input Streams
159 */
160xmlParserInputPtr xmlNewEntityInputStream (xmlParserCtxtPtr ctxt,
161 xmlEntityPtr entity);
162void xmlPushInput (xmlParserCtxtPtr ctxt,
163 xmlParserInputPtr input);
164xmlChar xmlPopInput (xmlParserCtxtPtr ctxt);
165void xmlFreeInputStream (xmlParserInputPtr input);
166xmlParserInputPtr xmlNewInputFromFile (xmlParserCtxtPtr ctxt,
167 const char *filename);
168xmlParserInputPtr xmlNewInputStream (xmlParserCtxtPtr ctxt);
169
170/**
171 * Namespaces.
172 */
173xmlChar * xmlSplitQName (xmlParserCtxtPtr ctxt,
174 const xmlChar *name,
175 xmlChar **prefix);
176xmlChar * xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt);
177xmlChar * xmlNamespaceParseQName (xmlParserCtxtPtr ctxt,
178 xmlChar **prefix);
179xmlChar * xmlNamespaceParseNSDef (xmlParserCtxtPtr ctxt);
180xmlChar * xmlParseQuotedString (xmlParserCtxtPtr ctxt);
181void xmlParseNamespace (xmlParserCtxtPtr ctxt);
182
183/**
184 * Generic production rules
185 */
186xmlChar * xmlScanName (xmlParserCtxtPtr ctxt);
187xmlChar * xmlParseName (xmlParserCtxtPtr ctxt);
188xmlChar * xmlParseNmtoken (xmlParserCtxtPtr ctxt);
189xmlChar * xmlParseEntityValue (xmlParserCtxtPtr ctxt,
190 xmlChar **orig);
191xmlChar * xmlParseAttValue (xmlParserCtxtPtr ctxt);
192xmlChar * xmlParseSystemLiteral (xmlParserCtxtPtr ctxt);
193xmlChar * xmlParsePubidLiteral (xmlParserCtxtPtr ctxt);
194void xmlParseCharData (xmlParserCtxtPtr ctxt,
195 int cdata);
196xmlChar * xmlParseExternalID (xmlParserCtxtPtr ctxt,
197 xmlChar **publicID,
198 int strict);
199void xmlParseComment (xmlParserCtxtPtr ctxt);
200xmlChar * xmlParsePITarget (xmlParserCtxtPtr ctxt);
201void xmlParsePI (xmlParserCtxtPtr ctxt);
202void xmlParseNotationDecl (xmlParserCtxtPtr ctxt);
203void xmlParseEntityDecl (xmlParserCtxtPtr ctxt);
204int xmlParseDefaultDecl (xmlParserCtxtPtr ctxt,
205 xmlChar **value);
206xmlEnumerationPtr xmlParseNotationType (xmlParserCtxtPtr ctxt);
207xmlEnumerationPtr xmlParseEnumerationType (xmlParserCtxtPtr ctxt);
208int xmlParseEnumeratedType (xmlParserCtxtPtr ctxt,
209 xmlEnumerationPtr *tree);
210int xmlParseAttributeType (xmlParserCtxtPtr ctxt,
211 xmlEnumerationPtr *tree);
212void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt);
213xmlElementContentPtr xmlParseElementMixedContentDecl
214 (xmlParserCtxtPtr ctxt);
215#ifdef VMS
216xmlElementContentPtr xmlParseElementChildrenContentD
217 (xmlParserCtxtPtr ctxt);
218#define xmlParseElementChildrenContentDecl xmlParseElementChildrenContentD
219#else
220xmlElementContentPtr xmlParseElementChildrenContentDecl
221 (xmlParserCtxtPtr ctxt);
222#endif
223int xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,
224 xmlChar *name,
225 xmlElementContentPtr *result);
226int xmlParseElementDecl (xmlParserCtxtPtr ctxt);
227void xmlParseMarkupDecl (xmlParserCtxtPtr ctxt);
228int xmlParseCharRef (xmlParserCtxtPtr ctxt);
229xmlEntityPtr xmlParseEntityRef (xmlParserCtxtPtr ctxt);
230void xmlParseReference (xmlParserCtxtPtr ctxt);
231void xmlParsePEReference (xmlParserCtxtPtr ctxt);
232void xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt);
233xmlChar * xmlParseAttribute (xmlParserCtxtPtr ctxt,
234 xmlChar **value);
235xmlChar * xmlParseStartTag (xmlParserCtxtPtr ctxt);
236void xmlParseEndTag (xmlParserCtxtPtr ctxt);
237void xmlParseCDSect (xmlParserCtxtPtr ctxt);
238void xmlParseContent (xmlParserCtxtPtr ctxt);
239void xmlParseElement (xmlParserCtxtPtr ctxt);
240xmlChar * xmlParseVersionNum (xmlParserCtxtPtr ctxt);
241xmlChar * xmlParseVersionInfo (xmlParserCtxtPtr ctxt);
242xmlChar * xmlParseEncName (xmlParserCtxtPtr ctxt);
243xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);
244int xmlParseSDDecl (xmlParserCtxtPtr ctxt);
245void xmlParseXMLDecl (xmlParserCtxtPtr ctxt);
246void xmlParseTextDecl (xmlParserCtxtPtr ctxt);
247void xmlParseMisc (xmlParserCtxtPtr ctxt);
248void xmlParseExternalSubset (xmlParserCtxtPtr ctxt,
249 const xmlChar *ExternalID,
250 const xmlChar *SystemID);
/*
 * Entities substitution
 *
 * The values form a two bit mask: XML_SUBSTITUTE_BOTH is the bitwise
 * OR of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF.
 * Presumably these are the values accepted by the `what' argument of
 * the entity decoding functions -- confirm against the implementation.
 */
#define XML_SUBSTITUTE_NONE	0
#define XML_SUBSTITUTE_REF	1
#define XML_SUBSTITUTE_PEREF	2
#define XML_SUBSTITUTE_BOTH	3
258
259xmlChar * xmlDecodeEntities (xmlParserCtxtPtr ctxt,
260 int len,
261 int what,
262 xmlChar end,
263 xmlChar end2,
264 xmlChar end3);
265xmlChar * xmlStringDecodeEntities (xmlParserCtxtPtr ctxt,
266 const xmlChar *str,
267 int what,
268 xmlChar end,
269 xmlChar end2,
270 xmlChar end3);
271
272/*
273 * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP
274 */
275int nodePush (xmlParserCtxtPtr ctxt,
276 xmlNodePtr value);
277xmlNodePtr nodePop (xmlParserCtxtPtr ctxt);
278int inputPush (xmlParserCtxtPtr ctxt,
279 xmlParserInputPtr value);
280xmlParserInputPtr inputPop (xmlParserCtxtPtr ctxt);
281
282/*
283 * other comodities shared between parser.c and parserInternals
284 */
285int xmlSkipBlankChars (xmlParserCtxtPtr ctxt);
286int xmlStringCurrentChar (xmlParserCtxtPtr ctxt,
287 const xmlChar *cur,
288 int *len);
289void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
290void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
291xmlChar *namePop (xmlParserCtxtPtr ctxt);
292int xmlCheckLanguageID (const xmlChar *lang);
293
294/*
295 * Really core function shared with HTML parser
296 */
297int xmlCurrentChar (xmlParserCtxtPtr ctxt,
298 int *len);
299int xmlCopyChar (int len,
300 xmlChar *out,
301 int val);
302void xmlNextChar (xmlParserCtxtPtr ctxt);
303void xmlParserInputShrink (xmlParserInputPtr in);
304
#ifdef LIBXML_HTML_ENABLED
/*
 * Actually comes from the HTML parser but is launched from the
 * initialization code, hence the declaration here. It is only
 * declared when LIBXML_HTML_ENABLED is defined.
 */
void	htmlInitAutoClose	(void);
#endif
311#ifdef __cplusplus
312}
313#endif
314#endif /* __XML_PARSER_INTERNALS_H__ */