/*
 * parserInternals.h : internal routines exported by the parser.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 *
 * 14 Nov 2000 ht - truncated declaration of xmlParseElementChildrenContentDecl
 *                  for VMS
 *
 */

13#ifndef __XML_PARSER_INTERNALS_H__
14#define __XML_PARSER_INTERNALS_H__
15
16#include <libxml/parser.h>
Daniel Veillard56a4cb82001-03-24 17:00:36 +000017#include <libxml/HTMLparser.h>
Owen Taylor3473f882001-02-23 17:55:21 +000018
19#ifdef __cplusplus
20extern "C" {
21#endif
22
/**
 * XML_MAX_NAMELEN:
 *
 * Identifiers can be longer, but this will be more costly
 * at runtime.
 */
#define XML_MAX_NAMELEN 100

/**
 * INPUT_CHUNK:
 *
 * The parser tries to always have that amount of input ready;
 * one of the purposes is providing context when reporting errors.
 */
#define INPUT_CHUNK 250

/************************************************************************
 *                                                                      *
 * UNICODE version of the macros.                                       *
 *                                                                      *
 ************************************************************************/
/**
 * IS_CHAR:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec
 *
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 *
 * Evaluates to non-zero when @c matches the production, 0 otherwise.
 * Note: @c is evaluated several times, so it must not have side effects.
 */
#define IS_CHAR(c) \
    ((((c) >= 0x20) && ((c) <= 0xD7FF)) || \
     ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || \
     (((c) >= 0xE000) && ((c) <= 0xFFFD)) || \
     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))

/**
 * IS_BLANK:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec
 *
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
 *
 * Evaluates to non-zero when @c is one of the four blank characters.
 * Note: @c is evaluated several times, so it must not have side effects.
 */
#define IS_BLANK(c) \
    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D))

/**
 * IS_BASECHAR:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec
 *
 * [85] BaseChar ::= ... long list see REC ...
 *
 * Expands to a call to xmlIsBaseChar().
 */
#define IS_BASECHAR(c) xmlIsBaseChar(c)

/**
 * IS_DIGIT:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec
 *
 * [88] Digit ::= ... long list see REC ...
 *
 * Expands to a call to xmlIsDigit().
 */
#define IS_DIGIT(c) xmlIsDigit(c)

/**
 * IS_COMBINING:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec
 *
 * [87] CombiningChar ::= ... long list see REC ...
 *
 * Expands to a call to xmlIsCombining().
 */
#define IS_COMBINING(c) xmlIsCombining(c)

/**
 * IS_EXTENDER:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec
 *
 * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
 *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
 *                   [#x309D-#x309E] | [#x30FC-#x30FE]
 *
 * Expands to a call to xmlIsExtender().
 */
#define IS_EXTENDER(c) xmlIsExtender(c)

/**
 * IS_IDEOGRAPHIC:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec
 *
 * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
 *
 * Expands to a call to xmlIsIdeographic().
 */
#define IS_IDEOGRAPHIC(c) xmlIsIdeographic(c)

/**
 * IS_LETTER:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec
 *
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Note: @c is expanded twice (once per sub-test), so it must not have
 * side effects.
 */
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))


/**
 * IS_PUBIDCHAR:
 * @c:  a Unicode value (int)
 *
 * Macro to check the following production in the XML spec
 *
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
 *
 * Expands to a call to xmlIsPubidChar().
 */
#define IS_PUBIDCHAR(c) xmlIsPubidChar(c)

/**
 * SKIP_EOL:
 * @p:  a UTF8 string pointer (a modifiable lvalue)
 *
 * Skips one end-of-line sequence located at @p, if any: a lone CR (0x0D),
 * a lone LF (0x0A), CR LF or LF CR. @p is left untouched when it does
 * not point at an end-of-line character.
 *
 * The previous version compared against 0x13 and 0x10 (the control
 * characters DC3 and DLE) instead of CR and LF, so it never skipped a real
 * line ending. It also ran its two tests back to back, which could consume
 * a second line ending after CR LF; the tests are now mutually exclusive.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement, e.g. inside an unbraced if/else. @p is evaluated several times.
 */
#define SKIP_EOL(p) \
    do { \
        if (*(p) == 0x0D) { \
            (p)++; \
            if (*(p) == 0x0A) (p)++; \
        } else if (*(p) == 0x0A) { \
            (p)++; \
            if (*(p) == 0x0D) (p)++; \
        } \
    } while (0)

/**
 * MOVETO_ENDTAG:
 * @p:  a UTF8 string pointer (a modifiable lvalue)
 *
 * Advances @p to the next '>' char, or to the terminating 0 byte when
 * there is none. @p is not moved when it already points at '>'.
 *
 * Expands to a while loop without a trailing semicolon; the caller
 * supplies it. Every use of the argument is parenthesized.
 */
#define MOVETO_ENDTAG(p) \
    while ((*(p)) && (*(p) != '>')) (p)++

/**
 * MOVETO_STARTTAG:
 * @p:  a UTF8 string pointer (a modifiable lvalue)
 *
 * Advances @p to the next '<' char, or to the terminating 0 byte when
 * there is none. @p is not moved when it already points at '<'.
 *
 * Expands to a while loop without a trailing semicolon; the caller
 * supplies it. Every use of the argument is parenthesized.
 */
#define MOVETO_STARTTAG(p) \
    while ((*(p)) && (*(p) != '<')) (p)++

176/**
177 * Global vaiables affecting the default parser behaviour.
178 */
179
180LIBXML_DLL_IMPORT extern int xmlParserDebugEntities;
181LIBXML_DLL_IMPORT extern int xmlGetWarningsDefaultValue;
182LIBXML_DLL_IMPORT extern int xmlParserDebugEntities;
183LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultValue;
184LIBXML_DLL_IMPORT extern int xmlDoValidityCheckingDefaultValue;
185LIBXML_DLL_IMPORT extern int xmlLoadExtDtdDefaultValue;
186LIBXML_DLL_IMPORT extern int xmlPedanticParserDefaultValue;
187LIBXML_DLL_IMPORT extern int xmlKeepBlanksDefaultValue;
188LIBXML_DLL_IMPORT extern xmlChar xmlStringText[];
189LIBXML_DLL_IMPORT extern xmlChar xmlStringTextNoenc[];
190LIBXML_DLL_IMPORT extern xmlChar xmlStringComment[];
191
/*
 * Functions to finish the work of the macros where needed.
 *
 * Each takes a Unicode value as an int and returns an int. The IS_BASECHAR,
 * IS_DIGIT, IS_COMBINING, IS_EXTENDER, IS_IDEOGRAPHIC and IS_PUBIDCHAR
 * macros expand to calls to the matching function.
 * (xmlIsCombining used to be declared twice here.)
 */
int xmlIsBaseChar(int c);
int xmlIsBlank(int c);
int xmlIsPubidChar(int c);
int xmlIsLetter(int c);
int xmlIsDigit(int c);
int xmlIsIdeographic(int c);
int xmlIsCombining(int c);
int xmlIsExtender(int c);
int xmlIsChar(int c);

206/**
207 * Parser context
208 */
209xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
210xmlParserCtxtPtr xmlCreateFileParserCtxt (const char *filename);
Daniel Veillardfd7ddca2001-05-16 10:57:35 +0000211xmlParserCtxtPtr xmlCreateMemoryParserCtxt(const char *buffer,
Owen Taylor3473f882001-02-23 17:55:21 +0000212 int size);
213xmlParserCtxtPtr xmlNewParserCtxt (void);
214xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL,
215 const xmlChar *ID,
216 const xmlChar *base);
217int xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
218 xmlCharEncoding enc);
219int xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,
220 xmlCharEncodingHandlerPtr handler);
221void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
222
223/**
224 * Entities
225 */
226void xmlHandleEntity (xmlParserCtxtPtr ctxt,
227 xmlEntityPtr entity);
228
229/**
230 * Input Streams
231 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000232xmlParserInputPtr xmlNewStringInputStream (xmlParserCtxtPtr ctxt,
233 const xmlChar *buffer);
Owen Taylor3473f882001-02-23 17:55:21 +0000234xmlParserInputPtr xmlNewEntityInputStream (xmlParserCtxtPtr ctxt,
235 xmlEntityPtr entity);
236void xmlPushInput (xmlParserCtxtPtr ctxt,
237 xmlParserInputPtr input);
238xmlChar xmlPopInput (xmlParserCtxtPtr ctxt);
239void xmlFreeInputStream (xmlParserInputPtr input);
240xmlParserInputPtr xmlNewInputFromFile (xmlParserCtxtPtr ctxt,
241 const char *filename);
242xmlParserInputPtr xmlNewInputStream (xmlParserCtxtPtr ctxt);
243
244/**
245 * Namespaces.
246 */
247xmlChar * xmlSplitQName (xmlParserCtxtPtr ctxt,
248 const xmlChar *name,
249 xmlChar **prefix);
250xmlChar * xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt);
251xmlChar * xmlNamespaceParseQName (xmlParserCtxtPtr ctxt,
252 xmlChar **prefix);
253xmlChar * xmlNamespaceParseNSDef (xmlParserCtxtPtr ctxt);
254xmlChar * xmlParseQuotedString (xmlParserCtxtPtr ctxt);
255void xmlParseNamespace (xmlParserCtxtPtr ctxt);
256
257/**
258 * Generic production rules
259 */
260xmlChar * xmlScanName (xmlParserCtxtPtr ctxt);
261xmlChar * xmlParseName (xmlParserCtxtPtr ctxt);
262xmlChar * xmlParseNmtoken (xmlParserCtxtPtr ctxt);
263xmlChar * xmlParseEntityValue (xmlParserCtxtPtr ctxt,
264 xmlChar **orig);
265xmlChar * xmlParseAttValue (xmlParserCtxtPtr ctxt);
266xmlChar * xmlParseSystemLiteral (xmlParserCtxtPtr ctxt);
267xmlChar * xmlParsePubidLiteral (xmlParserCtxtPtr ctxt);
268void xmlParseCharData (xmlParserCtxtPtr ctxt,
269 int cdata);
270xmlChar * xmlParseExternalID (xmlParserCtxtPtr ctxt,
271 xmlChar **publicID,
272 int strict);
273void xmlParseComment (xmlParserCtxtPtr ctxt);
274xmlChar * xmlParsePITarget (xmlParserCtxtPtr ctxt);
275void xmlParsePI (xmlParserCtxtPtr ctxt);
276void xmlParseNotationDecl (xmlParserCtxtPtr ctxt);
277void xmlParseEntityDecl (xmlParserCtxtPtr ctxt);
278int xmlParseDefaultDecl (xmlParserCtxtPtr ctxt,
279 xmlChar **value);
280xmlEnumerationPtr xmlParseNotationType (xmlParserCtxtPtr ctxt);
281xmlEnumerationPtr xmlParseEnumerationType (xmlParserCtxtPtr ctxt);
282int xmlParseEnumeratedType (xmlParserCtxtPtr ctxt,
283 xmlEnumerationPtr *tree);
284int xmlParseAttributeType (xmlParserCtxtPtr ctxt,
285 xmlEnumerationPtr *tree);
286void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt);
287xmlElementContentPtr xmlParseElementMixedContentDecl
288 (xmlParserCtxtPtr ctxt);
289#ifdef VMS
290xmlElementContentPtr xmlParseElementChildrenContentD
291 (xmlParserCtxtPtr ctxt);
292#define xmlParseElementChildrenContentDecl xmlParseElementChildrenContentD
293#else
294xmlElementContentPtr xmlParseElementChildrenContentDecl
295 (xmlParserCtxtPtr ctxt);
296#endif
297int xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,
298 xmlChar *name,
299 xmlElementContentPtr *result);
300int xmlParseElementDecl (xmlParserCtxtPtr ctxt);
301void xmlParseMarkupDecl (xmlParserCtxtPtr ctxt);
302int xmlParseCharRef (xmlParserCtxtPtr ctxt);
303xmlEntityPtr xmlParseEntityRef (xmlParserCtxtPtr ctxt);
304void xmlParseReference (xmlParserCtxtPtr ctxt);
305void xmlParsePEReference (xmlParserCtxtPtr ctxt);
306void xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt);
307xmlChar * xmlParseAttribute (xmlParserCtxtPtr ctxt,
308 xmlChar **value);
309xmlChar * xmlParseStartTag (xmlParserCtxtPtr ctxt);
310void xmlParseEndTag (xmlParserCtxtPtr ctxt);
311void xmlParseCDSect (xmlParserCtxtPtr ctxt);
312void xmlParseContent (xmlParserCtxtPtr ctxt);
313void xmlParseElement (xmlParserCtxtPtr ctxt);
314xmlChar * xmlParseVersionNum (xmlParserCtxtPtr ctxt);
315xmlChar * xmlParseVersionInfo (xmlParserCtxtPtr ctxt);
316xmlChar * xmlParseEncName (xmlParserCtxtPtr ctxt);
317xmlChar * xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);
318int xmlParseSDDecl (xmlParserCtxtPtr ctxt);
319void xmlParseXMLDecl (xmlParserCtxtPtr ctxt);
320void xmlParseTextDecl (xmlParserCtxtPtr ctxt);
321void xmlParseMisc (xmlParserCtxtPtr ctxt);
322void xmlParseExternalSubset (xmlParserCtxtPtr ctxt,
323 const xmlChar *ExternalID,
324 const xmlChar *SystemID);
/*
 * Entities substitution
 *
 * XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF are distinct bits, and
 * XML_SUBSTITUTE_BOTH equals the two OR-ed together.
 */
#define XML_SUBSTITUTE_NONE 0
#define XML_SUBSTITUTE_REF 1
#define XML_SUBSTITUTE_PEREF 2
#define XML_SUBSTITUTE_BOTH 3

333xmlChar * xmlDecodeEntities (xmlParserCtxtPtr ctxt,
334 int len,
335 int what,
336 xmlChar end,
337 xmlChar end2,
338 xmlChar end3);
339xmlChar * xmlStringDecodeEntities (xmlParserCtxtPtr ctxt,
340 const xmlChar *str,
341 int what,
342 xmlChar end,
343 xmlChar end2,
344 xmlChar end3);
345
346/*
347 * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP
348 */
349int nodePush (xmlParserCtxtPtr ctxt,
350 xmlNodePtr value);
351xmlNodePtr nodePop (xmlParserCtxtPtr ctxt);
352int inputPush (xmlParserCtxtPtr ctxt,
353 xmlParserInputPtr value);
354xmlParserInputPtr inputPop (xmlParserCtxtPtr ctxt);
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000355xmlChar *namePop (xmlParserCtxtPtr ctxt);
356int namePush (xmlParserCtxtPtr ctxt,
357 xmlChar *value);
Owen Taylor3473f882001-02-23 17:55:21 +0000358
359/*
360 * other comodities shared between parser.c and parserInternals
361 */
362int xmlSkipBlankChars (xmlParserCtxtPtr ctxt);
363int xmlStringCurrentChar (xmlParserCtxtPtr ctxt,
364 const xmlChar *cur,
365 int *len);
366void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
367void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +0000368int xmlCheckLanguageID (const xmlChar *lang);
369
370/*
371 * Really core function shared with HTML parser
372 */
373int xmlCurrentChar (xmlParserCtxtPtr ctxt,
374 int *len);
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000375int xmlCopyCharMultiByte (xmlChar *out,
376 int val);
Owen Taylor3473f882001-02-23 17:55:21 +0000377int xmlCopyChar (int len,
378 xmlChar *out,
379 int val);
380void xmlNextChar (xmlParserCtxtPtr ctxt);
381void xmlParserInputShrink (xmlParserInputPtr in);
382
#ifdef LIBXML_HTML_ENABLED
/*
 * Actually comes from the HTML parser but launched from the init stuff.
 * Only declared when LIBXML_HTML_ENABLED is defined.
 */
void htmlInitAutoClose(void);
htmlParserCtxtPtr htmlCreateFileParserCtxt(const char *filename,
                                           const char *encoding);
#endif
391#ifdef __cplusplus
392}
393#endif
394#endif /* __XML_PARSER_INTERNALS_H__ */