/*
 * parser.h : Interfaces, constants and types related to the XML parser.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
8
9#ifndef __XML_PARSER_H__
10#define __XML_PARSER_H__
11
12#include <libxml/tree.h>
Daniel Veillard2fdbd322003-08-18 12:15:38 +000013#include <libxml/dict.h>
Owen Taylor3473f882001-02-23 17:55:21 +000014#include <libxml/valid.h>
Owen Taylor3473f882001-02-23 17:55:21 +000015#include <libxml/entities.h>
Owen Taylor3473f882001-02-23 17:55:21 +000016
17#ifdef __cplusplus
18extern "C" {
19#endif
20
/**
 * XML_DEFAULT_VERSION:
 *
 * The default version of XML used: 1.0
 */
#define XML_DEFAULT_VERSION "1.0"
27
/**
 * xmlParserInput:
 *
 * An xmlParserInput is an input flow for the XML processor.
 * Each entity parsed is associated with an xmlParserInput (except the
 * few predefined ones). This is the case both for internal entities
 * - in which case the flow is already completely in memory - and for
 * external entities - in which case we use the buf structure for
 * progressive reading and I18N conversions to the internal UTF-8 format.
 */
38
Daniel Veillard9d06d302002-01-22 18:15:52 +000039/**
40 * xmlParserInputDeallocate:
41 * @str: the string to deallocate
42 *
Daniel Veillard61f26172002-03-12 18:46:39 +000043 * Callback for freeing some parser input allocations.
Daniel Veillard9d06d302002-01-22 18:15:52 +000044 */
45typedef void (* xmlParserInputDeallocate)(xmlChar *str);
Daniel Veillard5e2dace2001-07-18 19:30:27 +000046
Owen Taylor3473f882001-02-23 17:55:21 +000047struct _xmlParserInput {
48 /* Input buffer */
49 xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */
50
51 const char *filename; /* The file analyzed, if any */
Daniel Veillard60087f32001-10-10 09:45:09 +000052 const char *directory; /* the directory/base of the file */
Owen Taylor3473f882001-02-23 17:55:21 +000053 const xmlChar *base; /* Base of the array to parse */
54 const xmlChar *cur; /* Current char being parsed */
Daniel Veillardcbaf3992001-12-31 16:16:02 +000055 const xmlChar *end; /* end of the array to parse */
Owen Taylor3473f882001-02-23 17:55:21 +000056 int length; /* length if known */
57 int line; /* Current line */
58 int col; /* Current column */
Daniel Veillard3e59fc52003-04-18 12:34:58 +000059 /*
60 * NOTE: consumed is only tested for equality in the parser code,
61 * so even if there is an overflow this should not give troubles
62 * for parsing very large instances.
63 */
64 unsigned long consumed; /* How many xmlChars already consumed */
Owen Taylor3473f882001-02-23 17:55:21 +000065 xmlParserInputDeallocate free; /* function to deallocate the base */
66 const xmlChar *encoding; /* the encoding string for entity */
67 const xmlChar *version; /* the version string for entity */
68 int standalone; /* Was that entity marked standalone */
69};
70
/**
 * xmlParserNodeInfo:
 *
 * The parser can be asked to collect node information, i.e. at what
 * place in the file the nodes were detected.
 * NOTE: This is off by default and not very well tested.
 */
typedef struct _xmlParserNodeInfo xmlParserNodeInfo;
typedef xmlParserNodeInfo *xmlParserNodeInfoPtr;

struct _xmlParserNodeInfo {
    const struct _xmlNode *node;
    /* Position & line # that text that created the node begins & ends on */
    unsigned long begin_pos;
    unsigned long begin_line;
    unsigned long end_pos;
    unsigned long end_line;
};

/*
 * A sequence of xmlParserNodeInfo records.
 * NOTE(review): maximum/length look like the allocated and used sizes of
 * buffer - confirm against the code that fills the sequence.
 */
typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
struct _xmlParserNodeInfoSeq {
    unsigned long maximum;
    unsigned long length;
    xmlParserNodeInfo *buffer;
};
97
/**
 * xmlParserInputState:
 *
 * The parser is now working also as a state based parser.
 * The recursive one uses the state info for entities processing.
 */
typedef enum {
    XML_PARSER_EOF = -1,        /* nothing is to be parsed */
    XML_PARSER_START = 0,       /* nothing has been parsed */
    XML_PARSER_MISC,            /* Misc* before int subset */
    XML_PARSER_PI,              /* Within a processing instruction */
    XML_PARSER_DTD,             /* within some DTD content */
    XML_PARSER_PROLOG,          /* Misc* after internal subset */
    XML_PARSER_COMMENT,         /* within a comment */
    XML_PARSER_START_TAG,       /* within a start tag */
    XML_PARSER_CONTENT,         /* within the content */
    XML_PARSER_CDATA_SECTION,   /* within a CDATA section */
    XML_PARSER_END_TAG,         /* within a closing tag */
    XML_PARSER_ENTITY_DECL,     /* within an entity declaration */
    XML_PARSER_ENTITY_VALUE,    /* within an entity value in a decl */
    XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
    XML_PARSER_SYSTEM_LITERAL,  /* within a SYSTEM value */
    XML_PARSER_EPILOG,          /* the Misc* after the last end tag */
    XML_PARSER_IGNORE,          /* within an IGNORED section */
    XML_PARSER_PUBLIC_LITERAL   /* within a PUBLIC value */
} xmlParserInputState;
124
/**
 * XML_DETECT_IDS:
 *
 * Bit in the loadsubset context field telling the parser to do ID/REFs
 * lookups.
 * Use it to initialize xmlLoadExtDtdDefaultValue.
 */
#define XML_DETECT_IDS		2
132
/**
 * XML_COMPLETE_ATTRS:
 *
 * Bit in the loadsubset context field telling the parser to complete the
 * elements attributes lists with the ones defaulted from the DTDs.
 * Use it to initialize xmlLoadExtDtdDefaultValue.
 */
#define XML_COMPLETE_ATTRS	4
141
/**
 * XML_SKIP_IDS:
 *
 * Bit in the loadsubset context field telling the parser not to do
 * ID/REFs registration.
 * Used to initialize xmlLoadExtDtdDefaultValue in some special cases.
 */
#define XML_SKIP_IDS		8
149
150/**
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000151 * xmlParserCtxt:
152 *
Owen Taylor3473f882001-02-23 17:55:21 +0000153 * The parser context.
Daniel Veillard61f26172002-03-12 18:46:39 +0000154 * NOTE This doesn't completely define the parser state, the (current ?)
Owen Taylor3473f882001-02-23 17:55:21 +0000155 * design of the parser uses recursive function calls since this allow
156 * and easy mapping from the production rules of the specification
157 * to the actual code. The drawback is that the actual function call
158 * also reflect the parser state. However most of the parsing routines
159 * takes as the only argument the parser context pointer, so migrating
160 * to a state based parser for progressive parsing shouldn't be too hard.
161 */
Owen Taylor3473f882001-02-23 17:55:21 +0000162struct _xmlParserCtxt {
163 struct _xmlSAXHandler *sax; /* The SAX handler */
164 void *userData; /* For SAX interface only, used by DOM build */
165 xmlDocPtr myDoc; /* the document being built */
166 int wellFormed; /* is the document well formed */
167 int replaceEntities; /* shall we replace entities ? */
168 const xmlChar *version; /* the XML version string */
169 const xmlChar *encoding; /* the declared encoding, if any */
170 int standalone; /* standalone document */
171 int html; /* an HTML(1)/Docbook(2) document */
172
173 /* Input stream stack */
174 xmlParserInputPtr input; /* Current input stream */
175 int inputNr; /* Number of current input streams */
176 int inputMax; /* Max number of input streams */
177 xmlParserInputPtr *inputTab; /* stack of inputs */
178
179 /* Node analysis stack only used for DOM building */
180 xmlNodePtr node; /* Current parsed Node */
181 int nodeNr; /* Depth of the parsing stack */
182 int nodeMax; /* Max depth of the parsing stack */
183 xmlNodePtr *nodeTab; /* array of nodes */
184
185 int record_info; /* Whether node info should be kept */
186 xmlParserNodeInfoSeq node_seq; /* info about each node parsed */
187
188 int errNo; /* error code */
189
190 int hasExternalSubset; /* reference and external subset */
191 int hasPErefs; /* the internal subset has PE refs */
192 int external; /* are we parsing an external entity */
193
194 int valid; /* is the document valid */
195 int validate; /* shall we try to validate ? */
196 xmlValidCtxt vctxt; /* The validity context */
197
198 xmlParserInputState instate; /* current type of input */
199 int token; /* next char look-ahead */
200
201 char *directory; /* the data directory */
202
203 /* Node name stack */
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000204 const xmlChar *name; /* Current parsed Node */
Owen Taylor3473f882001-02-23 17:55:21 +0000205 int nameNr; /* Depth of the parsing stack */
206 int nameMax; /* Max depth of the parsing stack */
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000207 const xmlChar * *nameTab; /* array of nodes */
Owen Taylor3473f882001-02-23 17:55:21 +0000208
209 long nbChars; /* number of xmlChar processed */
210 long checkIndex; /* used by progressive parsing lookup */
211 int keepBlanks; /* ugly but ... */
212 int disableSAX; /* SAX callbacks are disabled */
213 int inSubset; /* Parsing is in int 1/ext 2 subset */
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000214 const xmlChar * intSubName; /* name of subset */
Owen Taylor3473f882001-02-23 17:55:21 +0000215 xmlChar * extSubURI; /* URI of external subset */
216 xmlChar * extSubSystem; /* SYSTEM ID of external subset */
217
218 /* xml:space values */
219 int * space; /* Should the parser preserve spaces */
220 int spaceNr; /* Depth of the parsing stack */
221 int spaceMax; /* Max depth of the parsing stack */
222 int * spaceTab; /* array of space infos */
223
224 int depth; /* to prevent entity substitution loops */
225 xmlParserInputPtr entity; /* used to check entities boundaries */
226 int charset; /* encoding of the in-memory content
227 actually an xmlCharEncoding */
228 int nodelen; /* Those two fields are there to */
229 int nodemem; /* Speed up large node parsing */
230 int pedantic; /* signal pedantic warnings */
231 void *_private; /* For user data, libxml won't touch it */
232
233 int loadsubset; /* should the external subset be loaded */
Daniel Veillardd9bad132001-07-23 19:39:43 +0000234 int linenumbers; /* set line number in element content */
Daniel Veillard5d90b6c2001-08-22 14:29:45 +0000235 void *catalogs; /* document's own catalog */
Daniel Veillarddad3f682002-11-17 16:47:27 +0000236 int recovery; /* run in recovery mode */
Daniel Veillarda880b122003-04-21 21:36:41 +0000237 int progressive; /* is this a progressive parsing */
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000238 xmlDictPtr dict; /* dictionnary for the parser */
Daniel Veillard6155d8a2003-08-19 15:01:28 +0000239 const xmlChar * *atts; /* array for the attributes callbacks */
240 int maxatts; /* the size of the array */
Owen Taylor3473f882001-02-23 17:55:21 +0000241};
242
243/**
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000244 * xmlSAXLocator:
245 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000246 * A SAX Locator.
Owen Taylor3473f882001-02-23 17:55:21 +0000247 */
Owen Taylor3473f882001-02-23 17:55:21 +0000248struct _xmlSAXLocator {
249 const xmlChar *(*getPublicId)(void *ctx);
250 const xmlChar *(*getSystemId)(void *ctx);
251 int (*getLineNumber)(void *ctx);
252 int (*getColumnNumber)(void *ctx);
253};
254
/**
 * xmlSAXHandler:
 *
 * A SAX handler is a bunch of callbacks called by the parser when the
 * processing of the input generates data or structure information.
 */
261
Daniel Veillard9d06d302002-01-22 18:15:52 +0000262/**
263 * resolveEntitySAXFunc:
264 * @ctx: the user data (XML parser context)
265 * @publicId: The public ID of the entity
266 * @systemId: The system ID of the entity
267 *
268 * Callback:
269 * The entity loader, to control the loading of external entities,
270 * the application can either:
271 * - override this resolveEntity() callback in the SAX block
272 * - or better use the xmlSetExternalEntityLoader() function to
273 * set up it's own entity resolution routine
274 *
275 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
276 */
Owen Taylor3473f882001-02-23 17:55:21 +0000277typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000278 const xmlChar *publicId,
279 const xmlChar *systemId);
280/**
281 * internalSubsetSAXFunc:
282 * @ctx: the user data (XML parser context)
283 * @name: the root element name
284 * @ExternalID: the external ID
285 * @SystemID: the SYSTEM ID (e.g. filename or URL)
286 *
287 * Callback on internal subset declaration.
288 */
289typedef void (*internalSubsetSAXFunc) (void *ctx,
290 const xmlChar *name,
291 const xmlChar *ExternalID,
292 const xmlChar *SystemID);
293/**
294 * externalSubsetSAXFunc:
295 * @ctx: the user data (XML parser context)
296 * @name: the root element name
297 * @ExternalID: the external ID
298 * @SystemID: the SYSTEM ID (e.g. filename or URL)
299 *
300 * Callback on external subset declaration.
301 */
302typedef void (*externalSubsetSAXFunc) (void *ctx,
303 const xmlChar *name,
304 const xmlChar *ExternalID,
305 const xmlChar *SystemID);
306/**
307 * getEntitySAXFunc:
308 * @ctx: the user data (XML parser context)
309 * @name: The entity name
310 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000311 * Get an entity by name.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000312 *
313 * Returns the xmlEntityPtr if found.
314 */
Owen Taylor3473f882001-02-23 17:55:21 +0000315typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000316 const xmlChar *name);
317/**
318 * getParameterEntitySAXFunc:
319 * @ctx: the user data (XML parser context)
320 * @name: The entity name
321 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000322 * Get a parameter entity by name.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000323 *
324 * Returns the xmlEntityPtr if found.
325 */
Owen Taylor3473f882001-02-23 17:55:21 +0000326typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000327 const xmlChar *name);
328/**
329 * entityDeclSAXFunc:
330 * @ctx: the user data (XML parser context)
331 * @name: the entity name
332 * @type: the entity type
333 * @publicId: The public ID of the entity
334 * @systemId: The system ID of the entity
335 * @content: the entity value (without processing).
336 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000337 * An entity definition has been parsed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000338 */
Owen Taylor3473f882001-02-23 17:55:21 +0000339typedef void (*entityDeclSAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000340 const xmlChar *name,
341 int type,
342 const xmlChar *publicId,
343 const xmlChar *systemId,
344 xmlChar *content);
345/**
346 * notationDeclSAXFunc:
347 * @ctx: the user data (XML parser context)
348 * @name: The name of the notation
349 * @publicId: The public ID of the entity
350 * @systemId: The system ID of the entity
351 *
352 * What to do when a notation declaration has been parsed.
353 */
354typedef void (*notationDeclSAXFunc)(void *ctx,
355 const xmlChar *name,
356 const xmlChar *publicId,
357 const xmlChar *systemId);
358/**
359 * attributeDeclSAXFunc:
360 * @ctx: the user data (XML parser context)
361 * @elem: the name of the element
362 * @fullname: the attribute name
363 * @type: the attribute type
364 * @def: the type of default value
365 * @defaultValue: the attribute default value
366 * @tree: the tree of enumerated value set
367 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000368 * An attribute definition has been parsed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000369 */
370typedef void (*attributeDeclSAXFunc)(void *ctx,
371 const xmlChar *elem,
372 const xmlChar *fullname,
373 int type,
374 int def,
375 const xmlChar *defaultValue,
376 xmlEnumerationPtr tree);
377/**
378 * elementDeclSAXFunc:
379 * @ctx: the user data (XML parser context)
380 * @name: the element name
381 * @type: the element type
382 * @content: the element value tree
383 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000384 * An element definition has been parsed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000385 */
386typedef void (*elementDeclSAXFunc)(void *ctx,
387 const xmlChar *name,
388 int type,
389 xmlElementContentPtr content);
390/**
391 * unparsedEntityDeclSAXFunc:
392 * @ctx: the user data (XML parser context)
393 * @name: The name of the entity
394 * @publicId: The public ID of the entity
395 * @systemId: The system ID of the entity
396 * @notationName: the name of the notation
397 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000398 * What to do when an unparsed entity declaration is parsed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000399 */
Owen Taylor3473f882001-02-23 17:55:21 +0000400typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000401 const xmlChar *name,
402 const xmlChar *publicId,
403 const xmlChar *systemId,
404 const xmlChar *notationName);
405/**
406 * setDocumentLocatorSAXFunc:
407 * @ctx: the user data (XML parser context)
408 * @loc: A SAX Locator
409 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000410 * Receive the document locator at startup, actually xmlDefaultSAXLocator.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000411 * Everything is available on the context, so this is useless in our case.
412 */
Owen Taylor3473f882001-02-23 17:55:21 +0000413typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000414 xmlSAXLocatorPtr loc);
/**
 * startDocumentSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Called when the document starts being processed.
 */
typedef void (*startDocumentSAXFunc)(void *ctx);
/**
 * endDocumentSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Called when the document end has been detected.
 */
typedef void (*endDocumentSAXFunc)(void *ctx);
Daniel Veillard9d06d302002-01-22 18:15:52 +0000429/**
430 * startElementSAXFunc:
431 * @ctx: the user data (XML parser context)
432 * @name: The element name, including namespace prefix
433 * @atts: An array of name/value attributes pairs, NULL terminated
434 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000435 * Called when an opening tag has been processed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000436 */
437typedef void (*startElementSAXFunc) (void *ctx,
438 const xmlChar *name,
439 const xmlChar **atts);
440/**
441 * endElementSAXFunc:
442 * @ctx: the user data (XML parser context)
443 * @name: The element name
444 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000445 * Called when the end of an element has been detected.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000446 */
447typedef void (*endElementSAXFunc) (void *ctx,
448 const xmlChar *name);
449/**
450 * attributeSAXFunc:
451 * @ctx: the user data (XML parser context)
452 * @name: The attribute name, including namespace prefix
453 * @value: The attribute value
454 *
455 * Handle an attribute that has been read by the parser.
456 * The default handling is to convert the attribute into an
457 * DOM subtree and past it in a new xmlAttr element added to
458 * the element.
459 */
460typedef void (*attributeSAXFunc) (void *ctx,
461 const xmlChar *name,
462 const xmlChar *value);
463/**
464 * referenceSAXFunc:
465 * @ctx: the user data (XML parser context)
466 * @name: The entity name
467 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000468 * Called when an entity reference is detected.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000469 */
470typedef void (*referenceSAXFunc) (void *ctx,
471 const xmlChar *name);
472/**
473 * charactersSAXFunc:
474 * @ctx: the user data (XML parser context)
475 * @ch: a xmlChar string
476 * @len: the number of xmlChar
477 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000478 * Receiving some chars from the parser.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000479 */
480typedef void (*charactersSAXFunc) (void *ctx,
481 const xmlChar *ch,
482 int len);
483/**
484 * ignorableWhitespaceSAXFunc:
485 * @ctx: the user data (XML parser context)
486 * @ch: a xmlChar string
487 * @len: the number of xmlChar
488 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000489 * Receiving some ignorable whitespaces from the parser.
490 * UNUSED: by default the DOM building will use characters.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000491 */
Owen Taylor3473f882001-02-23 17:55:21 +0000492typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000493 const xmlChar *ch,
494 int len);
495/**
496 * processingInstructionSAXFunc:
497 * @ctx: the user data (XML parser context)
498 * @target: the target name
499 * @data: the PI data's
500 *
501 * A processing instruction has been parsed.
502 */
Owen Taylor3473f882001-02-23 17:55:21 +0000503typedef void (*processingInstructionSAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000504 const xmlChar *target,
505 const xmlChar *data);
506/**
507 * commentSAXFunc:
508 * @ctx: the user data (XML parser context)
509 * @value: the comment content
510 *
511 * A comment has been parsed.
512 */
513typedef void (*commentSAXFunc) (void *ctx,
514 const xmlChar *value);
515/**
516 * cdataBlockSAXFunc:
517 * @ctx: the user data (XML parser context)
518 * @value: The pcdata content
519 * @len: the block length
520 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000521 * Called when a pcdata block has been parsed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000522 */
523typedef void (*cdataBlockSAXFunc) (
524 void *ctx,
525 const xmlChar *value,
526 int len);
/**
 * warningSAXFunc:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Display and format a warning message, callback.
 */
typedef void (*warningSAXFunc)(void *ctx,
                               const char *msg, ...);
/**
 * errorSAXFunc:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Display and format an error message, callback.
 */
typedef void (*errorSAXFunc)(void *ctx,
                             const char *msg, ...);
/**
 * fatalErrorSAXFunc:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Display and format fatal error messages, callback.
 * Note: so far fatalError() SAX callbacks are not used, error()
 *       gets all the callbacks for errors.
 */
typedef void (*fatalErrorSAXFunc)(void *ctx,
                                  const char *msg, ...);
/**
 * isStandaloneSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Is this document tagged standalone?
 *
 * Returns 1 if true
 */
typedef int (*isStandaloneSAXFunc)(void *ctx);
/**
 * hasInternalSubsetSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Does this document have an internal subset?
 *
 * Returns 1 if true
 */
typedef int (*hasInternalSubsetSAXFunc)(void *ctx);
/**
 * hasExternalSubsetSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Does this document have an external subset?
 *
 * Returns 1 if true
 */
typedef int (*hasExternalSubsetSAXFunc)(void *ctx);
586
Owen Taylor3473f882001-02-23 17:55:21 +0000587struct _xmlSAXHandler {
588 internalSubsetSAXFunc internalSubset;
589 isStandaloneSAXFunc isStandalone;
590 hasInternalSubsetSAXFunc hasInternalSubset;
591 hasExternalSubsetSAXFunc hasExternalSubset;
592 resolveEntitySAXFunc resolveEntity;
593 getEntitySAXFunc getEntity;
594 entityDeclSAXFunc entityDecl;
595 notationDeclSAXFunc notationDecl;
596 attributeDeclSAXFunc attributeDecl;
597 elementDeclSAXFunc elementDecl;
598 unparsedEntityDeclSAXFunc unparsedEntityDecl;
599 setDocumentLocatorSAXFunc setDocumentLocator;
600 startDocumentSAXFunc startDocument;
601 endDocumentSAXFunc endDocument;
602 startElementSAXFunc startElement;
603 endElementSAXFunc endElement;
604 referenceSAXFunc reference;
605 charactersSAXFunc characters;
606 ignorableWhitespaceSAXFunc ignorableWhitespace;
607 processingInstructionSAXFunc processingInstruction;
608 commentSAXFunc comment;
609 warningSAXFunc warning;
610 errorSAXFunc error;
Daniel Veillard0821b152002-11-12 20:57:47 +0000611 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */
Owen Taylor3473f882001-02-23 17:55:21 +0000612 getParameterEntitySAXFunc getParameterEntity;
613 cdataBlockSAXFunc cdataBlock;
614 externalSubsetSAXFunc externalSubset;
Daniel Veillardd0463562001-10-13 09:15:48 +0000615 int initialized;
Owen Taylor3473f882001-02-23 17:55:21 +0000616};
617
618/**
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000619 * xmlExternalEntityLoader:
620 * @URL: The System ID of the resource requested
621 * @ID: The Public ID of the resource requested
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000622 * @context: the XML parser context
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000623 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000624 * External entity loaders types.
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000625 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000626 * Returns the entity input parser.
Owen Taylor3473f882001-02-23 17:55:21 +0000627 */
Daniel Veillard9d06d302002-01-22 18:15:52 +0000628typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL,
629 const char *ID,
630 xmlParserCtxtPtr context);
Owen Taylor3473f882001-02-23 17:55:21 +0000631
/*
 * Global variables: just the default SAX interface tables and XML
 * version infos.
 */
#if 0
LIBXML_DLL_IMPORT extern const char *xmlParserVersion;
#endif

/*
LIBXML_DLL_IMPORT extern xmlSAXLocator xmlDefaultSAXLocator;
LIBXML_DLL_IMPORT extern xmlSAXHandler xmlDefaultSAXHandler;
LIBXML_DLL_IMPORT extern xmlSAXHandler htmlDefaultSAXHandler;
LIBXML_DLL_IMPORT extern xmlSAXHandler docbDefaultSAXHandler;
 */

/*
 * Entity substitution default behavior.
 */

#if 0
LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultValue;
LIBXML_DLL_IMPORT extern int xmlGetWarningsDefaultValue;
#endif
Owen Taylor3473f882001-02-23 17:55:21 +0000655
Daniel Veillard6c4ffaf2002-02-11 08:54:05 +0000656#ifdef __cplusplus
657}
658#endif
659#include <libxml/encoding.h>
660#include <libxml/xmlIO.h>
661#include <libxml/globals.h>
662#ifdef __cplusplus
663extern "C" {
664#endif
665
Owen Taylor3473f882001-02-23 17:55:21 +0000666
/*
 * Init/Cleanup
 */
void		xmlInitParser		(void);
void		xmlCleanupParser	(void);
672
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000673/*
Owen Taylor3473f882001-02-23 17:55:21 +0000674 * Input functions
675 */
676int xmlParserInputRead (xmlParserInputPtr in,
677 int len);
678int xmlParserInputGrow (xmlParserInputPtr in,
679 int len);
680
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000681/*
Owen Taylor3473f882001-02-23 17:55:21 +0000682 * xmlChar handling
683 */
684xmlChar * xmlStrdup (const xmlChar *cur);
685xmlChar * xmlStrndup (const xmlChar *cur,
686 int len);
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000687xmlChar * xmlCharStrndup (const char *cur,
688 int len);
689xmlChar * xmlCharStrdup (const char *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000690xmlChar * xmlStrsub (const xmlChar *str,
691 int start,
692 int len);
693const xmlChar * xmlStrchr (const xmlChar *str,
694 xmlChar val);
695const xmlChar * xmlStrstr (const xmlChar *str,
Daniel Veillard77044732001-06-29 21:31:07 +0000696 const xmlChar *val);
Owen Taylor3473f882001-02-23 17:55:21 +0000697const xmlChar * xmlStrcasestr (const xmlChar *str,
698 xmlChar *val);
699int xmlStrcmp (const xmlChar *str1,
700 const xmlChar *str2);
701int xmlStrncmp (const xmlChar *str1,
702 const xmlChar *str2,
703 int len);
704int xmlStrcasecmp (const xmlChar *str1,
705 const xmlChar *str2);
706int xmlStrncasecmp (const xmlChar *str1,
707 const xmlChar *str2,
708 int len);
709int xmlStrEqual (const xmlChar *str1,
710 const xmlChar *str2);
711int xmlStrlen (const xmlChar *str);
712xmlChar * xmlStrcat (xmlChar *cur,
713 const xmlChar *add);
714xmlChar * xmlStrncat (xmlChar *cur,
715 const xmlChar *add,
716 int len);
717
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000718/*
Owen Taylor3473f882001-02-23 17:55:21 +0000719 * Basic parsing Interfaces
720 */
721xmlDocPtr xmlParseDoc (xmlChar *cur);
Daniel Veillard50822cb2001-07-26 20:05:51 +0000722xmlDocPtr xmlParseMemory (const char *buffer,
Owen Taylor3473f882001-02-23 17:55:21 +0000723 int size);
724xmlDocPtr xmlParseFile (const char *filename);
725int xmlSubstituteEntitiesDefault(int val);
726int xmlKeepBlanksDefault (int val);
727void xmlStopParser (xmlParserCtxtPtr ctxt);
728int xmlPedanticParserDefault(int val);
Daniel Veillardd9bad132001-07-23 19:39:43 +0000729int xmlLineNumbersDefault (int val);
Owen Taylor3473f882001-02-23 17:55:21 +0000730
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000731/*
Owen Taylor3473f882001-02-23 17:55:21 +0000732 * Recovery mode
733 */
734xmlDocPtr xmlRecoverDoc (xmlChar *cur);
Daniel Veillard50822cb2001-07-26 20:05:51 +0000735xmlDocPtr xmlRecoverMemory (const char *buffer,
Owen Taylor3473f882001-02-23 17:55:21 +0000736 int size);
737xmlDocPtr xmlRecoverFile (const char *filename);
738
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000739/*
Owen Taylor3473f882001-02-23 17:55:21 +0000740 * Less common routines and SAX interfaces
741 */
742int xmlParseDocument (xmlParserCtxtPtr ctxt);
743int xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt);
744xmlDocPtr xmlSAXParseDoc (xmlSAXHandlerPtr sax,
745 xmlChar *cur,
746 int recovery);
747int xmlSAXUserParseFile (xmlSAXHandlerPtr sax,
748 void *user_data,
749 const char *filename);
750int xmlSAXUserParseMemory (xmlSAXHandlerPtr sax,
751 void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +0000752 const char *buffer,
Owen Taylor3473f882001-02-23 17:55:21 +0000753 int size);
754xmlDocPtr xmlSAXParseMemory (xmlSAXHandlerPtr sax,
Daniel Veillard50822cb2001-07-26 20:05:51 +0000755 const char *buffer,
Owen Taylor3473f882001-02-23 17:55:21 +0000756 int size,
757 int recovery);
Daniel Veillard8606bbb2002-11-12 12:36:52 +0000758xmlDocPtr xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax,
759 const char *buffer,
760 int size,
761 int recovery,
762 void *data);
Owen Taylor3473f882001-02-23 17:55:21 +0000763xmlDocPtr xmlSAXParseFile (xmlSAXHandlerPtr sax,
764 const char *filename,
765 int recovery);
Daniel Veillarda293c322001-10-02 13:54:14 +0000766xmlDocPtr xmlSAXParseFileWithData (xmlSAXHandlerPtr sax,
767 const char *filename,
768 int recovery,
769 void *data);
Owen Taylor3473f882001-02-23 17:55:21 +0000770xmlDocPtr xmlSAXParseEntity (xmlSAXHandlerPtr sax,
771 const char *filename);
772xmlDocPtr xmlParseEntity (const char *filename);
773xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID,
774 const xmlChar *SystemID);
775xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax,
776 const xmlChar *ExternalID,
777 const xmlChar *SystemID);
778xmlDtdPtr xmlIOParseDTD (xmlSAXHandlerPtr sax,
779 xmlParserInputBufferPtr input,
780 xmlCharEncoding enc);
781int xmlParseBalancedChunkMemory(xmlDocPtr doc,
782 xmlSAXHandlerPtr sax,
783 void *user_data,
784 int depth,
785 const xmlChar *string,
Daniel Veillardcda96922001-08-21 10:56:31 +0000786 xmlNodePtr *lst);
Daniel Veillard58e44c92002-08-02 22:19:49 +0000787int xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,
788 xmlSAXHandlerPtr sax,
789 void *user_data,
790 int depth,
791 const xmlChar *string,
792 xmlNodePtr *lst,
793 int recover);
Owen Taylor3473f882001-02-23 17:55:21 +0000794int xmlParseExternalEntity (xmlDocPtr doc,
795 xmlSAXHandlerPtr sax,
796 void *user_data,
797 int depth,
798 const xmlChar *URL,
799 const xmlChar *ID,
Daniel Veillardcda96922001-08-21 10:56:31 +0000800 xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000801int xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,
802 const xmlChar *URL,
803 const xmlChar *ID,
Daniel Veillardcda96922001-08-21 10:56:31 +0000804 xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000805
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000806/*
Owen Taylor3473f882001-02-23 17:55:21 +0000807 * Parser contexts handling.
808 */
Daniel Veillarda76fe5c2003-04-24 16:06:47 +0000809int xmlInitParserCtxt (xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +0000810void xmlClearParserCtxt (xmlParserCtxtPtr ctxt);
811void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
812void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt,
813 const xmlChar* buffer,
Daniel Veillard963d2ae2002-01-20 22:08:18 +0000814 const char *filename);
Owen Taylor3473f882001-02-23 17:55:21 +0000815xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
816
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000817/*
Owen Taylor3473f882001-02-23 17:55:21 +0000818 * Reading/setting optional parsing features.
819 */
820
821int xmlGetFeaturesList (int *len,
822 const char **result);
823int xmlGetFeature (xmlParserCtxtPtr ctxt,
824 const char *name,
825 void *result);
826int xmlSetFeature (xmlParserCtxtPtr ctxt,
827 const char *name,
828 void *value);
829
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000830/*
Daniel Veillard61f26172002-03-12 18:46:39 +0000831 * Interfaces for the Push mode.
Owen Taylor3473f882001-02-23 17:55:21 +0000832 */
833xmlParserCtxtPtr xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,
834 void *user_data,
835 const char *chunk,
836 int size,
837 const char *filename);
838int xmlParseChunk (xmlParserCtxtPtr ctxt,
839 const char *chunk,
840 int size,
841 int terminate);
842
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000843/*
Daniel Veillard61f26172002-03-12 18:46:39 +0000844 * Special I/O mode.
Owen Taylor3473f882001-02-23 17:55:21 +0000845 */
846
847xmlParserCtxtPtr xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax,
848 void *user_data,
849 xmlInputReadCallback ioread,
850 xmlInputCloseCallback ioclose,
851 void *ioctx,
852 xmlCharEncoding enc);
853
854xmlParserInputPtr xmlNewIOInputStream (xmlParserCtxtPtr ctxt,
855 xmlParserInputBufferPtr input,
856 xmlCharEncoding enc);
857
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000858/*
Daniel Veillard61f26172002-03-12 18:46:39 +0000859 * Node infos.
Owen Taylor3473f882001-02-23 17:55:21 +0000860 */
861const xmlParserNodeInfo*
Daniel Veillard963d2ae2002-01-20 22:08:18 +0000862 xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt,
863 const xmlNodePtr node);
Owen Taylor3473f882001-02-23 17:55:21 +0000864void xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
865void xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
Daniel Veillard963d2ae2002-01-20 22:08:18 +0000866unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
867 const xmlNodePtr node);
Owen Taylor3473f882001-02-23 17:55:21 +0000868void xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +0000869 const xmlParserNodeInfoPtr info);
Owen Taylor3473f882001-02-23 17:55:21 +0000870
871/*
Daniel Veillard61f26172002-03-12 18:46:39 +0000872 * External entities handling actually implemented in xmlIO.
Owen Taylor3473f882001-02-23 17:55:21 +0000873 */
874
875void xmlSetExternalEntityLoader(xmlExternalEntityLoader f);
876xmlExternalEntityLoader
877 xmlGetExternalEntityLoader(void);
878xmlParserInputPtr
879 xmlLoadExternalEntity (const char *URL,
880 const char *ID,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000881 xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +0000882
883#ifdef __cplusplus
884}
885#endif
Owen Taylor3473f882001-02-23 17:55:21 +0000886#endif /* __XML_PARSER_H__ */
887