/*
 * parser.h : Interfaces, constants and types related to the XML parser.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */

9#ifndef __XML_PARSER_H__
10#define __XML_PARSER_H__
11
12#include <libxml/tree.h>
13#include <libxml/valid.h>
Owen Taylor3473f882001-02-23 17:55:21 +000014#include <libxml/entities.h>
Owen Taylor3473f882001-02-23 17:55:21 +000015
16#ifdef __cplusplus
17extern "C" {
18#endif
19
/**
 * XML_DEFAULT_VERSION:
 *
 * The default version of XML used: 1.0
 */
#define XML_DEFAULT_VERSION "1.0"

/**
 * xmlParserInput:
 *
 * An xmlParserInput is an input flow for the XML processor.
 * Each entity parsed is associated with an xmlParserInput (except the
 * few predefined ones). This is the case both for internal entities
 * - in which case the flow is already completely in memory - and
 * external entities - in which case we use the buf structure for
 * progressive reading and I18N conversions to the internal UTF-8 format.
 */

Daniel Veillard9d06d302002-01-22 18:15:52 +000038/**
39 * xmlParserInputDeallocate:
40 * @str: the string to deallocate
41 *
Daniel Veillard61f26172002-03-12 18:46:39 +000042 * Callback for freeing some parser input allocations.
Daniel Veillard9d06d302002-01-22 18:15:52 +000043 */
44typedef void (* xmlParserInputDeallocate)(xmlChar *str);
Daniel Veillard5e2dace2001-07-18 19:30:27 +000045
Owen Taylor3473f882001-02-23 17:55:21 +000046struct _xmlParserInput {
47 /* Input buffer */
48 xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */
49
50 const char *filename; /* The file analyzed, if any */
Daniel Veillard60087f32001-10-10 09:45:09 +000051 const char *directory; /* the directory/base of the file */
Owen Taylor3473f882001-02-23 17:55:21 +000052 const xmlChar *base; /* Base of the array to parse */
53 const xmlChar *cur; /* Current char being parsed */
Daniel Veillardcbaf3992001-12-31 16:16:02 +000054 const xmlChar *end; /* end of the array to parse */
Owen Taylor3473f882001-02-23 17:55:21 +000055 int length; /* length if known */
56 int line; /* Current line */
57 int col; /* Current column */
Daniel Veillard3e59fc52003-04-18 12:34:58 +000058 /*
59 * NOTE: consumed is only tested for equality in the parser code,
60 * so even if there is an overflow this should not give troubles
61 * for parsing very large instances.
62 */
63 unsigned long consumed; /* How many xmlChars already consumed */
Owen Taylor3473f882001-02-23 17:55:21 +000064 xmlParserInputDeallocate free; /* function to deallocate the base */
65 const xmlChar *encoding; /* the encoding string for entity */
66 const xmlChar *version; /* the version string for entity */
67 int standalone; /* Was that entity marked standalone */
68};
69
/**
 * xmlParserNodeInfo:
 *
 * The parser can be asked to collect node information, i.e. at what
 * place in the file each node was detected.
 * NOTE: This is off by default and not very well tested.
 */
typedef struct _xmlParserNodeInfo xmlParserNodeInfo;
typedef xmlParserNodeInfo *xmlParserNodeInfoPtr;

struct _xmlParserNodeInfo {
    const struct _xmlNode *node;
    /* Position & line # that text that created the node begins & ends on */
    unsigned long begin_pos;
    unsigned long begin_line;
    unsigned long end_pos;
    unsigned long end_line;
};

89typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
90typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
91struct _xmlParserNodeInfoSeq {
92 unsigned long maximum;
93 unsigned long length;
94 xmlParserNodeInfo* buffer;
95};
96
/**
 * xmlParserInputState:
 *
 * The parser is now working also as a state based parser.
 * The recursive one uses the state info for entities processing.
 */
typedef enum {
    XML_PARSER_EOF = -1,        /* nothing is to be parsed */
    XML_PARSER_START = 0,       /* nothing has been parsed */
    XML_PARSER_MISC,            /* Misc* before int subset */
    XML_PARSER_PI,              /* Within a processing instruction */
    XML_PARSER_DTD,             /* within some DTD content */
    XML_PARSER_PROLOG,          /* Misc* after internal subset */
    XML_PARSER_COMMENT,         /* within a comment */
    XML_PARSER_START_TAG,       /* within a start tag */
    XML_PARSER_CONTENT,         /* within the content */
    XML_PARSER_CDATA_SECTION,   /* within a CDATA section */
    XML_PARSER_END_TAG,         /* within a closing tag */
    XML_PARSER_ENTITY_DECL,     /* within an entity declaration */
    XML_PARSER_ENTITY_VALUE,    /* within an entity value in a decl */
    XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
    XML_PARSER_SYSTEM_LITERAL,  /* within a SYSTEM value */
    XML_PARSER_EPILOG,          /* the Misc* after the last end tag */
    XML_PARSER_IGNORE,          /* within an IGNORED section */
    XML_PARSER_PUBLIC_LITERAL   /* within a PUBLIC value */
} xmlParserInputState;

/**
 * XML_DETECT_IDS:
 *
 * Bit in the loadsubset context field to tell to do ID/REFs lookups.
 * Use it to initialize xmlLoadExtDtdDefaultValue.
 */
#define XML_DETECT_IDS 2

/**
 * XML_COMPLETE_ATTRS:
 *
 * Bit in the loadsubset context field to tell to complete the
 * elements attributes lists with the ones defaulted from the DTDs.
 * Use it to initialize xmlLoadExtDtdDefaultValue.
 */
#define XML_COMPLETE_ATTRS 4

/**
 * XML_SKIP_IDS:
 *
 * Bit in the loadsubset context field to tell to not do ID/REFs registration.
 * Used to initialize xmlLoadExtDtdDefaultValue in some special cases.
 */
#define XML_SKIP_IDS 8

149/**
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000150 * xmlParserCtxt:
151 *
Owen Taylor3473f882001-02-23 17:55:21 +0000152 * The parser context.
Daniel Veillard61f26172002-03-12 18:46:39 +0000153 * NOTE This doesn't completely define the parser state, the (current ?)
Owen Taylor3473f882001-02-23 17:55:21 +0000154 * design of the parser uses recursive function calls since this allow
155 * and easy mapping from the production rules of the specification
156 * to the actual code. The drawback is that the actual function call
157 * also reflect the parser state. However most of the parsing routines
158 * takes as the only argument the parser context pointer, so migrating
159 * to a state based parser for progressive parsing shouldn't be too hard.
160 */
Owen Taylor3473f882001-02-23 17:55:21 +0000161struct _xmlParserCtxt {
162 struct _xmlSAXHandler *sax; /* The SAX handler */
163 void *userData; /* For SAX interface only, used by DOM build */
164 xmlDocPtr myDoc; /* the document being built */
165 int wellFormed; /* is the document well formed */
166 int replaceEntities; /* shall we replace entities ? */
167 const xmlChar *version; /* the XML version string */
168 const xmlChar *encoding; /* the declared encoding, if any */
169 int standalone; /* standalone document */
170 int html; /* an HTML(1)/Docbook(2) document */
171
172 /* Input stream stack */
173 xmlParserInputPtr input; /* Current input stream */
174 int inputNr; /* Number of current input streams */
175 int inputMax; /* Max number of input streams */
176 xmlParserInputPtr *inputTab; /* stack of inputs */
177
178 /* Node analysis stack only used for DOM building */
179 xmlNodePtr node; /* Current parsed Node */
180 int nodeNr; /* Depth of the parsing stack */
181 int nodeMax; /* Max depth of the parsing stack */
182 xmlNodePtr *nodeTab; /* array of nodes */
183
184 int record_info; /* Whether node info should be kept */
185 xmlParserNodeInfoSeq node_seq; /* info about each node parsed */
186
187 int errNo; /* error code */
188
189 int hasExternalSubset; /* reference and external subset */
190 int hasPErefs; /* the internal subset has PE refs */
191 int external; /* are we parsing an external entity */
192
193 int valid; /* is the document valid */
194 int validate; /* shall we try to validate ? */
195 xmlValidCtxt vctxt; /* The validity context */
196
197 xmlParserInputState instate; /* current type of input */
198 int token; /* next char look-ahead */
199
200 char *directory; /* the data directory */
201
202 /* Node name stack */
203 xmlChar *name; /* Current parsed Node */
204 int nameNr; /* Depth of the parsing stack */
205 int nameMax; /* Max depth of the parsing stack */
206 xmlChar * *nameTab; /* array of nodes */
207
208 long nbChars; /* number of xmlChar processed */
209 long checkIndex; /* used by progressive parsing lookup */
210 int keepBlanks; /* ugly but ... */
211 int disableSAX; /* SAX callbacks are disabled */
212 int inSubset; /* Parsing is in int 1/ext 2 subset */
213 xmlChar * intSubName; /* name of subset */
214 xmlChar * extSubURI; /* URI of external subset */
215 xmlChar * extSubSystem; /* SYSTEM ID of external subset */
216
217 /* xml:space values */
218 int * space; /* Should the parser preserve spaces */
219 int spaceNr; /* Depth of the parsing stack */
220 int spaceMax; /* Max depth of the parsing stack */
221 int * spaceTab; /* array of space infos */
222
223 int depth; /* to prevent entity substitution loops */
224 xmlParserInputPtr entity; /* used to check entities boundaries */
225 int charset; /* encoding of the in-memory content
226 actually an xmlCharEncoding */
227 int nodelen; /* Those two fields are there to */
228 int nodemem; /* Speed up large node parsing */
229 int pedantic; /* signal pedantic warnings */
230 void *_private; /* For user data, libxml won't touch it */
231
232 int loadsubset; /* should the external subset be loaded */
Daniel Veillardd9bad132001-07-23 19:39:43 +0000233 int linenumbers; /* set line number in element content */
Daniel Veillard5d90b6c2001-08-22 14:29:45 +0000234 void *catalogs; /* document's own catalog */
Daniel Veillarddad3f682002-11-17 16:47:27 +0000235 int recovery; /* run in recovery mode */
Owen Taylor3473f882001-02-23 17:55:21 +0000236};
237
238/**
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000239 * xmlSAXLocator:
240 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000241 * A SAX Locator.
Owen Taylor3473f882001-02-23 17:55:21 +0000242 */
Owen Taylor3473f882001-02-23 17:55:21 +0000243struct _xmlSAXLocator {
244 const xmlChar *(*getPublicId)(void *ctx);
245 const xmlChar *(*getSystemId)(void *ctx);
246 int (*getLineNumber)(void *ctx);
247 int (*getColumnNumber)(void *ctx);
248};
249
/**
 * xmlSAXHandler:
 *
 * A SAX handler is a bunch of callbacks called by the parser when
 * processing of the input generates data or structure information.
 */

Daniel Veillard9d06d302002-01-22 18:15:52 +0000257/**
258 * resolveEntitySAXFunc:
259 * @ctx: the user data (XML parser context)
260 * @publicId: The public ID of the entity
261 * @systemId: The system ID of the entity
262 *
263 * Callback:
264 * The entity loader, to control the loading of external entities,
265 * the application can either:
266 * - override this resolveEntity() callback in the SAX block
267 * - or better use the xmlSetExternalEntityLoader() function to
268 * set up it's own entity resolution routine
269 *
270 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
271 */
Owen Taylor3473f882001-02-23 17:55:21 +0000272typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000273 const xmlChar *publicId,
274 const xmlChar *systemId);
275/**
276 * internalSubsetSAXFunc:
277 * @ctx: the user data (XML parser context)
278 * @name: the root element name
279 * @ExternalID: the external ID
280 * @SystemID: the SYSTEM ID (e.g. filename or URL)
281 *
282 * Callback on internal subset declaration.
283 */
284typedef void (*internalSubsetSAXFunc) (void *ctx,
285 const xmlChar *name,
286 const xmlChar *ExternalID,
287 const xmlChar *SystemID);
288/**
289 * externalSubsetSAXFunc:
290 * @ctx: the user data (XML parser context)
291 * @name: the root element name
292 * @ExternalID: the external ID
293 * @SystemID: the SYSTEM ID (e.g. filename or URL)
294 *
295 * Callback on external subset declaration.
296 */
297typedef void (*externalSubsetSAXFunc) (void *ctx,
298 const xmlChar *name,
299 const xmlChar *ExternalID,
300 const xmlChar *SystemID);
301/**
302 * getEntitySAXFunc:
303 * @ctx: the user data (XML parser context)
304 * @name: The entity name
305 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000306 * Get an entity by name.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000307 *
308 * Returns the xmlEntityPtr if found.
309 */
Owen Taylor3473f882001-02-23 17:55:21 +0000310typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000311 const xmlChar *name);
312/**
313 * getParameterEntitySAXFunc:
314 * @ctx: the user data (XML parser context)
315 * @name: The entity name
316 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000317 * Get a parameter entity by name.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000318 *
319 * Returns the xmlEntityPtr if found.
320 */
Owen Taylor3473f882001-02-23 17:55:21 +0000321typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000322 const xmlChar *name);
323/**
324 * entityDeclSAXFunc:
325 * @ctx: the user data (XML parser context)
326 * @name: the entity name
327 * @type: the entity type
328 * @publicId: The public ID of the entity
329 * @systemId: The system ID of the entity
330 * @content: the entity value (without processing).
331 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000332 * An entity definition has been parsed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000333 */
Owen Taylor3473f882001-02-23 17:55:21 +0000334typedef void (*entityDeclSAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000335 const xmlChar *name,
336 int type,
337 const xmlChar *publicId,
338 const xmlChar *systemId,
339 xmlChar *content);
340/**
341 * notationDeclSAXFunc:
342 * @ctx: the user data (XML parser context)
343 * @name: The name of the notation
344 * @publicId: The public ID of the entity
345 * @systemId: The system ID of the entity
346 *
347 * What to do when a notation declaration has been parsed.
348 */
349typedef void (*notationDeclSAXFunc)(void *ctx,
350 const xmlChar *name,
351 const xmlChar *publicId,
352 const xmlChar *systemId);
353/**
354 * attributeDeclSAXFunc:
355 * @ctx: the user data (XML parser context)
356 * @elem: the name of the element
357 * @fullname: the attribute name
358 * @type: the attribute type
359 * @def: the type of default value
360 * @defaultValue: the attribute default value
361 * @tree: the tree of enumerated value set
362 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000363 * An attribute definition has been parsed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000364 */
365typedef void (*attributeDeclSAXFunc)(void *ctx,
366 const xmlChar *elem,
367 const xmlChar *fullname,
368 int type,
369 int def,
370 const xmlChar *defaultValue,
371 xmlEnumerationPtr tree);
372/**
373 * elementDeclSAXFunc:
374 * @ctx: the user data (XML parser context)
375 * @name: the element name
376 * @type: the element type
377 * @content: the element value tree
378 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000379 * An element definition has been parsed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000380 */
381typedef void (*elementDeclSAXFunc)(void *ctx,
382 const xmlChar *name,
383 int type,
384 xmlElementContentPtr content);
385/**
386 * unparsedEntityDeclSAXFunc:
387 * @ctx: the user data (XML parser context)
388 * @name: The name of the entity
389 * @publicId: The public ID of the entity
390 * @systemId: The system ID of the entity
391 * @notationName: the name of the notation
392 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000393 * What to do when an unparsed entity declaration is parsed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000394 */
Owen Taylor3473f882001-02-23 17:55:21 +0000395typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000396 const xmlChar *name,
397 const xmlChar *publicId,
398 const xmlChar *systemId,
399 const xmlChar *notationName);
400/**
401 * setDocumentLocatorSAXFunc:
402 * @ctx: the user data (XML parser context)
403 * @loc: A SAX Locator
404 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000405 * Receive the document locator at startup, actually xmlDefaultSAXLocator.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000406 * Everything is available on the context, so this is useless in our case.
407 */
Owen Taylor3473f882001-02-23 17:55:21 +0000408typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000409 xmlSAXLocatorPtr loc);
/**
 * startDocumentSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Called when the document starts being processed.
 */
typedef void (*startDocumentSAXFunc) (void *ctx);
/**
 * endDocumentSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Called when the document end has been detected.
 */
typedef void (*endDocumentSAXFunc) (void *ctx);
Daniel Veillard9d06d302002-01-22 18:15:52 +0000424/**
425 * startElementSAXFunc:
426 * @ctx: the user data (XML parser context)
427 * @name: The element name, including namespace prefix
428 * @atts: An array of name/value attributes pairs, NULL terminated
429 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000430 * Called when an opening tag has been processed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000431 */
432typedef void (*startElementSAXFunc) (void *ctx,
433 const xmlChar *name,
434 const xmlChar **atts);
435/**
436 * endElementSAXFunc:
437 * @ctx: the user data (XML parser context)
438 * @name: The element name
439 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000440 * Called when the end of an element has been detected.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000441 */
442typedef void (*endElementSAXFunc) (void *ctx,
443 const xmlChar *name);
444/**
445 * attributeSAXFunc:
446 * @ctx: the user data (XML parser context)
447 * @name: The attribute name, including namespace prefix
448 * @value: The attribute value
449 *
450 * Handle an attribute that has been read by the parser.
451 * The default handling is to convert the attribute into an
452 * DOM subtree and past it in a new xmlAttr element added to
453 * the element.
454 */
455typedef void (*attributeSAXFunc) (void *ctx,
456 const xmlChar *name,
457 const xmlChar *value);
458/**
459 * referenceSAXFunc:
460 * @ctx: the user data (XML parser context)
461 * @name: The entity name
462 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000463 * Called when an entity reference is detected.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000464 */
465typedef void (*referenceSAXFunc) (void *ctx,
466 const xmlChar *name);
467/**
468 * charactersSAXFunc:
469 * @ctx: the user data (XML parser context)
470 * @ch: a xmlChar string
471 * @len: the number of xmlChar
472 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000473 * Receiving some chars from the parser.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000474 */
475typedef void (*charactersSAXFunc) (void *ctx,
476 const xmlChar *ch,
477 int len);
478/**
479 * ignorableWhitespaceSAXFunc:
480 * @ctx: the user data (XML parser context)
481 * @ch: a xmlChar string
482 * @len: the number of xmlChar
483 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000484 * Receiving some ignorable whitespaces from the parser.
485 * UNUSED: by default the DOM building will use characters.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000486 */
Owen Taylor3473f882001-02-23 17:55:21 +0000487typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000488 const xmlChar *ch,
489 int len);
490/**
491 * processingInstructionSAXFunc:
492 * @ctx: the user data (XML parser context)
493 * @target: the target name
494 * @data: the PI data's
495 *
496 * A processing instruction has been parsed.
497 */
Owen Taylor3473f882001-02-23 17:55:21 +0000498typedef void (*processingInstructionSAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000499 const xmlChar *target,
500 const xmlChar *data);
501/**
502 * commentSAXFunc:
503 * @ctx: the user data (XML parser context)
504 * @value: the comment content
505 *
506 * A comment has been parsed.
507 */
508typedef void (*commentSAXFunc) (void *ctx,
509 const xmlChar *value);
510/**
511 * cdataBlockSAXFunc:
512 * @ctx: the user data (XML parser context)
513 * @value: The pcdata content
514 * @len: the block length
515 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000516 * Called when a pcdata block has been parsed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000517 */
518typedef void (*cdataBlockSAXFunc) (
519 void *ctx,
520 const xmlChar *value,
521 int len);
/**
 * warningSAXFunc:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Display and format a warning message, callback.
 */
typedef void (*warningSAXFunc) (void *ctx,
                                const char *msg, ...);
/**
 * errorSAXFunc:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Display and format an error message, callback.
 */
typedef void (*errorSAXFunc) (void *ctx,
                              const char *msg, ...);
/**
 * fatalErrorSAXFunc:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Display and format fatal error messages, callback.
 * Note: so far fatalError() SAX callbacks are not used, error()
 *       gets all the callbacks for errors.
 */
typedef void (*fatalErrorSAXFunc) (void *ctx,
                                   const char *msg, ...);
/**
 * isStandaloneSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Is this document tagged standalone?
 *
 * Returns 1 if true
 */
typedef int (*isStandaloneSAXFunc) (void *ctx);
/**
 * hasInternalSubsetSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Does this document have an internal subset?
 *
 * Returns 1 if true
 */
typedef int (*hasInternalSubsetSAXFunc) (void *ctx);
/**
 * hasExternalSubsetSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Does this document have an external subset?
 *
 * Returns 1 if true
 */
typedef int (*hasExternalSubsetSAXFunc) (void *ctx);

Owen Taylor3473f882001-02-23 17:55:21 +0000582struct _xmlSAXHandler {
583 internalSubsetSAXFunc internalSubset;
584 isStandaloneSAXFunc isStandalone;
585 hasInternalSubsetSAXFunc hasInternalSubset;
586 hasExternalSubsetSAXFunc hasExternalSubset;
587 resolveEntitySAXFunc resolveEntity;
588 getEntitySAXFunc getEntity;
589 entityDeclSAXFunc entityDecl;
590 notationDeclSAXFunc notationDecl;
591 attributeDeclSAXFunc attributeDecl;
592 elementDeclSAXFunc elementDecl;
593 unparsedEntityDeclSAXFunc unparsedEntityDecl;
594 setDocumentLocatorSAXFunc setDocumentLocator;
595 startDocumentSAXFunc startDocument;
596 endDocumentSAXFunc endDocument;
597 startElementSAXFunc startElement;
598 endElementSAXFunc endElement;
599 referenceSAXFunc reference;
600 charactersSAXFunc characters;
601 ignorableWhitespaceSAXFunc ignorableWhitespace;
602 processingInstructionSAXFunc processingInstruction;
603 commentSAXFunc comment;
604 warningSAXFunc warning;
605 errorSAXFunc error;
Daniel Veillard0821b152002-11-12 20:57:47 +0000606 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */
Owen Taylor3473f882001-02-23 17:55:21 +0000607 getParameterEntitySAXFunc getParameterEntity;
608 cdataBlockSAXFunc cdataBlock;
609 externalSubsetSAXFunc externalSubset;
Daniel Veillardd0463562001-10-13 09:15:48 +0000610 int initialized;
Owen Taylor3473f882001-02-23 17:55:21 +0000611};
612
613/**
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000614 * xmlExternalEntityLoader:
615 * @URL: The System ID of the resource requested
616 * @ID: The Public ID of the resource requested
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000617 * @context: the XML parser context
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000618 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000619 * External entity loaders types.
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000620 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000621 * Returns the entity input parser.
Owen Taylor3473f882001-02-23 17:55:21 +0000622 */
Daniel Veillard9d06d302002-01-22 18:15:52 +0000623typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL,
624 const char *ID,
625 xmlParserCtxtPtr context);
Owen Taylor3473f882001-02-23 17:55:21 +0000626
/*
 * Global variables: just the default SAX interface tables and XML
 * version infos.
 *
 * Every declaration in this section is disabled, either with #if 0 or
 * by being commented out, so nothing here is declared by this header.
 */
#if 0
LIBXML_DLL_IMPORT extern const char *xmlParserVersion;
#endif

/*
LIBXML_DLL_IMPORT extern xmlSAXLocator xmlDefaultSAXLocator;
LIBXML_DLL_IMPORT extern xmlSAXHandler xmlDefaultSAXHandler;
LIBXML_DLL_IMPORT extern xmlSAXHandler htmlDefaultSAXHandler;
LIBXML_DLL_IMPORT extern xmlSAXHandler docbDefaultSAXHandler;
 */

/*
 * Entity substitution default behavior.
 */

#if 0
LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultValue;
LIBXML_DLL_IMPORT extern int xmlGetWarningsDefaultValue;
#endif

Daniel Veillard6c4ffaf2002-02-11 08:54:05 +0000651#ifdef __cplusplus
652}
653#endif
654#include <libxml/encoding.h>
655#include <libxml/xmlIO.h>
656#include <libxml/globals.h>
657#ifdef __cplusplus
658extern "C" {
659#endif
660
Owen Taylor3473f882001-02-23 17:55:21 +0000661
/*
 * Init/Cleanup
 *
 * Both entry points take no argument and return nothing.
 */
void xmlInitParser(void);
void xmlCleanupParser(void);

Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000668/*
Owen Taylor3473f882001-02-23 17:55:21 +0000669 * Input functions
670 */
671int xmlParserInputRead (xmlParserInputPtr in,
672 int len);
673int xmlParserInputGrow (xmlParserInputPtr in,
674 int len);
675
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000676/*
Owen Taylor3473f882001-02-23 17:55:21 +0000677 * xmlChar handling
678 */
679xmlChar * xmlStrdup (const xmlChar *cur);
680xmlChar * xmlStrndup (const xmlChar *cur,
681 int len);
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000682xmlChar * xmlCharStrndup (const char *cur,
683 int len);
684xmlChar * xmlCharStrdup (const char *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000685xmlChar * xmlStrsub (const xmlChar *str,
686 int start,
687 int len);
688const xmlChar * xmlStrchr (const xmlChar *str,
689 xmlChar val);
690const xmlChar * xmlStrstr (const xmlChar *str,
Daniel Veillard77044732001-06-29 21:31:07 +0000691 const xmlChar *val);
Owen Taylor3473f882001-02-23 17:55:21 +0000692const xmlChar * xmlStrcasestr (const xmlChar *str,
693 xmlChar *val);
694int xmlStrcmp (const xmlChar *str1,
695 const xmlChar *str2);
696int xmlStrncmp (const xmlChar *str1,
697 const xmlChar *str2,
698 int len);
699int xmlStrcasecmp (const xmlChar *str1,
700 const xmlChar *str2);
701int xmlStrncasecmp (const xmlChar *str1,
702 const xmlChar *str2,
703 int len);
704int xmlStrEqual (const xmlChar *str1,
705 const xmlChar *str2);
706int xmlStrlen (const xmlChar *str);
707xmlChar * xmlStrcat (xmlChar *cur,
708 const xmlChar *add);
709xmlChar * xmlStrncat (xmlChar *cur,
710 const xmlChar *add,
711 int len);
712
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000713/*
Owen Taylor3473f882001-02-23 17:55:21 +0000714 * Basic parsing Interfaces
715 */
716xmlDocPtr xmlParseDoc (xmlChar *cur);
Daniel Veillard50822cb2001-07-26 20:05:51 +0000717xmlDocPtr xmlParseMemory (const char *buffer,
Owen Taylor3473f882001-02-23 17:55:21 +0000718 int size);
719xmlDocPtr xmlParseFile (const char *filename);
720int xmlSubstituteEntitiesDefault(int val);
721int xmlKeepBlanksDefault (int val);
722void xmlStopParser (xmlParserCtxtPtr ctxt);
723int xmlPedanticParserDefault(int val);
Daniel Veillardd9bad132001-07-23 19:39:43 +0000724int xmlLineNumbersDefault (int val);
Owen Taylor3473f882001-02-23 17:55:21 +0000725
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000726/*
Owen Taylor3473f882001-02-23 17:55:21 +0000727 * Recovery mode
728 */
729xmlDocPtr xmlRecoverDoc (xmlChar *cur);
Daniel Veillard50822cb2001-07-26 20:05:51 +0000730xmlDocPtr xmlRecoverMemory (const char *buffer,
Owen Taylor3473f882001-02-23 17:55:21 +0000731 int size);
732xmlDocPtr xmlRecoverFile (const char *filename);
733
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000734/*
Owen Taylor3473f882001-02-23 17:55:21 +0000735 * Less common routines and SAX interfaces
736 */
737int xmlParseDocument (xmlParserCtxtPtr ctxt);
738int xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt);
739xmlDocPtr xmlSAXParseDoc (xmlSAXHandlerPtr sax,
740 xmlChar *cur,
741 int recovery);
742int xmlSAXUserParseFile (xmlSAXHandlerPtr sax,
743 void *user_data,
744 const char *filename);
745int xmlSAXUserParseMemory (xmlSAXHandlerPtr sax,
746 void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +0000747 const char *buffer,
Owen Taylor3473f882001-02-23 17:55:21 +0000748 int size);
749xmlDocPtr xmlSAXParseMemory (xmlSAXHandlerPtr sax,
Daniel Veillard50822cb2001-07-26 20:05:51 +0000750 const char *buffer,
Owen Taylor3473f882001-02-23 17:55:21 +0000751 int size,
752 int recovery);
Daniel Veillard8606bbb2002-11-12 12:36:52 +0000753xmlDocPtr xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax,
754 const char *buffer,
755 int size,
756 int recovery,
757 void *data);
Owen Taylor3473f882001-02-23 17:55:21 +0000758xmlDocPtr xmlSAXParseFile (xmlSAXHandlerPtr sax,
759 const char *filename,
760 int recovery);
Daniel Veillarda293c322001-10-02 13:54:14 +0000761xmlDocPtr xmlSAXParseFileWithData (xmlSAXHandlerPtr sax,
762 const char *filename,
763 int recovery,
764 void *data);
Owen Taylor3473f882001-02-23 17:55:21 +0000765xmlDocPtr xmlSAXParseEntity (xmlSAXHandlerPtr sax,
766 const char *filename);
767xmlDocPtr xmlParseEntity (const char *filename);
768xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID,
769 const xmlChar *SystemID);
770xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax,
771 const xmlChar *ExternalID,
772 const xmlChar *SystemID);
773xmlDtdPtr xmlIOParseDTD (xmlSAXHandlerPtr sax,
774 xmlParserInputBufferPtr input,
775 xmlCharEncoding enc);
776int xmlParseBalancedChunkMemory(xmlDocPtr doc,
777 xmlSAXHandlerPtr sax,
778 void *user_data,
779 int depth,
780 const xmlChar *string,
Daniel Veillardcda96922001-08-21 10:56:31 +0000781 xmlNodePtr *lst);
Daniel Veillard58e44c92002-08-02 22:19:49 +0000782int xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,
783 xmlSAXHandlerPtr sax,
784 void *user_data,
785 int depth,
786 const xmlChar *string,
787 xmlNodePtr *lst,
788 int recover);
Owen Taylor3473f882001-02-23 17:55:21 +0000789int xmlParseExternalEntity (xmlDocPtr doc,
790 xmlSAXHandlerPtr sax,
791 void *user_data,
792 int depth,
793 const xmlChar *URL,
794 const xmlChar *ID,
Daniel Veillardcda96922001-08-21 10:56:31 +0000795 xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000796int xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,
797 const xmlChar *URL,
798 const xmlChar *ID,
Daniel Veillardcda96922001-08-21 10:56:31 +0000799 xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000800
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000801/*
Owen Taylor3473f882001-02-23 17:55:21 +0000802 * Parser contexts handling.
803 */
804void xmlInitParserCtxt (xmlParserCtxtPtr ctxt);
805void xmlClearParserCtxt (xmlParserCtxtPtr ctxt);
806void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
807void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt,
808 const xmlChar* buffer,
Daniel Veillard963d2ae2002-01-20 22:08:18 +0000809 const char *filename);
Owen Taylor3473f882001-02-23 17:55:21 +0000810xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
811
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000812/*
Owen Taylor3473f882001-02-23 17:55:21 +0000813 * Reading/setting optional parsing features.
814 */
815
816int xmlGetFeaturesList (int *len,
817 const char **result);
818int xmlGetFeature (xmlParserCtxtPtr ctxt,
819 const char *name,
820 void *result);
821int xmlSetFeature (xmlParserCtxtPtr ctxt,
822 const char *name,
823 void *value);
824
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000825/*
Daniel Veillard61f26172002-03-12 18:46:39 +0000826 * Interfaces for the Push mode.
Owen Taylor3473f882001-02-23 17:55:21 +0000827 */
828xmlParserCtxtPtr xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,
829 void *user_data,
830 const char *chunk,
831 int size,
832 const char *filename);
833int xmlParseChunk (xmlParserCtxtPtr ctxt,
834 const char *chunk,
835 int size,
836 int terminate);
837
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000838/*
Daniel Veillard61f26172002-03-12 18:46:39 +0000839 * Special I/O mode.
Owen Taylor3473f882001-02-23 17:55:21 +0000840 */
841
842xmlParserCtxtPtr xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax,
843 void *user_data,
844 xmlInputReadCallback ioread,
845 xmlInputCloseCallback ioclose,
846 void *ioctx,
847 xmlCharEncoding enc);
848
849xmlParserInputPtr xmlNewIOInputStream (xmlParserCtxtPtr ctxt,
850 xmlParserInputBufferPtr input,
851 xmlCharEncoding enc);
852
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000853/*
Daniel Veillard61f26172002-03-12 18:46:39 +0000854 * Node infos.
Owen Taylor3473f882001-02-23 17:55:21 +0000855 */
856const xmlParserNodeInfo*
Daniel Veillard963d2ae2002-01-20 22:08:18 +0000857 xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt,
858 const xmlNodePtr node);
Owen Taylor3473f882001-02-23 17:55:21 +0000859void xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
860void xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
Daniel Veillard963d2ae2002-01-20 22:08:18 +0000861unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
862 const xmlNodePtr node);
Owen Taylor3473f882001-02-23 17:55:21 +0000863void xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +0000864 const xmlParserNodeInfoPtr info);
Owen Taylor3473f882001-02-23 17:55:21 +0000865
866/*
Daniel Veillard61f26172002-03-12 18:46:39 +0000867 * External entities handling actually implemented in xmlIO.
Owen Taylor3473f882001-02-23 17:55:21 +0000868 */
869
870void xmlSetExternalEntityLoader(xmlExternalEntityLoader f);
871xmlExternalEntityLoader
872 xmlGetExternalEntityLoader(void);
873xmlParserInputPtr
874 xmlLoadExternalEntity (const char *URL,
875 const char *ID,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000876 xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +0000877
878#ifdef __cplusplus
879}
880#endif
Owen Taylor3473f882001-02-23 17:55:21 +0000881#endif /* __XML_PARSER_H__ */
882