blob: b08038453c7c9e00a611a785f51629edef566e38 [file] [log] [blame]
/*
 * parser.h : Interfaces, constants and types related to the XML parser.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
8
9#ifndef __XML_PARSER_H__
10#define __XML_PARSER_H__
11
12#include <libxml/tree.h>
Daniel Veillard2fdbd322003-08-18 12:15:38 +000013#include <libxml/dict.h>
Owen Taylor3473f882001-02-23 17:55:21 +000014#include <libxml/valid.h>
Owen Taylor3473f882001-02-23 17:55:21 +000015#include <libxml/entities.h>
Owen Taylor3473f882001-02-23 17:55:21 +000016
17#ifdef __cplusplus
18extern "C" {
19#endif
20
/**
 * XML_DEFAULT_VERSION:
 *
 * The default version of XML used: 1.0
 */
#define XML_DEFAULT_VERSION	"1.0"
27
/**
 * xmlParserInput:
 *
 * An xmlParserInput is an input flow for the XML processor.
 * Each entity parsed is associated with an xmlParserInput (except the
 * few predefined ones). This is the case both for internal entities
 * - in which case the flow is already completely in memory - and for
 * external entities - in which case we use the buf structure for
 * progressive reading and I18N conversions to the internal UTF-8 format.
 */
38
Daniel Veillard9d06d302002-01-22 18:15:52 +000039/**
40 * xmlParserInputDeallocate:
41 * @str: the string to deallocate
42 *
Daniel Veillard61f26172002-03-12 18:46:39 +000043 * Callback for freeing some parser input allocations.
Daniel Veillard9d06d302002-01-22 18:15:52 +000044 */
45typedef void (* xmlParserInputDeallocate)(xmlChar *str);
Daniel Veillard5e2dace2001-07-18 19:30:27 +000046
Owen Taylor3473f882001-02-23 17:55:21 +000047struct _xmlParserInput {
48 /* Input buffer */
49 xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */
50
51 const char *filename; /* The file analyzed, if any */
Daniel Veillard60087f32001-10-10 09:45:09 +000052 const char *directory; /* the directory/base of the file */
Owen Taylor3473f882001-02-23 17:55:21 +000053 const xmlChar *base; /* Base of the array to parse */
54 const xmlChar *cur; /* Current char being parsed */
Daniel Veillardcbaf3992001-12-31 16:16:02 +000055 const xmlChar *end; /* end of the array to parse */
Owen Taylor3473f882001-02-23 17:55:21 +000056 int length; /* length if known */
57 int line; /* Current line */
58 int col; /* Current column */
Daniel Veillard3e59fc52003-04-18 12:34:58 +000059 /*
60 * NOTE: consumed is only tested for equality in the parser code,
61 * so even if there is an overflow this should not give troubles
62 * for parsing very large instances.
63 */
64 unsigned long consumed; /* How many xmlChars already consumed */
Owen Taylor3473f882001-02-23 17:55:21 +000065 xmlParserInputDeallocate free; /* function to deallocate the base */
66 const xmlChar *encoding; /* the encoding string for entity */
67 const xmlChar *version; /* the version string for entity */
68 int standalone; /* Was that entity marked standalone */
69};
70
/**
 * xmlParserNodeInfo:
 *
 * The parser can be asked to collect node information, i.e. at what
 * place in the file each node was detected.
 * NOTE: This is off by default and not very well tested.
 */
typedef struct _xmlParserNodeInfo xmlParserNodeInfo;
typedef xmlParserNodeInfo *xmlParserNodeInfoPtr;

struct _xmlParserNodeInfo {
    const struct _xmlNode *node;   /* the node this record describes */
    /* Position & line # where the text that created the node begins & ends */
    unsigned long begin_pos;
    unsigned long begin_line;
    unsigned long end_pos;
    unsigned long end_line;
};
89
90typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
91typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
92struct _xmlParserNodeInfoSeq {
93 unsigned long maximum;
94 unsigned long length;
95 xmlParserNodeInfo* buffer;
96};
97
/**
 * xmlParserInputState:
 *
 * The parser now also works as a state based parser.
 * The recursive one uses the state info for entities processing.
 */
typedef enum {
    XML_PARSER_EOF = -1,	/* nothing is to be parsed */
    XML_PARSER_START = 0,	/* nothing has been parsed */
    XML_PARSER_MISC,		/* Misc* before int subset */
    XML_PARSER_PI,		/* Within a processing instruction */
    XML_PARSER_DTD,		/* within some DTD content */
    XML_PARSER_PROLOG,		/* Misc* after internal subset */
    XML_PARSER_COMMENT,		/* within a comment */
    XML_PARSER_START_TAG,	/* within a start tag */
    XML_PARSER_CONTENT,		/* within the content */
    XML_PARSER_CDATA_SECTION,	/* within a CDATA section */
    XML_PARSER_END_TAG,		/* within a closing tag */
    XML_PARSER_ENTITY_DECL,	/* within an entity declaration */
    XML_PARSER_ENTITY_VALUE,	/* within an entity value in a decl */
    XML_PARSER_ATTRIBUTE_VALUE,	/* within an attribute value */
    XML_PARSER_SYSTEM_LITERAL,	/* within a SYSTEM value */
    XML_PARSER_EPILOG, 		/* the Misc* after the last end tag */
    XML_PARSER_IGNORE,		/* within an IGNORED section */
    XML_PARSER_PUBLIC_LITERAL 	/* within a PUBLIC value */
} xmlParserInputState;
124
125/**
Daniel Veillardd16df9f2001-05-23 13:44:21 +0000126 * XML_DETECT_IDS:
127 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000128 * Bit in the loadsubset context field to tell to do ID/REFs lookups.
129 * Use it to initialize xmlLoadExtDtdDefaultValue.
Daniel Veillardd16df9f2001-05-23 13:44:21 +0000130 */
131#define XML_DETECT_IDS 2
132
/**
 * XML_COMPLETE_ATTRS:
 *
 * Bit in the loadsubset context field telling the parser to complete
 * the elements' attribute lists with the ones defaulted from the DTDs.
 * Use it to initialize xmlLoadExtDtdDefaultValue.
 */
#define XML_COMPLETE_ATTRS	4
141
/**
 * XML_SKIP_IDS:
 *
 * Bit in the loadsubset context field telling the parser not to do
 * ID/REFs registration.
 * Used to initialize xmlLoadExtDtdDefaultValue in some special cases.
 */
#define XML_SKIP_IDS		8
149
150/**
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000151 * xmlParserCtxt:
152 *
Owen Taylor3473f882001-02-23 17:55:21 +0000153 * The parser context.
Daniel Veillard61f26172002-03-12 18:46:39 +0000154 * NOTE This doesn't completely define the parser state, the (current ?)
Owen Taylor3473f882001-02-23 17:55:21 +0000155 * design of the parser uses recursive function calls since this allow
156 * and easy mapping from the production rules of the specification
157 * to the actual code. The drawback is that the actual function call
158 * also reflect the parser state. However most of the parsing routines
159 * takes as the only argument the parser context pointer, so migrating
160 * to a state based parser for progressive parsing shouldn't be too hard.
161 */
Owen Taylor3473f882001-02-23 17:55:21 +0000162struct _xmlParserCtxt {
163 struct _xmlSAXHandler *sax; /* The SAX handler */
164 void *userData; /* For SAX interface only, used by DOM build */
165 xmlDocPtr myDoc; /* the document being built */
166 int wellFormed; /* is the document well formed */
167 int replaceEntities; /* shall we replace entities ? */
168 const xmlChar *version; /* the XML version string */
169 const xmlChar *encoding; /* the declared encoding, if any */
170 int standalone; /* standalone document */
171 int html; /* an HTML(1)/Docbook(2) document */
172
173 /* Input stream stack */
174 xmlParserInputPtr input; /* Current input stream */
175 int inputNr; /* Number of current input streams */
176 int inputMax; /* Max number of input streams */
177 xmlParserInputPtr *inputTab; /* stack of inputs */
178
179 /* Node analysis stack only used for DOM building */
180 xmlNodePtr node; /* Current parsed Node */
181 int nodeNr; /* Depth of the parsing stack */
182 int nodeMax; /* Max depth of the parsing stack */
183 xmlNodePtr *nodeTab; /* array of nodes */
184
185 int record_info; /* Whether node info should be kept */
186 xmlParserNodeInfoSeq node_seq; /* info about each node parsed */
187
188 int errNo; /* error code */
189
190 int hasExternalSubset; /* reference and external subset */
191 int hasPErefs; /* the internal subset has PE refs */
192 int external; /* are we parsing an external entity */
193
194 int valid; /* is the document valid */
195 int validate; /* shall we try to validate ? */
196 xmlValidCtxt vctxt; /* The validity context */
197
198 xmlParserInputState instate; /* current type of input */
199 int token; /* next char look-ahead */
200
201 char *directory; /* the data directory */
202
203 /* Node name stack */
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000204 const xmlChar *name; /* Current parsed Node */
Owen Taylor3473f882001-02-23 17:55:21 +0000205 int nameNr; /* Depth of the parsing stack */
206 int nameMax; /* Max depth of the parsing stack */
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000207 const xmlChar * *nameTab; /* array of nodes */
Owen Taylor3473f882001-02-23 17:55:21 +0000208
209 long nbChars; /* number of xmlChar processed */
210 long checkIndex; /* used by progressive parsing lookup */
211 int keepBlanks; /* ugly but ... */
212 int disableSAX; /* SAX callbacks are disabled */
213 int inSubset; /* Parsing is in int 1/ext 2 subset */
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000214 const xmlChar * intSubName; /* name of subset */
Owen Taylor3473f882001-02-23 17:55:21 +0000215 xmlChar * extSubURI; /* URI of external subset */
216 xmlChar * extSubSystem; /* SYSTEM ID of external subset */
217
218 /* xml:space values */
219 int * space; /* Should the parser preserve spaces */
220 int spaceNr; /* Depth of the parsing stack */
221 int spaceMax; /* Max depth of the parsing stack */
222 int * spaceTab; /* array of space infos */
223
224 int depth; /* to prevent entity substitution loops */
225 xmlParserInputPtr entity; /* used to check entities boundaries */
226 int charset; /* encoding of the in-memory content
227 actually an xmlCharEncoding */
228 int nodelen; /* Those two fields are there to */
229 int nodemem; /* Speed up large node parsing */
230 int pedantic; /* signal pedantic warnings */
231 void *_private; /* For user data, libxml won't touch it */
232
233 int loadsubset; /* should the external subset be loaded */
Daniel Veillardd9bad132001-07-23 19:39:43 +0000234 int linenumbers; /* set line number in element content */
Daniel Veillard5d90b6c2001-08-22 14:29:45 +0000235 void *catalogs; /* document's own catalog */
Daniel Veillarddad3f682002-11-17 16:47:27 +0000236 int recovery; /* run in recovery mode */
Daniel Veillarda880b122003-04-21 21:36:41 +0000237 int progressive; /* is this a progressive parsing */
Daniel Veillard2fdbd322003-08-18 12:15:38 +0000238 xmlDictPtr dict; /* dictionnary for the parser */
Owen Taylor3473f882001-02-23 17:55:21 +0000239};
240
241/**
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000242 * xmlSAXLocator:
243 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000244 * A SAX Locator.
Owen Taylor3473f882001-02-23 17:55:21 +0000245 */
Owen Taylor3473f882001-02-23 17:55:21 +0000246struct _xmlSAXLocator {
247 const xmlChar *(*getPublicId)(void *ctx);
248 const xmlChar *(*getSystemId)(void *ctx);
249 int (*getLineNumber)(void *ctx);
250 int (*getColumnNumber)(void *ctx);
251};
252
/**
 * xmlSAXHandler:
 *
 * A SAX handler is a bunch of callbacks called by the parser when
 * processing of the input generates data or structure information.
 */
259
Daniel Veillard9d06d302002-01-22 18:15:52 +0000260/**
261 * resolveEntitySAXFunc:
262 * @ctx: the user data (XML parser context)
263 * @publicId: The public ID of the entity
264 * @systemId: The system ID of the entity
265 *
266 * Callback:
267 * The entity loader, to control the loading of external entities,
268 * the application can either:
269 * - override this resolveEntity() callback in the SAX block
270 * - or better use the xmlSetExternalEntityLoader() function to
271 * set up it's own entity resolution routine
272 *
273 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
274 */
Owen Taylor3473f882001-02-23 17:55:21 +0000275typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000276 const xmlChar *publicId,
277 const xmlChar *systemId);
278/**
279 * internalSubsetSAXFunc:
280 * @ctx: the user data (XML parser context)
281 * @name: the root element name
282 * @ExternalID: the external ID
283 * @SystemID: the SYSTEM ID (e.g. filename or URL)
284 *
285 * Callback on internal subset declaration.
286 */
287typedef void (*internalSubsetSAXFunc) (void *ctx,
288 const xmlChar *name,
289 const xmlChar *ExternalID,
290 const xmlChar *SystemID);
291/**
292 * externalSubsetSAXFunc:
293 * @ctx: the user data (XML parser context)
294 * @name: the root element name
295 * @ExternalID: the external ID
296 * @SystemID: the SYSTEM ID (e.g. filename or URL)
297 *
298 * Callback on external subset declaration.
299 */
300typedef void (*externalSubsetSAXFunc) (void *ctx,
301 const xmlChar *name,
302 const xmlChar *ExternalID,
303 const xmlChar *SystemID);
304/**
305 * getEntitySAXFunc:
306 * @ctx: the user data (XML parser context)
307 * @name: The entity name
308 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000309 * Get an entity by name.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000310 *
311 * Returns the xmlEntityPtr if found.
312 */
Owen Taylor3473f882001-02-23 17:55:21 +0000313typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000314 const xmlChar *name);
315/**
316 * getParameterEntitySAXFunc:
317 * @ctx: the user data (XML parser context)
318 * @name: The entity name
319 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000320 * Get a parameter entity by name.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000321 *
322 * Returns the xmlEntityPtr if found.
323 */
Owen Taylor3473f882001-02-23 17:55:21 +0000324typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000325 const xmlChar *name);
326/**
327 * entityDeclSAXFunc:
328 * @ctx: the user data (XML parser context)
329 * @name: the entity name
330 * @type: the entity type
331 * @publicId: The public ID of the entity
332 * @systemId: The system ID of the entity
333 * @content: the entity value (without processing).
334 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000335 * An entity definition has been parsed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000336 */
Owen Taylor3473f882001-02-23 17:55:21 +0000337typedef void (*entityDeclSAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000338 const xmlChar *name,
339 int type,
340 const xmlChar *publicId,
341 const xmlChar *systemId,
342 xmlChar *content);
343/**
344 * notationDeclSAXFunc:
345 * @ctx: the user data (XML parser context)
346 * @name: The name of the notation
347 * @publicId: The public ID of the entity
348 * @systemId: The system ID of the entity
349 *
350 * What to do when a notation declaration has been parsed.
351 */
352typedef void (*notationDeclSAXFunc)(void *ctx,
353 const xmlChar *name,
354 const xmlChar *publicId,
355 const xmlChar *systemId);
356/**
357 * attributeDeclSAXFunc:
358 * @ctx: the user data (XML parser context)
359 * @elem: the name of the element
360 * @fullname: the attribute name
361 * @type: the attribute type
362 * @def: the type of default value
363 * @defaultValue: the attribute default value
364 * @tree: the tree of enumerated value set
365 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000366 * An attribute definition has been parsed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000367 */
368typedef void (*attributeDeclSAXFunc)(void *ctx,
369 const xmlChar *elem,
370 const xmlChar *fullname,
371 int type,
372 int def,
373 const xmlChar *defaultValue,
374 xmlEnumerationPtr tree);
375/**
376 * elementDeclSAXFunc:
377 * @ctx: the user data (XML parser context)
378 * @name: the element name
379 * @type: the element type
380 * @content: the element value tree
381 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000382 * An element definition has been parsed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000383 */
384typedef void (*elementDeclSAXFunc)(void *ctx,
385 const xmlChar *name,
386 int type,
387 xmlElementContentPtr content);
388/**
389 * unparsedEntityDeclSAXFunc:
390 * @ctx: the user data (XML parser context)
391 * @name: The name of the entity
392 * @publicId: The public ID of the entity
393 * @systemId: The system ID of the entity
394 * @notationName: the name of the notation
395 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000396 * What to do when an unparsed entity declaration is parsed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000397 */
Owen Taylor3473f882001-02-23 17:55:21 +0000398typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000399 const xmlChar *name,
400 const xmlChar *publicId,
401 const xmlChar *systemId,
402 const xmlChar *notationName);
403/**
404 * setDocumentLocatorSAXFunc:
405 * @ctx: the user data (XML parser context)
406 * @loc: A SAX Locator
407 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000408 * Receive the document locator at startup, actually xmlDefaultSAXLocator.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000409 * Everything is available on the context, so this is useless in our case.
410 */
Owen Taylor3473f882001-02-23 17:55:21 +0000411typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000412 xmlSAXLocatorPtr loc);
/**
 * startDocumentSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Called when the document starts being processed.
 */
typedef void (*startDocumentSAXFunc) (void *ctx);
/**
 * endDocumentSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Called when the document end has been detected.
 */
typedef void (*endDocumentSAXFunc) (void *ctx);
Daniel Veillard9d06d302002-01-22 18:15:52 +0000427/**
428 * startElementSAXFunc:
429 * @ctx: the user data (XML parser context)
430 * @name: The element name, including namespace prefix
431 * @atts: An array of name/value attributes pairs, NULL terminated
432 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000433 * Called when an opening tag has been processed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000434 */
435typedef void (*startElementSAXFunc) (void *ctx,
436 const xmlChar *name,
437 const xmlChar **atts);
438/**
439 * endElementSAXFunc:
440 * @ctx: the user data (XML parser context)
441 * @name: The element name
442 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000443 * Called when the end of an element has been detected.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000444 */
445typedef void (*endElementSAXFunc) (void *ctx,
446 const xmlChar *name);
447/**
448 * attributeSAXFunc:
449 * @ctx: the user data (XML parser context)
450 * @name: The attribute name, including namespace prefix
451 * @value: The attribute value
452 *
453 * Handle an attribute that has been read by the parser.
454 * The default handling is to convert the attribute into an
455 * DOM subtree and past it in a new xmlAttr element added to
456 * the element.
457 */
458typedef void (*attributeSAXFunc) (void *ctx,
459 const xmlChar *name,
460 const xmlChar *value);
461/**
462 * referenceSAXFunc:
463 * @ctx: the user data (XML parser context)
464 * @name: The entity name
465 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000466 * Called when an entity reference is detected.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000467 */
468typedef void (*referenceSAXFunc) (void *ctx,
469 const xmlChar *name);
470/**
471 * charactersSAXFunc:
472 * @ctx: the user data (XML parser context)
473 * @ch: a xmlChar string
474 * @len: the number of xmlChar
475 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000476 * Receiving some chars from the parser.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000477 */
478typedef void (*charactersSAXFunc) (void *ctx,
479 const xmlChar *ch,
480 int len);
481/**
482 * ignorableWhitespaceSAXFunc:
483 * @ctx: the user data (XML parser context)
484 * @ch: a xmlChar string
485 * @len: the number of xmlChar
486 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000487 * Receiving some ignorable whitespaces from the parser.
488 * UNUSED: by default the DOM building will use characters.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000489 */
Owen Taylor3473f882001-02-23 17:55:21 +0000490typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000491 const xmlChar *ch,
492 int len);
493/**
494 * processingInstructionSAXFunc:
495 * @ctx: the user data (XML parser context)
496 * @target: the target name
497 * @data: the PI data's
498 *
499 * A processing instruction has been parsed.
500 */
Owen Taylor3473f882001-02-23 17:55:21 +0000501typedef void (*processingInstructionSAXFunc) (void *ctx,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000502 const xmlChar *target,
503 const xmlChar *data);
504/**
505 * commentSAXFunc:
506 * @ctx: the user data (XML parser context)
507 * @value: the comment content
508 *
509 * A comment has been parsed.
510 */
511typedef void (*commentSAXFunc) (void *ctx,
512 const xmlChar *value);
513/**
514 * cdataBlockSAXFunc:
515 * @ctx: the user data (XML parser context)
516 * @value: The pcdata content
517 * @len: the block length
518 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000519 * Called when a pcdata block has been parsed.
Daniel Veillard9d06d302002-01-22 18:15:52 +0000520 */
521typedef void (*cdataBlockSAXFunc) (
522 void *ctx,
523 const xmlChar *value,
524 int len);
/**
 * warningSAXFunc:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Display and format a warning message, callback.
 */
typedef void (*warningSAXFunc) (void *ctx,
				const char *msg, ...);
/**
 * errorSAXFunc:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Display and format an error message, callback.
 */
typedef void (*errorSAXFunc) (void *ctx,
				const char *msg, ...);
/**
 * fatalErrorSAXFunc:
 * @ctx:  an XML parser context
 * @msg:  the message to display/transmit
 * @...:  extra parameters for the message display
 *
 * Display and format fatal error messages, callback.
 * Note: so far fatalError() SAX callbacks are not used, error()
 *       gets all the callbacks for errors.
 */
typedef void (*fatalErrorSAXFunc) (void *ctx,
				const char *msg, ...);
/**
 * isStandaloneSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Is this document tagged standalone?
 *
 * Returns 1 if true
 */
typedef int (*isStandaloneSAXFunc) (void *ctx);
/**
 * hasInternalSubsetSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Does this document have an internal subset?
 *
 * Returns 1 if true
 */
typedef int (*hasInternalSubsetSAXFunc) (void *ctx);
/**
 * hasExternalSubsetSAXFunc:
 * @ctx:  the user data (XML parser context)
 *
 * Does this document have an external subset?
 *
 * Returns 1 if true
 */
typedef int (*hasExternalSubsetSAXFunc) (void *ctx);
584
Owen Taylor3473f882001-02-23 17:55:21 +0000585struct _xmlSAXHandler {
586 internalSubsetSAXFunc internalSubset;
587 isStandaloneSAXFunc isStandalone;
588 hasInternalSubsetSAXFunc hasInternalSubset;
589 hasExternalSubsetSAXFunc hasExternalSubset;
590 resolveEntitySAXFunc resolveEntity;
591 getEntitySAXFunc getEntity;
592 entityDeclSAXFunc entityDecl;
593 notationDeclSAXFunc notationDecl;
594 attributeDeclSAXFunc attributeDecl;
595 elementDeclSAXFunc elementDecl;
596 unparsedEntityDeclSAXFunc unparsedEntityDecl;
597 setDocumentLocatorSAXFunc setDocumentLocator;
598 startDocumentSAXFunc startDocument;
599 endDocumentSAXFunc endDocument;
600 startElementSAXFunc startElement;
601 endElementSAXFunc endElement;
602 referenceSAXFunc reference;
603 charactersSAXFunc characters;
604 ignorableWhitespaceSAXFunc ignorableWhitespace;
605 processingInstructionSAXFunc processingInstruction;
606 commentSAXFunc comment;
607 warningSAXFunc warning;
608 errorSAXFunc error;
Daniel Veillard0821b152002-11-12 20:57:47 +0000609 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */
Owen Taylor3473f882001-02-23 17:55:21 +0000610 getParameterEntitySAXFunc getParameterEntity;
611 cdataBlockSAXFunc cdataBlock;
612 externalSubsetSAXFunc externalSubset;
Daniel Veillardd0463562001-10-13 09:15:48 +0000613 int initialized;
Owen Taylor3473f882001-02-23 17:55:21 +0000614};
615
616/**
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000617 * xmlExternalEntityLoader:
618 * @URL: The System ID of the resource requested
619 * @ID: The Public ID of the resource requested
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000620 * @context: the XML parser context
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000621 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000622 * External entity loaders types.
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000623 *
Daniel Veillard61f26172002-03-12 18:46:39 +0000624 * Returns the entity input parser.
Owen Taylor3473f882001-02-23 17:55:21 +0000625 */
Daniel Veillard9d06d302002-01-22 18:15:52 +0000626typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL,
627 const char *ID,
628 xmlParserCtxtPtr context);
Owen Taylor3473f882001-02-23 17:55:21 +0000629
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000630/*
Owen Taylor3473f882001-02-23 17:55:21 +0000631 * Global variables: just the default SAX interface tables and XML
632 * version infos.
633 */
Daniel Veillard0ba59232002-02-10 13:20:39 +0000634#if 0
Owen Taylor3473f882001-02-23 17:55:21 +0000635LIBXML_DLL_IMPORT extern const char *xmlParserVersion;
Daniel Veillard0ba59232002-02-10 13:20:39 +0000636#endif
Owen Taylor3473f882001-02-23 17:55:21 +0000637
Daniel Veillard0ba59232002-02-10 13:20:39 +0000638/*
Owen Taylor3473f882001-02-23 17:55:21 +0000639LIBXML_DLL_IMPORT extern xmlSAXLocator xmlDefaultSAXLocator;
640LIBXML_DLL_IMPORT extern xmlSAXHandler xmlDefaultSAXHandler;
641LIBXML_DLL_IMPORT extern xmlSAXHandler htmlDefaultSAXHandler;
Daniel Veillardeae522a2001-04-23 13:41:34 +0000642LIBXML_DLL_IMPORT extern xmlSAXHandler docbDefaultSAXHandler;
Daniel Veillard0ba59232002-02-10 13:20:39 +0000643 */
Owen Taylor3473f882001-02-23 17:55:21 +0000644
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000645/*
Daniel Veillard61f26172002-03-12 18:46:39 +0000646 * Entity substitution default behavior.
Owen Taylor3473f882001-02-23 17:55:21 +0000647 */
648
Daniel Veillard0ba59232002-02-10 13:20:39 +0000649#if 0
650LIBXML_DLL_IMPORT extern int xmlSubstituteEntitiesDefaultValue;
Owen Taylor3473f882001-02-23 17:55:21 +0000651LIBXML_DLL_IMPORT extern int xmlGetWarningsDefaultValue;
Daniel Veillard0ba59232002-02-10 13:20:39 +0000652#endif
Owen Taylor3473f882001-02-23 17:55:21 +0000653
Daniel Veillard6c4ffaf2002-02-11 08:54:05 +0000654#ifdef __cplusplus
655}
656#endif
657#include <libxml/encoding.h>
658#include <libxml/xmlIO.h>
659#include <libxml/globals.h>
660#ifdef __cplusplus
661extern "C" {
662#endif
663
Owen Taylor3473f882001-02-23 17:55:21 +0000664
/*
 * Init/Cleanup
 */
void		xmlInitParser		(void);
void		xmlCleanupParser	(void);
670
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000671/*
Owen Taylor3473f882001-02-23 17:55:21 +0000672 * Input functions
673 */
674int xmlParserInputRead (xmlParserInputPtr in,
675 int len);
676int xmlParserInputGrow (xmlParserInputPtr in,
677 int len);
678
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000679/*
Owen Taylor3473f882001-02-23 17:55:21 +0000680 * xmlChar handling
681 */
682xmlChar * xmlStrdup (const xmlChar *cur);
683xmlChar * xmlStrndup (const xmlChar *cur,
684 int len);
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000685xmlChar * xmlCharStrndup (const char *cur,
686 int len);
687xmlChar * xmlCharStrdup (const char *cur);
Owen Taylor3473f882001-02-23 17:55:21 +0000688xmlChar * xmlStrsub (const xmlChar *str,
689 int start,
690 int len);
691const xmlChar * xmlStrchr (const xmlChar *str,
692 xmlChar val);
693const xmlChar * xmlStrstr (const xmlChar *str,
Daniel Veillard77044732001-06-29 21:31:07 +0000694 const xmlChar *val);
Owen Taylor3473f882001-02-23 17:55:21 +0000695const xmlChar * xmlStrcasestr (const xmlChar *str,
696 xmlChar *val);
697int xmlStrcmp (const xmlChar *str1,
698 const xmlChar *str2);
699int xmlStrncmp (const xmlChar *str1,
700 const xmlChar *str2,
701 int len);
702int xmlStrcasecmp (const xmlChar *str1,
703 const xmlChar *str2);
704int xmlStrncasecmp (const xmlChar *str1,
705 const xmlChar *str2,
706 int len);
707int xmlStrEqual (const xmlChar *str1,
708 const xmlChar *str2);
709int xmlStrlen (const xmlChar *str);
710xmlChar * xmlStrcat (xmlChar *cur,
711 const xmlChar *add);
712xmlChar * xmlStrncat (xmlChar *cur,
713 const xmlChar *add,
714 int len);
715
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000716/*
Owen Taylor3473f882001-02-23 17:55:21 +0000717 * Basic parsing Interfaces
718 */
719xmlDocPtr xmlParseDoc (xmlChar *cur);
Daniel Veillard50822cb2001-07-26 20:05:51 +0000720xmlDocPtr xmlParseMemory (const char *buffer,
Owen Taylor3473f882001-02-23 17:55:21 +0000721 int size);
722xmlDocPtr xmlParseFile (const char *filename);
723int xmlSubstituteEntitiesDefault(int val);
724int xmlKeepBlanksDefault (int val);
725void xmlStopParser (xmlParserCtxtPtr ctxt);
726int xmlPedanticParserDefault(int val);
Daniel Veillardd9bad132001-07-23 19:39:43 +0000727int xmlLineNumbersDefault (int val);
Owen Taylor3473f882001-02-23 17:55:21 +0000728
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000729/*
Owen Taylor3473f882001-02-23 17:55:21 +0000730 * Recovery mode
731 */
/*
 * Same three input shapes and return type as xmlParseDoc, xmlParseMemory
 * and xmlParseFile: an xmlChar string, a char buffer with an int size, or
 * a filename, each returning an xmlDocPtr.
 * NOTE(review): presumably a document is returned even when the input is
 * not well-formed - confirm against the definitions.
 */
732xmlDocPtr xmlRecoverDoc (xmlChar *cur);
Daniel Veillard50822cb2001-07-26 20:05:51 +0000733xmlDocPtr xmlRecoverMemory (const char *buffer,
Owen Taylor3473f882001-02-23 17:55:21 +0000734 int size);
735xmlDocPtr xmlRecoverFile (const char *filename);
736
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000737/*
Owen Taylor3473f882001-02-23 17:55:21 +0000738 * Less common routines and SAX interfaces
739 */
/*
 * Context-driven entry points: both take an existing xmlParserCtxtPtr and
 * return an int.
 * NOTE(review): presumably 0 on success and a negative value on error -
 * confirm.
 */
740int xmlParseDocument (xmlParserCtxtPtr ctxt);
741int xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt);
/*
 * SAX-driven variants. Each takes an xmlSAXHandlerPtr as first argument.
 * The xmlSAXParse* forms return an xmlDocPtr; the xmlSAXUserParse* forms
 * return an int and accept a caller-supplied user_data pointer.
 * NOTE(review): the int recovery parameter is presumably a boolean flag
 * selecting the behaviour of the xmlRecover* functions - confirm.
 */
742xmlDocPtr xmlSAXParseDoc (xmlSAXHandlerPtr sax,
743 xmlChar *cur,
744 int recovery);
745int xmlSAXUserParseFile (xmlSAXHandlerPtr sax,
746 void *user_data,
747 const char *filename);
748int xmlSAXUserParseMemory (xmlSAXHandlerPtr sax,
749 void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +0000750 const char *buffer,
Owen Taylor3473f882001-02-23 17:55:21 +0000751 int size);
752xmlDocPtr xmlSAXParseMemory (xmlSAXHandlerPtr sax,
Daniel Veillard50822cb2001-07-26 20:05:51 +0000753 const char *buffer,
Owen Taylor3473f882001-02-23 17:55:21 +0000754 int size,
755 int recovery);
/* Same parameter list as xmlSAXParseMemory plus a trailing void *data. */
Daniel Veillard8606bbb2002-11-12 12:36:52 +0000756xmlDocPtr xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax,
757 const char *buffer,
758 int size,
759 int recovery,
760 void *data);
Owen Taylor3473f882001-02-23 17:55:21 +0000761xmlDocPtr xmlSAXParseFile (xmlSAXHandlerPtr sax,
762 const char *filename,
763 int recovery);
/* Same parameter list as xmlSAXParseFile plus a trailing void *data. */
Daniel Veillarda293c322001-10-02 13:54:14 +0000764xmlDocPtr xmlSAXParseFileWithData (xmlSAXHandlerPtr sax,
765 const char *filename,
766 int recovery,
767 void *data);
/*
 * Entity parsing from a filename, with and without a SAX handler. Both
 * return an xmlDocPtr and neither takes a recovery flag.
 */
Owen Taylor3473f882001-02-23 17:55:21 +0000768xmlDocPtr xmlSAXParseEntity (xmlSAXHandlerPtr sax,
769 const char *filename);
770xmlDocPtr xmlParseEntity (const char *filename);
/*
 * DTD loaders; all three return an xmlDtdPtr. xmlParseDTD and
 * xmlSAXParseDTD identify the DTD by ExternalID and SystemID strings,
 * while xmlIOParseDTD takes an xmlParserInputBufferPtr and an explicit
 * xmlCharEncoding instead.
 * NOTE(review): whether xmlIOParseDTD takes ownership of input is not
 * visible from the prototype - confirm before freeing it in callers.
 */
771xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID,
772 const xmlChar *SystemID);
773xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax,
774 const xmlChar *ExternalID,
775 const xmlChar *SystemID);
776xmlDtdPtr xmlIOParseDTD (xmlSAXHandlerPtr sax,
777 xmlParserInputBufferPtr input,
778 xmlCharEncoding enc);
/*
 * Parses a const xmlChar string in the context of an existing document,
 * with an int depth argument, and returns an int.
 * NOTE(review): lst is an xmlNodePtr * - presumably an out parameter that
 * receives the resulting node list; confirm.
 */
779int xmlParseBalancedChunkMemory(xmlDocPtr doc,
780 xmlSAXHandlerPtr sax,
781 void *user_data,
782 int depth,
783 const xmlChar *string,
Daniel Veillardcda96922001-08-21 10:56:31 +0000784 xmlNodePtr *lst);
/* Same parameter list as xmlParseBalancedChunkMemory plus int recover. */
Daniel Veillard58e44c92002-08-02 22:19:49 +0000785int xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,
786 xmlSAXHandlerPtr sax,
787 void *user_data,
788 int depth,
789 const xmlChar *string,
790 xmlNodePtr *lst,
791 int recover);
/*
 * External entity variant: the entity is identified by URL and ID strings
 * rather than passed as an in-memory string.
 */
Owen Taylor3473f882001-02-23 17:55:21 +0000792int xmlParseExternalEntity (xmlDocPtr doc,
793 xmlSAXHandlerPtr sax,
794 void *user_data,
795 int depth,
796 const xmlChar *URL,
797 const xmlChar *ID,
Daniel Veillardcda96922001-08-21 10:56:31 +0000798 xmlNodePtr *lst);
/*
 * Takes a parser context in place of the doc / sax / user_data / depth
 * parameters of xmlParseExternalEntity. Note that the context parameter
 * is spelled ctx here, while the other prototypes in this header use ctxt.
 */
Owen Taylor3473f882001-02-23 17:55:21 +0000799int xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,
800 const xmlChar *URL,
801 const xmlChar *ID,
Daniel Veillardcda96922001-08-21 10:56:31 +0000802 xmlNodePtr *lst);
Owen Taylor3473f882001-02-23 17:55:21 +0000803
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000804/*
Owen Taylor3473f882001-02-23 17:55:21 +0000805 * Parser contexts handling.
806 */
/*
 * Lifecycle helpers for an xmlParserCtxtPtr. xmlInitParserCtxt is the
 * only one of the three that reports a status (int); the clear and free
 * functions return void.
 * NOTE(review): the meaning of the int result is not visible here -
 * presumably 0 on success; confirm.
 */
Daniel Veillarda76fe5c2003-04-24 16:06:47 +0000807int xmlInitParserCtxt (xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +0000808void xmlClearParserCtxt (xmlParserCtxtPtr ctxt);
809void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
/*
 * Attaches a const xmlChar buffer and a filename to an existing context.
 * NOTE(review): the buffer is passed without a length, so it is
 * presumably expected to be zero-terminated - confirm.
 */
810void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt,
811 const xmlChar* buffer,
Daniel Veillard963d2ae2002-01-20 22:08:18 +0000812 const char *filename);
/*
 * Returns a parser context for a non-const xmlChar string.
 * NOTE(review): presumably the result must be released with
 * xmlFreeParserCtxt - confirm.
 */
Owen Taylor3473f882001-02-23 17:55:21 +0000813xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur);
814
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000815/*
Owen Taylor3473f882001-02-23 17:55:21 +0000816 * Reading/setting optional parsing features.
817 */
818
/*
 * Takes an int * and a const char ** and returns an int.
 * NOTE(review): presumably len is an in/out capacity/count and result
 * receives the feature names - confirm against the definition.
 */
819int xmlGetFeaturesList (int *len,
820 const char **result);
/*
 * Features are addressed by name, and their value travels through an
 * untyped void *, so the compiler cannot check that the caller passes a
 * pointer of the right type.
 * NOTE(review): the expected pointee type presumably depends on the
 * feature name - confirm per feature.
 */
821int xmlGetFeature (xmlParserCtxtPtr ctxt,
822 const char *name,
823 void *result);
824int xmlSetFeature (xmlParserCtxtPtr ctxt,
825 const char *name,
826 void *value);
827
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000828/*
Daniel Veillard61f26172002-03-12 18:46:39 +0000829 * Interfaces for the Push mode.
Owen Taylor3473f882001-02-23 17:55:21 +0000830 */
/*
 * Returns a parser context and accepts a first chunk of data (pointer
 * plus int size) along with the SAX handler, user data and filename.
 * NOTE(review): whether chunk may be NULL with size 0 is not visible
 * from the prototype - confirm.
 */
831xmlParserCtxtPtr xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,
832 void *user_data,
833 const char *chunk,
834 int size,
835 const char *filename);
/*
 * Takes a context and a further chunk of data (pointer plus int size),
 * and returns an int.
 * NOTE(review): terminate is presumably non-zero for the final chunk -
 * confirm.
 */
836int xmlParseChunk (xmlParserCtxtPtr ctxt,
837 const char *chunk,
838 int size,
839 int terminate);
840
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000841/*
Daniel Veillard61f26172002-03-12 18:46:39 +0000842 * Special I/O mode.
Owen Taylor3473f882001-02-23 17:55:21 +0000843 */
844
/*
 * Returns a parser context whose input is described by a read callback,
 * a close callback and an opaque ioctx pointer, plus an explicit
 * xmlCharEncoding.
 * NOTE(review): ioctx is presumably handed back to ioread and ioclose on
 * each call - confirm against the callback typedefs.
 */
845xmlParserCtxtPtr xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax,
846 void *user_data,
847 xmlInputReadCallback ioread,
848 xmlInputCloseCallback ioclose,
849 void *ioctx,
850 xmlCharEncoding enc);
851
/*
 * Returns an xmlParserInputPtr for an existing xmlParserInputBufferPtr,
 * given a parser context and an encoding.
 */
852xmlParserInputPtr xmlNewIOInputStream (xmlParserCtxtPtr ctxt,
853 xmlParserInputBufferPtr input,
854 xmlCharEncoding enc);
855
Daniel Veillardf69bb4b2001-05-19 13:24:56 +0000856/*
Daniel Veillard61f26172002-03-12 18:46:39 +0000857 * Node information.
Owen Taylor3473f882001-02-23 17:55:21 +0000858 */
/*
 * NOTE(review): several parameters below are written as const applied to
 * a ...Ptr typedef (const xmlParserCtxtPtr, const xmlNodePtr,
 * const xmlParserNodeInfoSeqPtr, const xmlParserNodeInfoPtr). If those
 * typedefs are pointer types (they are declared outside this view), the
 * const qualifies the pointer itself and not the object pointed to, so
 * these prototypes do not promise that the referenced object is left
 * unmodified.
 */
859const xmlParserNodeInfo*
Daniel Veillard963d2ae2002-01-20 22:08:18 +0000860 xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt,
861 const xmlNodePtr node);
/* Init and clear operate on a node info sequence and return nothing. */
Owen Taylor3473f882001-02-23 17:55:21 +0000862void xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
863void xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
/*
 * Returns an unsigned long, so a negative value cannot be used to signal
 * that the node was not found.
 * NOTE(review): how a missing node is reported is not visible from the
 * prototype - confirm before relying on the result as a valid index.
 */
Daniel Veillard963d2ae2002-01-20 22:08:18 +0000864unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
865 const xmlNodePtr node);
/* Takes a parser context rather than a node info sequence. */
Owen Taylor3473f882001-02-23 17:55:21 +0000866void xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt,
Daniel Veillard963d2ae2002-01-20 22:08:18 +0000867 const xmlParserNodeInfoPtr info);
Owen Taylor3473f882001-02-23 17:55:21 +0000868
869/*
Daniel Veillard61f26172002-03-12 18:46:39 +0000870 * External entity handling; actually implemented in xmlIO.
Owen Taylor3473f882001-02-23 17:55:21 +0000871 */
872
/*
 * Setter/getter pair for a single xmlExternalEntityLoader value; neither
 * function takes a parser context.
 * NOTE(review): presumably the loader is process-wide state - confirm,
 * in particular before changing it from multiple threads.
 */
873void xmlSetExternalEntityLoader(xmlExternalEntityLoader f);
874xmlExternalEntityLoader
875 xmlGetExternalEntityLoader(void);
/*
 * Takes URL and ID as plain const char * (not xmlChar *) together with a
 * parser context, and returns an xmlParserInputPtr.
 * NOTE(review): presumably dispatches to the currently installed loader
 * and returns NULL on failure - confirm against the definition.
 */
876xmlParserInputPtr
877 xmlLoadExternalEntity (const char *URL,
878 const char *ID,
Daniel Veillard9d06d302002-01-22 18:15:52 +0000879 xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +0000880
881#ifdef __cplusplus
882}
883#endif
Owen Taylor3473f882001-02-23 17:55:21 +0000884#endif /* __XML_PARSER_H__ */
885