/*
 * parser.h : Interfaces, constants and types related to the XML parser.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */

9#ifndef __XML_PARSER_H__
10#define __XML_PARSER_H__
11
12#include "tree.h"
Daniel Veillardb05deb71999-08-10 19:04:08 +000013#include "valid.h"
Daniel Veillard14fff061999-06-22 21:49:07 +000014#include "xmlIO.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000015
16#ifdef __cplusplus
17extern "C" {
18#endif
19
/*
 * Constants.
 */
/* Default XML version string. */
#define XML_DEFAULT_VERSION "1.0"

Daniel Veillardb05deb71999-08-10 19:04:08 +000025/**
26 * an xmlParserInput is an input flow for the XML processor.
27 * Each entity parsed is associated an xmlParserInput (except the
28 * few predefined ones). This is the case both for internal entities
29 * - in which case the flow is already completely in memory - or
30 * external entities - in which case we use the buf structure for
31 * progressive reading and I18N conversions to the internal UTF-8 format.
32 */
33
Daniel Veillardd692aa41999-02-28 21:54:31 +000034typedef void (* xmlParserInputDeallocate)(CHAR *);
Daniel Veillard260a68f1998-08-13 03:39:55 +000035typedef struct xmlParserInput {
Daniel Veillard14fff061999-06-22 21:49:07 +000036 /* Input buffer */
37 xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */
38
Daniel Veillard260a68f1998-08-13 03:39:55 +000039 const char *filename; /* The file analyzed, if any */
Daniel Veillardb05deb71999-08-10 19:04:08 +000040 const char *directory; /* the directory/base of teh file */
Daniel Veillard260a68f1998-08-13 03:39:55 +000041 const CHAR *base; /* Base of the array to parse */
42 const CHAR *cur; /* Current char being parsed */
43 int line; /* Current line */
44 int col; /* Current column */
Daniel Veillarde2d034d1999-07-27 19:52:06 +000045 int consumed; /* How many CHARs were already consumed */
Daniel Veillardd692aa41999-02-28 21:54:31 +000046 xmlParserInputDeallocate free; /* function to deallocate the base */
Daniel Veillard1e346af1999-02-22 10:33:01 +000047} xmlParserInput;
48typedef xmlParserInput *xmlParserInputPtr;
Daniel Veillard260a68f1998-08-13 03:39:55 +000049
/**
 * The parser can be asked to collect node information, i.e. at what
 * place in the file each node was detected.
 * NOTE: This is off by default and not very well tested.
 */
typedef struct _xmlParserNodeInfo {
    const struct xmlNode *node;   /* the node the positions below refer to */
    /* Position & line # at which the text that created the node begins & ends */
    unsigned long begin_pos;
    unsigned long begin_line;
    unsigned long end_pos;
    unsigned long end_line;
} _xmlParserNodeInfo;
typedef _xmlParserNodeInfo xmlParserNodeInfo;

65typedef struct xmlParserNodeInfoSeq {
66 unsigned long maximum;
67 unsigned long length;
68 xmlParserNodeInfo* buffer;
Daniel Veillard1e346af1999-02-22 10:33:01 +000069} _xmlParserNodeInfoSeq;
70typedef _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
71typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
Daniel Veillard260a68f1998-08-13 03:39:55 +000072
/**
 * The parser is not a state based parser, but we need to maintain
 * minimum state information, especially for entities processing.
 *
 * Enumerators are numbered consecutively starting at XML_PARSER_EOF = 0.
 * The last enumerator carries no trailing comma so that the header is
 * also accepted by C89 compilers.
 */
typedef enum xmlParserInputState {
    XML_PARSER_EOF = 0,
    XML_PARSER_PROLOG,
    XML_PARSER_CONTENT,
    XML_PARSER_ENTITY_DECL,
    XML_PARSER_ENTITY_VALUE,
    XML_PARSER_ATTRIBUTE_VALUE,
    XML_PARSER_DTD,
    XML_PARSER_EPILOG,
    XML_PARSER_COMMENT,
    XML_PARSER_CDATA_SECTION
} xmlParserInputState;

90/**
91 * The parser context.
92 * NOTE This doesn't completely defines the parser state, the (current ?)
93 * design of the parser uses recursive function calls since this allow
94 * and easy mapping from the production rules of the specification
95 * to the actual code. The drawback is that the actual function call
96 * also reflect the parser state. However most of the parsing routines
97 * takes as the only argument the parser context pointer, so migrating
98 * to a state based parser for progressive parsing shouldn't be too hard.
99 */
Daniel Veillard1e346af1999-02-22 10:33:01 +0000100typedef struct _xmlParserCtxt {
Daniel Veillard260a68f1998-08-13 03:39:55 +0000101 struct xmlSAXHandler *sax; /* The SAX handler */
Daniel Veillard517752b1999-04-05 12:20:10 +0000102 void *userData; /* the document being built */
103 xmlDocPtr myDoc; /* the document being built */
Daniel Veillard011b63c1999-06-02 17:44:04 +0000104 int replaceEntities; /* shall we replace entities ? */
Daniel Veillardb05deb71999-08-10 19:04:08 +0000105 const CHAR *version; /* the XML version string */
106 const CHAR *encoding; /* encoding, if any */
107 int standalone; /* standalone document */
108 int hasExternalSubset; /* reference and external subset */
109 int hasPErefs; /* the internal subset has PE refs */
110 int html; /* are we parsing an HTML document */
111 int external; /* are we parsing an external entity */
Daniel Veillard260a68f1998-08-13 03:39:55 +0000112
Daniel Veillardb05deb71999-08-10 19:04:08 +0000113 int wellFormed; /* is the document well formed */
114 int valid; /* is the document valid */
115 int validate; /* shall we try to validate ? */
116 xmlValidCtxt vctxt; /* The validity context */
117
118 xmlParserInputState instate; /* current type of input */
119 int token; /* next char look-ahead */
120
121 char *directory; /* the data directory */
122
Daniel Veillard260a68f1998-08-13 03:39:55 +0000123 /* Input stream stack */
124 xmlParserInputPtr input; /* Current input stream */
125 int inputNr; /* Number of current input streams */
126 int inputMax; /* Max number of input streams */
127 xmlParserInputPtr *inputTab; /* stack of inputs */
128
Daniel Veillardb05deb71999-08-10 19:04:08 +0000129 /* Node analysis stack only used for DOM building */
Daniel Veillard260a68f1998-08-13 03:39:55 +0000130 xmlNodePtr node; /* Current parsed Node */
131 int nodeNr; /* Depth of the parsing stack */
132 int nodeMax; /* Max depth of the parsing stack */
133 xmlNodePtr *nodeTab; /* array of nodes */
134
135 int record_info; /* Whether node info should be kept */
136 xmlParserNodeInfoSeq node_seq; /* info about each node parsed */
Daniel Veillard1e346af1999-02-22 10:33:01 +0000137} _xmlParserCtxt;
138typedef _xmlParserCtxt xmlParserCtxt;
139typedef xmlParserCtxt *xmlParserCtxtPtr;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000140
Daniel Veillardb05deb71999-08-10 19:04:08 +0000141/**
Daniel Veillard260a68f1998-08-13 03:39:55 +0000142 * a SAX Locator.
143 */
Daniel Veillard260a68f1998-08-13 03:39:55 +0000144typedef struct xmlSAXLocator {
Daniel Veillard27d88741999-05-29 11:51:49 +0000145 const CHAR *(*getPublicId)(void *ctx);
146 const CHAR *(*getSystemId)(void *ctx);
147 int (*getLineNumber)(void *ctx);
148 int (*getColumnNumber)(void *ctx);
Daniel Veillard1e346af1999-02-22 10:33:01 +0000149} _xmlSAXLocator;
150typedef _xmlSAXLocator xmlSAXLocator;
151typedef xmlSAXLocator *xmlSAXLocatorPtr;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000152
Daniel Veillardb05deb71999-08-10 19:04:08 +0000153/**
154 * a SAX handler is bunch of callbacks called by the parser when processing
155 * of the input generate data or structure informations.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000156 */
157
Daniel Veillard517752b1999-04-05 12:20:10 +0000158#include "entities.h"
159
Daniel Veillard27d88741999-05-29 11:51:49 +0000160typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
Daniel Veillard260a68f1998-08-13 03:39:55 +0000161 const CHAR *publicId, const CHAR *systemId);
Daniel Veillard27d88741999-05-29 11:51:49 +0000162typedef void (*internalSubsetSAXFunc) (void *ctx, const CHAR *name,
Daniel Veillard517752b1999-04-05 12:20:10 +0000163 const CHAR *ExternalID, const CHAR *SystemID);
Daniel Veillard27d88741999-05-29 11:51:49 +0000164typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
Daniel Veillard517752b1999-04-05 12:20:10 +0000165 const CHAR *name);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000166typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
167 const CHAR *name);
Daniel Veillard27d88741999-05-29 11:51:49 +0000168typedef void (*entityDeclSAXFunc) (void *ctx,
Daniel Veillard517752b1999-04-05 12:20:10 +0000169 const CHAR *name, int type, const CHAR *publicId,
170 const CHAR *systemId, CHAR *content);
Daniel Veillard27d88741999-05-29 11:51:49 +0000171typedef void (*notationDeclSAXFunc)(void *ctx, const CHAR *name,
Daniel Veillard260a68f1998-08-13 03:39:55 +0000172 const CHAR *publicId, const CHAR *systemId);
Daniel Veillard27d88741999-05-29 11:51:49 +0000173typedef void (*attributeDeclSAXFunc)(void *ctx, const CHAR *elem,
Daniel Veillard517752b1999-04-05 12:20:10 +0000174 const CHAR *name, int type, int def,
175 const CHAR *defaultValue, xmlEnumerationPtr tree);
Daniel Veillard27d88741999-05-29 11:51:49 +0000176typedef void (*elementDeclSAXFunc)(void *ctx, const CHAR *name,
Daniel Veillard517752b1999-04-05 12:20:10 +0000177 int type, xmlElementContentPtr content);
Daniel Veillard27d88741999-05-29 11:51:49 +0000178typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
Daniel Veillard260a68f1998-08-13 03:39:55 +0000179 const CHAR *name, const CHAR *publicId,
180 const CHAR *systemId, const CHAR *notationName);
Daniel Veillard27d88741999-05-29 11:51:49 +0000181typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
Daniel Veillard260a68f1998-08-13 03:39:55 +0000182 xmlSAXLocatorPtr loc);
Daniel Veillard27d88741999-05-29 11:51:49 +0000183typedef void (*startDocumentSAXFunc) (void *ctx);
184typedef void (*endDocumentSAXFunc) (void *ctx);
185typedef void (*startElementSAXFunc) (void *ctx, const CHAR *name,
Daniel Veillard517752b1999-04-05 12:20:10 +0000186 const CHAR **atts);
Daniel Veillard27d88741999-05-29 11:51:49 +0000187typedef void (*endElementSAXFunc) (void *ctx, const CHAR *name);
188typedef void (*attributeSAXFunc) (void *ctx, const CHAR *name,
Daniel Veillard11e00581998-10-24 18:27:49 +0000189 const CHAR *value);
Daniel Veillard27d88741999-05-29 11:51:49 +0000190typedef void (*referenceSAXFunc) (void *ctx, const CHAR *name);
191typedef void (*charactersSAXFunc) (void *ctx, const CHAR *ch,
Daniel Veillard517752b1999-04-05 12:20:10 +0000192 int len);
Daniel Veillard27d88741999-05-29 11:51:49 +0000193typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
Daniel Veillard517752b1999-04-05 12:20:10 +0000194 const CHAR *ch, int len);
Daniel Veillard27d88741999-05-29 11:51:49 +0000195typedef void (*processingInstructionSAXFunc) (void *ctx,
Daniel Veillard260a68f1998-08-13 03:39:55 +0000196 const CHAR *target, const CHAR *data);
Daniel Veillard27d88741999-05-29 11:51:49 +0000197typedef void (*commentSAXFunc) (void *ctx, const CHAR *value);
Daniel Veillardb05deb71999-08-10 19:04:08 +0000198typedef void (*cdataBlockSAXFunc) (void *ctx, const CHAR *value, int len);
Daniel Veillard27d88741999-05-29 11:51:49 +0000199typedef void (*warningSAXFunc) (void *ctx, const char *msg, ...);
200typedef void (*errorSAXFunc) (void *ctx, const char *msg, ...);
201typedef void (*fatalErrorSAXFunc) (void *ctx, const char *msg, ...);
202typedef int (*isStandaloneSAXFunc) (void *ctx);
203typedef int (*hasInternalSubsetSAXFunc) (void *ctx);
204typedef int (*hasExternalSubsetSAXFunc) (void *ctx);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000205
206typedef struct xmlSAXHandler {
Daniel Veillard517752b1999-04-05 12:20:10 +0000207 internalSubsetSAXFunc internalSubset;
208 isStandaloneSAXFunc isStandalone;
209 hasInternalSubsetSAXFunc hasInternalSubset;
210 hasExternalSubsetSAXFunc hasExternalSubset;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000211 resolveEntitySAXFunc resolveEntity;
Daniel Veillard517752b1999-04-05 12:20:10 +0000212 getEntitySAXFunc getEntity;
213 entityDeclSAXFunc entityDecl;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000214 notationDeclSAXFunc notationDecl;
Daniel Veillard517752b1999-04-05 12:20:10 +0000215 attributeDeclSAXFunc attributeDecl;
216 elementDeclSAXFunc elementDecl;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000217 unparsedEntityDeclSAXFunc unparsedEntityDecl;
218 setDocumentLocatorSAXFunc setDocumentLocator;
219 startDocumentSAXFunc startDocument;
220 endDocumentSAXFunc endDocument;
221 startElementSAXFunc startElement;
222 endElementSAXFunc endElement;
Daniel Veillard517752b1999-04-05 12:20:10 +0000223 referenceSAXFunc reference;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000224 charactersSAXFunc characters;
225 ignorableWhitespaceSAXFunc ignorableWhitespace;
226 processingInstructionSAXFunc processingInstruction;
Daniel Veillard517752b1999-04-05 12:20:10 +0000227 commentSAXFunc comment;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000228 warningSAXFunc warning;
229 errorSAXFunc error;
230 fatalErrorSAXFunc fatalError;
Daniel Veillardb05deb71999-08-10 19:04:08 +0000231 getParameterEntitySAXFunc getParameterEntity;
232 cdataBlockSAXFunc cdataBlock;
Daniel Veillard1e346af1999-02-22 10:33:01 +0000233} xmlSAXHandler;
234typedef xmlSAXHandler *xmlSAXHandlerPtr;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000235
Daniel Veillardb05deb71999-08-10 19:04:08 +0000236/**
237 * Global variables: just the default SAX interface tables and XML version infos.
Daniel Veillard260a68f1998-08-13 03:39:55 +0000238 */
Daniel Veillard14fff061999-06-22 21:49:07 +0000239extern const char *xmlParserVersion;
240
Daniel Veillard151b1b01998-09-23 00:49:46 +0000241extern xmlSAXLocator xmlDefaultSAXLocator;
242extern xmlSAXHandler xmlDefaultSAXHandler;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000243extern xmlSAXHandler htmlDefaultSAXHandler;
Daniel Veillard260a68f1998-08-13 03:39:55 +0000244
Daniel Veillardccb09631998-10-27 06:21:04 +0000245#include "entities.h"
Daniel Veillardd109e371999-03-05 06:26:45 +0000246#include "xml-error.h"
Daniel Veillardccb09631998-10-27 06:21:04 +0000247
Daniel Veillardb05deb71999-08-10 19:04:08 +0000248/**
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000249 * Input functions
250 */
251
252int xmlParserInputRead(xmlParserInputPtr in, int len);
253int xmlParserInputGrow(xmlParserInputPtr in, int len);
254
Daniel Veillardb05deb71999-08-10 19:04:08 +0000255/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000256 * CHAR handling
Daniel Veillard260a68f1998-08-13 03:39:55 +0000257 */
Daniel Veillard1e346af1999-02-22 10:33:01 +0000258CHAR *xmlStrdup(const CHAR *cur);
259CHAR *xmlStrndup(const CHAR *cur, int len);
Daniel Veillard1566d3a1999-07-15 14:24:29 +0000260CHAR *xmlStrsub(const CHAR *str, int start, int len);
261const CHAR *xmlStrchr(const CHAR *str, CHAR val);
262const CHAR *xmlStrstr(const CHAR *str, CHAR *val);
Daniel Veillard1e346af1999-02-22 10:33:01 +0000263int xmlStrcmp(const CHAR *str1, const CHAR *str2);
264int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len);
265int xmlStrlen(const CHAR *str);
266CHAR *xmlStrcat(CHAR *cur, const CHAR *add);
267CHAR *xmlStrncat(CHAR *cur, const CHAR *add, int len);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000268
Daniel Veillardb05deb71999-08-10 19:04:08 +0000269/**
270 * Basic parsing Interfaces
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000271 */
Daniel Veillard1e346af1999-02-22 10:33:01 +0000272xmlDocPtr xmlParseDoc(CHAR *cur);
273xmlDocPtr xmlParseMemory(char *buffer, int size);
274xmlDocPtr xmlParseFile(const char *filename);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000275int xmlSubstituteEntitiesDefault(int val);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000276
Daniel Veillardb05deb71999-08-10 19:04:08 +0000277/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000278 * Recovery mode
279 */
Daniel Veillard1e346af1999-02-22 10:33:01 +0000280xmlDocPtr xmlRecoverDoc(CHAR *cur);
281xmlDocPtr xmlRecoverMemory(char *buffer, int size);
282xmlDocPtr xmlRecoverFile(const char *filename);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000283
Daniel Veillardb05deb71999-08-10 19:04:08 +0000284/**
285 * Less common routines and SAX interfaces
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000286 */
Daniel Veillard1e346af1999-02-22 10:33:01 +0000287int xmlParseDocument(xmlParserCtxtPtr ctxt);
288xmlDocPtr xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery);
289xmlDocPtr xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000290 int size, int recovery);
Daniel Veillard1e346af1999-02-22 10:33:01 +0000291xmlDocPtr xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +0000292 int recovery);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000293xmlDtdPtr xmlParseDTD(const CHAR *ExternalID, const CHAR *SystemID);
294xmlDtdPtr xmlSAXParseDTD(xmlSAXHandlerPtr sax, const CHAR *ExternalID,
295 const CHAR *SystemID);
Daniel Veillard1e346af1999-02-22 10:33:01 +0000296void xmlInitParserCtxt(xmlParserCtxtPtr ctxt);
297void xmlClearParserCtxt(xmlParserCtxtPtr ctxt);
298void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
Daniel Veillard260a68f1998-08-13 03:39:55 +0000299 const char* filename);
300
Daniel Veillard1e346af1999-02-22 10:33:01 +0000301const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctxt,
302 const xmlNode* node);
303void xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq);
304void xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000305unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
306 const xmlNode* node);
Daniel Veillard1e346af1999-02-22 10:33:01 +0000307void xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
308 const xmlParserNodeInfo* info);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000309
/*
 * Initialisation routines for the default SAX handler tables.
 * NOTE(review): presumably these fill xmlDefaultSAXHandler and
 * htmlDefaultSAXHandler respectively - confirm against the implementation.
 */
void xmlDefaultSAXHandlerInit(void);
void htmlDefaultSAXHandlerInit(void);
Daniel Veillard260a68f1998-08-13 03:39:55 +0000312#ifdef __cplusplus
313}
314#endif
315
316#endif /* __XML_PARSER_H__ */
317