Huge commit: 1.5.0, XML validation, Xpath, bugfixes, examples .... Daniel
diff --git a/include/libxml/entities.h b/include/libxml/entities.h
index 8604057..180a40c 100644
--- a/include/libxml/entities.h
+++ b/include/libxml/entities.h
@@ -66,6 +66,7 @@
xmlEntityPtr xmlGetPredefinedEntity(const CHAR *name);
xmlEntityPtr xmlGetDocEntity(xmlDocPtr doc, const CHAR *name);
xmlEntityPtr xmlGetDtdEntity(xmlDocPtr doc, const CHAR *name);
+xmlEntityPtr xmlGetParameterEntity(xmlDocPtr doc, const CHAR *name);
const CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input);
CHAR *xmlEncodeEntitiesReentrant(xmlDocPtr doc, const CHAR *input);
xmlEntitiesTablePtr xmlCreateEntitiesTable(void);
diff --git a/include/libxml/parser.h b/include/libxml/parser.h
index 43d183e..6659cdf 100644
--- a/include/libxml/parser.h
+++ b/include/libxml/parser.h
@@ -1,5 +1,5 @@
/*
- * parser.h : constants and stuff related to the XML parser.
+ * parser.h : Interfaces, constants and types related to the XML parser.
*
* See Copyright for the status of this software.
*
@@ -10,6 +10,7 @@
#define __XML_PARSER_H__
#include "tree.h"
+#include "valid.h"
#include "xmlIO.h"
#ifdef __cplusplus
@@ -21,12 +22,22 @@
*/
#define XML_DEFAULT_VERSION "1.0"
+/**
+ * An xmlParserInput is an input flow for the XML processor.
+ * Each entity parsed is associated with an xmlParserInput (except the
+ * few predefined ones). This is the case both for internal entities
+ * - in which case the flow is already completely in memory - and for
+ * external entities - in which case we use the buf structure for
+ * progressive reading and I18N conversions to the internal UTF-8 format.
+ */
+
typedef void (* xmlParserInputDeallocate)(CHAR *);
typedef struct xmlParserInput {
/* Input buffer */
xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */
const char *filename; /* The file analyzed, if any */
+ const char *directory; /* the directory/base of the file */
const CHAR *base; /* Base of the array to parse */
const CHAR *cur; /* Current char being parsed */
int line; /* Current line */
@@ -36,6 +47,11 @@
} xmlParserInput;
typedef xmlParserInput *xmlParserInputPtr;
+/**
+ * The parser can be asked to collect node information, i.e. at what
+ * place in the file each node was detected.
+ * NOTE: This is off by default and not very well tested.
+ */
typedef struct _xmlParserNodeInfo {
const struct xmlNode* node;
/* Position & line # that text that created the node begins & ends on */
@@ -54,24 +70,63 @@
typedef _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
+/**
+ * The parser is not a state-based parser, but it needs to maintain
+ * minimal state information, especially for entity processing.
+ */
+typedef enum xmlParserInputState {
+    XML_PARSER_EOF = 0,
+    XML_PARSER_PROLOG,
+    XML_PARSER_CONTENT,
+    XML_PARSER_ENTITY_DECL,
+    XML_PARSER_ENTITY_VALUE,
+    XML_PARSER_ATTRIBUTE_VALUE,
+    XML_PARSER_DTD,
+    XML_PARSER_EPILOG,
+    XML_PARSER_COMMENT,
+    XML_PARSER_CDATA_SECTION /* no trailing comma: rejected by C89 and C++98 */
+} xmlParserInputState;
+
+/**
+ * The parser context.
+ * NOTE This doesn't completely define the parser state, the (current ?)
+ * design of the parser uses recursive function calls since this allows
+ * an easy mapping from the production rules of the specification
+ * to the actual code. The drawback is that the actual function calls
+ * also reflect the parser state. However most of the parsing routines
+ * take as the only argument the parser context pointer, so migrating
+ * to a state based parser for progressive parsing shouldn't be too hard.
+ */
typedef struct _xmlParserCtxt {
struct xmlSAXHandler *sax; /* The SAX handler */
void *userData; /* the document being built */
xmlDocPtr myDoc; /* the document being built */
- int wellFormed; /* is the document well formed */
int replaceEntities; /* shall we replace entities ? */
- const CHAR *version; /* the XML version string */
- const CHAR *encoding; /* encoding, if any */
- int standalone; /* standalone document */
- int html; /* are we parsing an HTML document */
+ const CHAR *version; /* the XML version string */
+ const CHAR *encoding; /* encoding, if any */
+ int standalone; /* standalone document */
+ int hasExternalSubset; /* references an external subset */
+ int hasPErefs; /* the internal subset has PE refs */
+ int html; /* are we parsing an HTML document */
+ int external; /* are we parsing an external entity */
+ int wellFormed; /* is the document well formed */
+ int valid; /* is the document valid */
+ int validate; /* shall we try to validate ? */
+ xmlValidCtxt vctxt; /* The validity context */
+
+ xmlParserInputState instate; /* current type of input */
+ int token; /* next char look-ahead */
+
+ char *directory; /* the data directory */
+
/* Input stream stack */
xmlParserInputPtr input; /* Current input stream */
int inputNr; /* Number of current input streams */
int inputMax; /* Max number of input streams */
xmlParserInputPtr *inputTab; /* stack of inputs */
- /* Node analysis stack */
+ /* Node analysis stack only used for DOM building */
xmlNodePtr node; /* Current parsed Node */
int nodeNr; /* Depth of the parsing stack */
int nodeMax; /* Max depth of the parsing stack */
@@ -83,10 +138,9 @@
typedef _xmlParserCtxt xmlParserCtxt;
typedef xmlParserCtxt *xmlParserCtxtPtr;
-/*
+/**
* a SAX Locator.
*/
-
typedef struct xmlSAXLocator {
const CHAR *(*getPublicId)(void *ctx);
const CHAR *(*getSystemId)(void *ctx);
@@ -96,8 +150,9 @@
typedef _xmlSAXLocator xmlSAXLocator;
typedef xmlSAXLocator *xmlSAXLocatorPtr;
-/*
- * a SAX Exception.
+/**
+ * A SAX handler is a bunch of callbacks called by the parser when
+ * processing of the input generates data or structure information.
*/
#include "entities.h"
@@ -108,6 +163,8 @@
const CHAR *ExternalID, const CHAR *SystemID);
typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
const CHAR *name);
+typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
+ const CHAR *name);
typedef void (*entityDeclSAXFunc) (void *ctx,
const CHAR *name, int type, const CHAR *publicId,
const CHAR *systemId, CHAR *content);
@@ -138,6 +195,7 @@
typedef void (*processingInstructionSAXFunc) (void *ctx,
const CHAR *target, const CHAR *data);
typedef void (*commentSAXFunc) (void *ctx, const CHAR *value);
+typedef void (*cdataBlockSAXFunc) (void *ctx, const CHAR *value, int len);
typedef void (*warningSAXFunc) (void *ctx, const char *msg, ...);
typedef void (*errorSAXFunc) (void *ctx, const char *msg, ...);
typedef void (*fatalErrorSAXFunc) (void *ctx, const char *msg, ...);
@@ -170,12 +228,13 @@
warningSAXFunc warning;
errorSAXFunc error;
fatalErrorSAXFunc fatalError;
+ getParameterEntitySAXFunc getParameterEntity;
+ cdataBlockSAXFunc cdataBlock;
} xmlSAXHandler;
typedef xmlSAXHandler *xmlSAXHandlerPtr;
-/*
- * Global variables: just the SAX interface tables we are looking for full
- * reentrancy of the code and version infos.
+/**
+ * Global variables: just the default SAX interface tables and XML version infos.
*/
extern const char *xmlParserVersion;
@@ -186,14 +245,14 @@
#include "entities.h"
#include "xml-error.h"
-/*
+/**
* Input functions
*/
int xmlParserInputRead(xmlParserInputPtr in, int len);
int xmlParserInputGrow(xmlParserInputPtr in, int len);
-/*
+/**
* CHAR handling
*/
CHAR *xmlStrdup(const CHAR *cur);
@@ -207,23 +266,23 @@
CHAR *xmlStrcat(CHAR *cur, const CHAR *add);
CHAR *xmlStrncat(CHAR *cur, const CHAR *add, int len);
-/*
- * Interfaces
+/**
+ * Basic parsing Interfaces
*/
xmlDocPtr xmlParseDoc(CHAR *cur);
xmlDocPtr xmlParseMemory(char *buffer, int size);
xmlDocPtr xmlParseFile(const char *filename);
int xmlSubstituteEntitiesDefault(int val);
-/*
+/**
* Recovery mode
*/
xmlDocPtr xmlRecoverDoc(CHAR *cur);
xmlDocPtr xmlRecoverMemory(char *buffer, int size);
xmlDocPtr xmlRecoverFile(const char *filename);
-/*
- * Internal routines
+/**
+ * Less common routines and SAX interfaces
*/
int xmlParseDocument(xmlParserCtxtPtr ctxt);
xmlDocPtr xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery);
diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h
index b107718..ae8e2cc 100644
--- a/include/libxml/parserInternals.h
+++ b/include/libxml/parserInternals.h
@@ -18,6 +18,26 @@
/**
* A few macros needed to help building the parser.
*/
+/* #define UNICODE */
+
+#ifdef UNICODE
+typedef unsigned long CHARVAL;
+/* NEXTCHARVAL(p): decode the UTF-8 sequence at p and advance p past it; yields 0 (p unchanged) on NUL or an invalid lead byte. */
+#define NEXTCHARVAL(p) ((unsigned long) \
+    ((*(p) == 0) ? 0UL : \
+     (*(p) < 0x80) ? (unsigned long) *(p)++ : \
+     (*(p) < 0xC0) ? 0UL : \
+     (*(p) < 0xE0) ? ((p) += 2, (((unsigned long) ((p)[-2] & 0x1F)) << 6) | ((unsigned long) ((p)[-1] & 0x3F))) : \
+     (*(p) < 0xF0) ? ((p) += 3, (((unsigned long) ((p)[-3] & 0x0F)) << 12) | \
+        (((unsigned long) ((p)[-2] & 0x3F)) << 6) | ((unsigned long) ((p)[-1] & 0x3F))) : \
+     (*(p) < 0xF8) ? ((p) += 4, (((unsigned long) ((p)[-4] & 0x07)) << 18) | (((unsigned long) ((p)[-3] & 0x3F)) << 12) | \
+        (((unsigned long) ((p)[-2] & 0x3F)) << 6) | ((unsigned long) ((p)[-1] & 0x3F))) : 0UL))
+#else
+typedef unsigned char CHARVAL;
+/* 8-bit build: NEXTCHARVAL(p) only reads *p; SKIPCHARVAL(p) advances p by one. No trailing ';' so both work inside expressions. */
+#define NEXTCHARVAL(p) ((unsigned long) *(p))
+#define SKIPCHARVAL(p) ((p)++)
+#endif
#ifdef UNICODE
/************************************************************************
@@ -402,7 +422,6 @@
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
#else
-#ifndef USE_UTF_8
/************************************************************************
* *
* 8bits / ISO-Latin version of the macros. *
@@ -453,15 +472,6 @@
*/
#define IS_EXTENDER(c) ((c) == 0xb7)
-#else /* USE_UTF_8 */
-/************************************************************************
- * *
- * 8bits / UTF-8 version of the macros. *
- * *
- ************************************************************************/
-
-TODO !!!
-#endif /* USE_UTF_8 */
#endif /* !UNICODE */
/*
@@ -513,6 +523,10 @@
xmlCreateMemoryParserCtxt(char *buffer, int size);
void
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt);
+xmlParserCtxtPtr
+xmlNewParserCtxt(void); /* (void) makes this a real prototype; "()" leaves the parameters unspecified */
+void
+xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc);
/**
* Entities
@@ -554,6 +568,8 @@
* Generic production rules
*/
CHAR *
+xmlScanName(xmlParserCtxtPtr ctxt);
+CHAR *
xmlParseName(xmlParserCtxtPtr ctxt);
CHAR *
xmlParseNmtoken(xmlParserCtxtPtr ctxt);
@@ -638,6 +654,9 @@
xmlParseXMLDecl(xmlParserCtxtPtr ctxt);
void
xmlParseMisc(xmlParserCtxtPtr ctxt);
+void
+xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const CHAR *ExternalID,
+ const CHAR *SystemID);
/*
* Entities substitution
diff --git a/include/libxml/tree.h b/include/libxml/tree.h
index fddabb7..a627d92 100644
--- a/include/libxml/tree.h
+++ b/include/libxml/tree.h
@@ -97,6 +97,7 @@
typedef struct xmlAttribute {
const CHAR *elem; /* Element holding the attribute */
const CHAR *name; /* Attribute name */
+ struct xmlAttribute *next; /* list of attributes of an element */
xmlAttributeType type; /* The type */
xmlAttributeDefault def; /* the default */
const CHAR *defaultValue;/* or the default value */
@@ -138,9 +139,10 @@
} xmlElementTypeVal;
typedef struct xmlElement {
- const CHAR *name; /* Element name */
- xmlElementTypeVal type; /* The type */
+ const CHAR *name; /* Element name */
+ xmlElementTypeVal type; /* The type */
xmlElementContentPtr content; /* the allowed element content */
+ xmlAttributePtr attributes; /* List of the declared attributes */
} xmlElement;
typedef xmlElement *xmlElementPtr;
@@ -188,7 +190,7 @@
#endif
xmlElementType type; /* XML_ATTRIBUTE_NODE, must be third ! */
struct xmlNode *node; /* attr->node link */
- struct xmlAttr *next; /* parent->childs link */
+ struct xmlAttr *next; /* attribute list link */
const CHAR *name; /* the name of the property */
struct xmlNode *val; /* the value of the property */
} xmlAttr;
@@ -310,6 +312,7 @@
xmlNodePtr xmlNewTextLen(const CHAR *content, int len);
xmlNodePtr xmlNewDocComment(xmlDocPtr doc, const CHAR *content);
xmlNodePtr xmlNewComment(const CHAR *content);
+xmlNodePtr xmlNewCDataBlock(xmlDocPtr doc, const CHAR *content, int len);
xmlNodePtr xmlNewReference(xmlDocPtr doc, const CHAR *name);
xmlNodePtr xmlCopyNode(xmlNodePtr node, int recursive);
xmlNodePtr xmlCopyNodeList(xmlNodePtr node);
diff --git a/include/libxml/valid.h b/include/libxml/valid.h
index 084d97f..7d58459 100644
--- a/include/libxml/valid.h
+++ b/include/libxml/valid.h
@@ -11,6 +11,22 @@
#define __XML_VALID_H__
#include "tree.h"
+/**
+ * an xmlValidCtxt is used for error reporting when validating
+ */
+
+typedef void (*xmlValidityErrorFunc) (void *ctx, const char *msg, ...);
+typedef void (*xmlValidityWarningFunc) (void *ctx, const char *msg, ...);
+
+typedef struct xmlValidCtxt {
+ void *userData; /* user specific data block */
+ xmlValidityErrorFunc error; /* the callback in case of errors */
+ xmlValidityWarningFunc warning; /* the callback in case of warning */
+} xmlValidCtxt, *xmlValidCtxtPtr;
+
+extern void xmlParserValidityError(void *ctx, const char *msg, ...);
+extern void xmlParserValidityWarning(void *ctx, const char *msg, ...);
+
/*
* ALl notation declarations are stored in a table
* there is one table per DTD
@@ -21,7 +37,7 @@
typedef struct xmlNotationTable {
int nb_notations; /* number of notations stored */
int max_notations; /* maximum number of notations */
- xmlNotationPtr table; /* the table of attributes */
+ xmlNotationPtr *table; /* the table of notations */
} xmlNotationTable;
typedef xmlNotationTable *xmlNotationTablePtr;
@@ -35,7 +51,7 @@
typedef struct xmlElementTable {
int nb_elements; /* number of elements stored */
int max_elements; /* maximum number of elements */
- xmlElementPtr table; /* the table of elements */
+ xmlElementPtr *table; /* the table of elements */
} xmlElementTable;
typedef xmlElementTable *xmlElementTablePtr;
@@ -49,13 +65,13 @@
typedef struct xmlAttributeTable {
int nb_attributes; /* number of attributes stored */
int max_attributes; /* maximum number of attributes */
- xmlAttributePtr table; /* the table of attributes */
+ xmlAttributePtr *table; /* the table of attributes */
} xmlAttributeTable;
typedef xmlAttributeTable *xmlAttributeTablePtr;
/* Notation */
-xmlNotationPtr xmlAddNotationDecl(xmlDtdPtr dtd, const CHAR *name,
- const CHAR *PublicID, const CHAR *SystemID);
+xmlNotationPtr xmlAddNotationDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd,
+ const CHAR *name, const CHAR *PublicID, const CHAR *SystemID);
xmlNotationTablePtr xmlCopyNotationTable(xmlNotationTablePtr table);
void xmlFreeNotationTable(xmlNotationTablePtr table);
void xmlDumpNotationTable(xmlBufferPtr buf, xmlNotationTablePtr table);
@@ -66,8 +82,8 @@
void xmlFreeElementContent(xmlElementContentPtr cur);
/* Element */
-xmlElementPtr xmlAddElementDecl(xmlDtdPtr dtd, const CHAR *name, int type,
- xmlElementContentPtr content);
+xmlElementPtr xmlAddElementDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd,
+ const CHAR *name, int type, xmlElementContentPtr content);
xmlElementTablePtr xmlCopyElementTable(xmlElementTablePtr table);
void xmlFreeElementTable(xmlElementTablePtr table);
void xmlDumpElementTable(xmlBufferPtr buf, xmlElementTablePtr table);
@@ -78,11 +94,32 @@
xmlEnumerationPtr xmlCopyEnumeration(xmlEnumerationPtr cur);
/* Attribute */
-xmlAttributePtr xmlAddAttributeDecl(xmlDtdPtr dtd, const CHAR *elem,
- const CHAR *name, int type, int def,
+xmlAttributePtr xmlAddAttributeDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd,
+ const CHAR *elem, const CHAR *name, int type, int def,
const CHAR *defaultValue, xmlEnumerationPtr tree);
xmlAttributeTablePtr xmlCopyAttributeTable(xmlAttributeTablePtr table);
void xmlFreeAttributeTable(xmlAttributeTablePtr table);
void xmlDumpAttributeTable(xmlBufferPtr buf, xmlAttributeTablePtr table);
+/**
+ * The public function calls related to validity checking
+ */
+
+int xmlValidateRoot(xmlValidCtxtPtr ctxt, xmlDocPtr doc);
+int xmlValidateElementDecl(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
+ xmlElementPtr elem);
+int xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
+ xmlAttributePtr attr);
+int xmlValidateNotationDecl(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
+ xmlNotationPtr nota);
+int xmlValidateDtd(xmlValidCtxtPtr ctxt, xmlDocPtr doc, xmlDtdPtr dtd);
+
+int xmlValidateDocument(xmlValidCtxtPtr ctxt, xmlDocPtr doc);
+int xmlValidateElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr elem);
+int xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
+ xmlNodePtr elem);
+int xmlValidateOneAttribute(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
+ xmlNodePtr elem, xmlAttrPtr attr, const CHAR *value);
+
+int xmlIsMixedElement(xmlDocPtr doc, const CHAR *name);
#endif /* __XML_VALID_H__ */
diff --git a/include/libxml/xmlIO.h b/include/libxml/xmlIO.h
index bdeecc3..0e086d7 100644
--- a/include/libxml/xmlIO.h
+++ b/include/libxml/xmlIO.h
@@ -45,6 +45,7 @@
int xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len);
void xmlFreeParserInputBuffer(xmlParserInputBufferPtr in);
+char *xmlParserGetDirectory(const char *filename);
#ifdef __cplusplus
}