Huge commit: 1.5.0, XML validation, Xpath, bugfixes, examples .... Daniel
diff --git a/include/libxml/entities.h b/include/libxml/entities.h
index 8604057..180a40c 100644
--- a/include/libxml/entities.h
+++ b/include/libxml/entities.h
@@ -66,6 +66,7 @@
 xmlEntityPtr xmlGetPredefinedEntity(const CHAR *name);
 xmlEntityPtr xmlGetDocEntity(xmlDocPtr doc, const CHAR *name);
 xmlEntityPtr xmlGetDtdEntity(xmlDocPtr doc, const CHAR *name);
+xmlEntityPtr xmlGetParameterEntity(xmlDocPtr doc, const CHAR *name);
 const CHAR *xmlEncodeEntities(xmlDocPtr doc, const CHAR *input);
 CHAR *xmlEncodeEntitiesReentrant(xmlDocPtr doc, const CHAR *input);
 xmlEntitiesTablePtr xmlCreateEntitiesTable(void);
diff --git a/include/libxml/parser.h b/include/libxml/parser.h
index 43d183e..6659cdf 100644
--- a/include/libxml/parser.h
+++ b/include/libxml/parser.h
@@ -1,5 +1,5 @@
 /*
- * parser.h : constants and stuff related to the XML parser.
+ * parser.h : Interfaces, constants and types related to the XML parser.
  *
  * See Copyright for the status of this software.
  *
@@ -10,6 +10,7 @@
 #define __XML_PARSER_H__
 
 #include "tree.h"
+#include "valid.h"
 #include "xmlIO.h"
 
 #ifdef __cplusplus
@@ -21,12 +22,22 @@
  */
 #define XML_DEFAULT_VERSION	"1.0"
 
+/**
+ * an xmlParserInput is an input flow for the XML processor.
+ * Each entity parsed is associated with an xmlParserInput (except the
+ * few predefined ones). This is the case both for internal entities
+ * - in which case the flow is already completely in memory - and for
+ * external entities - in which case we use the buf structure for
+ * progressive reading and I18N conversions to the internal UTF-8 format.
+ */
+
 typedef void (* xmlParserInputDeallocate)(CHAR *);
 typedef struct xmlParserInput {
     /* Input buffer */
     xmlParserInputBufferPtr buf;      /* UTF-8 encoded buffer */
 
     const char *filename;             /* The file analyzed, if any */
+    const char *directory;            /* the directory/base of the file */
     const CHAR *base;                 /* Base of the array to parse */
     const CHAR *cur;                  /* Current char being parsed */
     int line;                         /* Current line */
@@ -36,6 +47,11 @@
 } xmlParserInput;
 typedef xmlParserInput *xmlParserInputPtr;
 
+/**
+ * the parser can be asked to collect node information, i.e. at what
+ * place in the file each node was detected.
+ * NOTE: This is off by default and not very well tested.
+ */
 typedef struct _xmlParserNodeInfo {
   const struct xmlNode* node;
   /* Position & line # that text that created the node begins & ends on */
@@ -54,24 +70,63 @@
 typedef _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
 typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
 
+/**
+ * The parser is not state based, but it must keep minimal state information
+ * (the kind of input being processed), especially for entity processing.
+ */
+typedef enum xmlParserInputState {
+    XML_PARSER_EOF = 0,
+    XML_PARSER_PROLOG,
+    XML_PARSER_CONTENT,
+    XML_PARSER_ENTITY_DECL,
+    XML_PARSER_ENTITY_VALUE,
+    XML_PARSER_ATTRIBUTE_VALUE,
+    XML_PARSER_DTD,
+    XML_PARSER_EPILOG,
+    XML_PARSER_COMMENT,
+    XML_PARSER_CDATA_SECTION	/* no trailing comma: not valid in C89 */
+} xmlParserInputState;
+
+/**
+ * The parser context.
+ * NOTE This doesn't completely define the parser state, the (current ?)
+ *      design of the parser uses recursive function calls since this allows
+ *      an easy mapping from the production rules of the specification
+ *      to the actual code. The drawback is that the actual function calls
+ *      also reflect the parser state. However most of the parsing routines
+ *      take as the only argument the parser context pointer, so migrating
+ *      to a state based parser for progressive parsing shouldn't be too hard.
+ */
 typedef struct _xmlParserCtxt {
     struct xmlSAXHandler *sax;        /* The SAX handler */
     void            *userData;        /* the document being built */
     xmlDocPtr           myDoc;        /* the document being built */
-    int            wellFormed;        /* is the document well formed */
     int       replaceEntities;        /* shall we replace entities ? */
-    const CHAR     *version;	      /* the XML version string */
-    const CHAR     *encoding;         /* encoding, if any */
-    int             standalone;       /* standalone document */
-    int                   html;       /* are we parsing an HTML document */
+    const CHAR       *version;        /* the XML version string */
+    const CHAR      *encoding;        /* encoding, if any */
+    int            standalone;        /* standalone document */
+    int     hasExternalSubset;        /* references an external subset */
+    int             hasPErefs;        /* the internal subset has PE refs */
+    int                  html;        /* are we parsing an HTML document */
+    int              external;        /* are we parsing an external entity */
 
+    int            wellFormed;        /* is the document well formed */
+    int                 valid;        /* is the document valid */
+    int              validate;        /* shall we try to validate ? */
+    xmlValidCtxt        vctxt;        /* The validity context */
+
+    xmlParserInputState instate;      /* current type of input */
+    int                 token;        /* next char look-ahead */    
+
+    char           *directory;        /* the data directory */
+    
     /* Input stream stack */
     xmlParserInputPtr  input;         /* Current input stream */
     int                inputNr;       /* Number of current input streams */
     int                inputMax;      /* Max number of input streams */
     xmlParserInputPtr *inputTab;      /* stack of inputs */
 
-    /* Node analysis stack */
+    /* Node analysis stack only used for DOM building */
     xmlNodePtr         node;          /* Current parsed Node */
     int                nodeNr;        /* Depth of the parsing stack */
     int                nodeMax;       /* Max depth of the parsing stack */
@@ -83,10 +138,9 @@
 typedef _xmlParserCtxt xmlParserCtxt;
 typedef xmlParserCtxt *xmlParserCtxtPtr;
 
-/*
+/**
  * a SAX Locator.
  */
-
 typedef struct xmlSAXLocator {
     const CHAR *(*getPublicId)(void *ctx);
     const CHAR *(*getSystemId)(void *ctx);
@@ -96,8 +150,9 @@
 typedef _xmlSAXLocator xmlSAXLocator;
 typedef xmlSAXLocator *xmlSAXLocatorPtr;
 
-/*
- * a SAX Exception.
+/**
+ * a SAX handler is a bunch of callbacks called by the parser when processing
+ * of the input generates data or structure information.
  */
 
 #include "entities.h"
@@ -108,6 +163,8 @@
                             const CHAR *ExternalID, const CHAR *SystemID);
 typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
                             const CHAR *name);
+typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
+                            const CHAR *name);
 typedef void (*entityDeclSAXFunc) (void *ctx,
                             const CHAR *name, int type, const CHAR *publicId,
 			    const CHAR *systemId, CHAR *content);
@@ -138,6 +195,7 @@
 typedef void (*processingInstructionSAXFunc) (void *ctx,
                             const CHAR *target, const CHAR *data);
 typedef void (*commentSAXFunc) (void *ctx, const CHAR *value);
+typedef void (*cdataBlockSAXFunc) (void *ctx, const CHAR *value, int len);
 typedef void (*warningSAXFunc) (void *ctx, const char *msg, ...);
 typedef void (*errorSAXFunc) (void *ctx, const char *msg, ...);
 typedef void (*fatalErrorSAXFunc) (void *ctx, const char *msg, ...);
@@ -170,12 +228,13 @@
     warningSAXFunc warning;
     errorSAXFunc error;
     fatalErrorSAXFunc fatalError;
+    getParameterEntitySAXFunc getParameterEntity;
+    cdataBlockSAXFunc cdataBlock;
 } xmlSAXHandler;
 typedef xmlSAXHandler *xmlSAXHandlerPtr;
 
-/*
- * Global variables: just the SAX interface tables we are looking for full
- *      reentrancy of the code and version infos.
+/**
+ * Global variables: just the default SAX interface tables and XML version infos.
  */
 extern const char *xmlParserVersion;
 
@@ -186,14 +245,14 @@
 #include "entities.h"
 #include "xml-error.h"
 
-/*
+/**
  * Input functions
  */
 
 int xmlParserInputRead(xmlParserInputPtr in, int len);
 int xmlParserInputGrow(xmlParserInputPtr in, int len);
 
-/*
+/**
  * CHAR handling
  */
 CHAR *xmlStrdup(const CHAR *cur);
@@ -207,23 +266,23 @@
 CHAR *xmlStrcat(CHAR *cur, const CHAR *add);
 CHAR *xmlStrncat(CHAR *cur, const CHAR *add, int len);
 
-/*
- * Interfaces
+/**
+ * Basic parsing Interfaces
  */
 xmlDocPtr xmlParseDoc(CHAR *cur);
 xmlDocPtr xmlParseMemory(char *buffer, int size);
 xmlDocPtr xmlParseFile(const char *filename);
 int xmlSubstituteEntitiesDefault(int val);
 
-/*
+/**
  * Recovery mode 
  */
 xmlDocPtr xmlRecoverDoc(CHAR *cur);
 xmlDocPtr xmlRecoverMemory(char *buffer, int size);
 xmlDocPtr xmlRecoverFile(const char *filename);
 
-/*
- * Internal routines
+/**
+ * Less common routines and SAX interfaces
  */
 int xmlParseDocument(xmlParserCtxtPtr ctxt);
 xmlDocPtr xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery);
diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h
index b107718..ae8e2cc 100644
--- a/include/libxml/parserInternals.h
+++ b/include/libxml/parserInternals.h
@@ -18,6 +18,26 @@
 /**
  * A few macros needed to help building the parser.
  */
+/* #define UNICODE */
+
+#ifdef UNICODE
+typedef unsigned long CHARVAL;
+/* Decode the UTF-8 sequence at p and step p past it; yields 0 and leaves */
+/* p alone on NUL, a stray continuation byte or a lead byte >= 0xF8.      */
+#define NEXTCHARVAL(p) ((unsigned long)					\
+   ((*(p) == 0) ? 0UL : (*(p) < 0x80) ? (unsigned long) *(p)++ :	\
+    (*(p) < 0xC0) ? 0UL : (*(p) < 0xE0) ? ((p) += 2,			\
+      (((p)[-2] & 0x1FUL) << 6) | ((p)[-1] & 0x3FUL)) :		\
+    (*(p) < 0xF0) ? ((p) += 3, (((p)[-3] & 0x0FUL) << 12) |		\
+      (((p)[-2] & 0x3FUL) << 6) | ((p)[-1] & 0x3FUL)) :		\
+    (*(p) < 0xF8) ? ((p) += 4, (((p)[-4] & 0x07UL) << 18) |		\
+      (((p)[-3] & 0x3FUL) << 12) | (((p)[-2] & 0x3FUL) << 6) |	\
+      ((p)[-1] & 0x3FUL)) : 0UL))
+#else
+typedef unsigned char CHARVAL;
+#define NEXTCHARVAL(p) ((unsigned long) *(p))	/* reads *p, p unchanged */
+#define SKIPCHARVAL(p) ((p)++)			/* steps p by one element */
+#endif
 
 #ifdef UNICODE
 /************************************************************************
@@ -402,7 +422,6 @@
 #define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
 
 #else
-#ifndef USE_UTF_8
 /************************************************************************
  *									*
  * 8bits / ISO-Latin version of the macros.				*
@@ -453,15 +472,6 @@
  */
 #define IS_EXTENDER(c) ((c) == 0xb7)
 
-#else /* USE_UTF_8 */
-/************************************************************************
- *									*
- * 8bits / UTF-8 version of the macros.					*
- *									*
- ************************************************************************/
-
-TODO !!!
-#endif /* USE_UTF_8 */
 #endif /* !UNICODE */
 
 /*
@@ -513,6 +523,10 @@
 xmlCreateMemoryParserCtxt(char *buffer, int size);
 void
 xmlFreeParserCtxt(xmlParserCtxtPtr ctxt);
+xmlParserCtxtPtr
+xmlNewParserCtxt(void);	/* (void): a real prototype taking no arguments */
+void
+xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc);
 
 /**
  * Entities
@@ -554,6 +568,8 @@
  * Generic production rules
  */
 CHAR *
+xmlScanName(xmlParserCtxtPtr ctxt);
+CHAR *
 xmlParseName(xmlParserCtxtPtr ctxt);
 CHAR *
 xmlParseNmtoken(xmlParserCtxtPtr ctxt);
@@ -638,6 +654,9 @@
 xmlParseXMLDecl(xmlParserCtxtPtr ctxt);
 void
 xmlParseMisc(xmlParserCtxtPtr ctxt);
+void
+xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const CHAR *ExternalID,
+                       const CHAR *SystemID);
 
 /*
  * Entities substitution
diff --git a/include/libxml/tree.h b/include/libxml/tree.h
index fddabb7..a627d92 100644
--- a/include/libxml/tree.h
+++ b/include/libxml/tree.h
@@ -97,6 +97,7 @@
 typedef struct xmlAttribute {
     const CHAR            *elem;	/* Element holding the attribute */
     const CHAR            *name;	/* Attribute name */
+    struct xmlAttribute   *next;        /* list of attributes of an element */
     xmlAttributeType       type;	/* The type */
     xmlAttributeDefault    def;		/* the default */
     const CHAR            *defaultValue;/* or the default value */
@@ -138,9 +139,10 @@
 } xmlElementTypeVal;
 
 typedef struct xmlElement {
-    const CHAR          *name;		/* Element name */
-    xmlElementTypeVal    type;		/* The type */
+    const CHAR             *name;	/* Element name */
+    xmlElementTypeVal       type;	/* The type */
     xmlElementContentPtr content;	/* the allowed element content */
+    xmlAttributePtr   attributes;	/* List of the declared attributes */
 } xmlElement;
 typedef xmlElement *xmlElementPtr;
 
@@ -188,7 +190,7 @@
 #endif
     xmlElementType  type;       /* XML_ATTRIBUTE_NODE, must be third ! */
     struct xmlNode *node;	/* attr->node link */
-    struct xmlAttr *next;	/* parent->childs link */
+    struct xmlAttr *next;	/* attribute list link */
     const CHAR     *name;       /* the name of the property */
     struct xmlNode *val;        /* the value of the property */
 } xmlAttr;
@@ -310,6 +312,7 @@
 xmlNodePtr xmlNewTextLen(const CHAR *content, int len);
 xmlNodePtr xmlNewDocComment(xmlDocPtr doc, const CHAR *content);
 xmlNodePtr xmlNewComment(const CHAR *content);
+xmlNodePtr xmlNewCDataBlock(xmlDocPtr doc, const CHAR *content, int len);
 xmlNodePtr xmlNewReference(xmlDocPtr doc, const CHAR *name);
 xmlNodePtr xmlCopyNode(xmlNodePtr node, int recursive);
 xmlNodePtr xmlCopyNodeList(xmlNodePtr node);
diff --git a/include/libxml/valid.h b/include/libxml/valid.h
index 084d97f..7d58459 100644
--- a/include/libxml/valid.h
+++ b/include/libxml/valid.h
@@ -11,6 +11,22 @@
 #define __XML_VALID_H__
 #include "tree.h"
 
+/**
+ * an xmlValidCtxt is used for error reporting when validating
+ */
+
+typedef void (*xmlValidityErrorFunc) (void *ctx, const char *msg, ...);
+typedef void (*xmlValidityWarningFunc) (void *ctx, const char *msg, ...);
+
+typedef struct xmlValidCtxt {
+    void *userData;			/* user specific data block */
+    xmlValidityErrorFunc error;		/* the callback in case of errors */
+    xmlValidityWarningFunc warning;	/* the callback in case of warnings */
+} xmlValidCtxt, *xmlValidCtxtPtr;
+
+extern void xmlParserValidityError(void *ctx, const char *msg, ...);
+extern void xmlParserValidityWarning(void *ctx, const char *msg, ...);
+
 /*
  * ALl notation declarations are stored in a table
  * there is one table per DTD
@@ -21,7 +37,7 @@
 typedef struct xmlNotationTable {
     int nb_notations;		/* number of notations stored */
     int max_notations;		/* maximum number of notations */
-    xmlNotationPtr table;	/* the table of attributes */
+    xmlNotationPtr *table;	/* the table of notations */
 } xmlNotationTable;
 typedef xmlNotationTable *xmlNotationTablePtr;
 
@@ -35,7 +51,7 @@
 typedef struct xmlElementTable {
     int nb_elements;		/* number of elements stored */
     int max_elements;		/* maximum number of elements */
-    xmlElementPtr table;	/* the table of elements */
+    xmlElementPtr *table;	/* the table of elements */
 } xmlElementTable;
 typedef xmlElementTable *xmlElementTablePtr;
 
@@ -49,13 +65,13 @@
 typedef struct xmlAttributeTable {
     int nb_attributes;		/* number of attributes stored */
     int max_attributes;		/* maximum number of attributes */
-    xmlAttributePtr table;	/* the table of attributes */
+    xmlAttributePtr *table;	/* the table of attributes */
 } xmlAttributeTable;
 typedef xmlAttributeTable *xmlAttributeTablePtr;
 
 /* Notation */
-xmlNotationPtr xmlAddNotationDecl(xmlDtdPtr dtd, const CHAR *name,
-	       const CHAR *PublicID, const CHAR *SystemID);
+xmlNotationPtr xmlAddNotationDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd,
+	   const CHAR *name, const CHAR *PublicID, const CHAR *SystemID);
 xmlNotationTablePtr xmlCopyNotationTable(xmlNotationTablePtr table);
 void xmlFreeNotationTable(xmlNotationTablePtr table);
 void xmlDumpNotationTable(xmlBufferPtr buf, xmlNotationTablePtr table);
@@ -66,8 +82,8 @@
 void xmlFreeElementContent(xmlElementContentPtr cur);
 
 /* Element */
-xmlElementPtr xmlAddElementDecl(xmlDtdPtr dtd, const CHAR *name, int type, 
-                                       xmlElementContentPtr content);
+xmlElementPtr xmlAddElementDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd,
+         const CHAR *name, int type, xmlElementContentPtr content);
 xmlElementTablePtr xmlCopyElementTable(xmlElementTablePtr table);
 void xmlFreeElementTable(xmlElementTablePtr table);
 void xmlDumpElementTable(xmlBufferPtr buf, xmlElementTablePtr table);
@@ -78,11 +94,32 @@
 xmlEnumerationPtr xmlCopyEnumeration(xmlEnumerationPtr cur);
 
 /* Attribute */
-xmlAttributePtr xmlAddAttributeDecl(xmlDtdPtr dtd, const CHAR *elem,
-	       const CHAR *name, int type, int def,
+xmlAttributePtr xmlAddAttributeDecl(xmlValidCtxtPtr ctxt, xmlDtdPtr dtd,
+               const CHAR *elem, const CHAR *name, int type, int def,
 	       const CHAR *defaultValue, xmlEnumerationPtr tree);
 xmlAttributeTablePtr xmlCopyAttributeTable(xmlAttributeTablePtr table);
 void xmlFreeAttributeTable(xmlAttributeTablePtr table);
 void xmlDumpAttributeTable(xmlBufferPtr buf, xmlAttributeTablePtr table);
 
+/**
+ * The public function calls related to validity checking
+ */
+
+int xmlValidateRoot(xmlValidCtxtPtr ctxt, xmlDocPtr doc);
+int xmlValidateElementDecl(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
+                           xmlElementPtr elem);
+int xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
+                             xmlAttributePtr attr);
+int xmlValidateNotationDecl(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
+                            xmlNotationPtr nota);
+int xmlValidateDtd(xmlValidCtxtPtr ctxt, xmlDocPtr doc, xmlDtdPtr dtd);
+
+int xmlValidateDocument(xmlValidCtxtPtr ctxt, xmlDocPtr doc);
+int xmlValidateElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr elem);
+int xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
+                          xmlNodePtr elem);
+int xmlValidateOneAttribute(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
+			xmlNodePtr elem, xmlAttrPtr attr, const CHAR *value);
+
+int xmlIsMixedElement(xmlDocPtr doc, const CHAR *name);
 #endif /* __XML_VALID_H__ */
diff --git a/include/libxml/xmlIO.h b/include/libxml/xmlIO.h
index bdeecc3..0e086d7 100644
--- a/include/libxml/xmlIO.h
+++ b/include/libxml/xmlIO.h
@@ -45,6 +45,7 @@
 int xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len);
 
 void xmlFreeParserInputBuffer(xmlParserInputBufferPtr in);
+char *xmlParserGetDirectory(const char *filename);
 
 #ifdef __cplusplus
 }