/* xml_parser.c - platform/external/libxml2 - Gitiles */

 /*
  * parser.c : an XML 1.0 non-verifying parser
  *
  * See Copyright for the status of this software.
  *
  * $Id$
  */

 #include <config.h>
 #include <stdio.h>
 #include <ctype.h>
 #include <string.h> /* for memset() only */
 #include <malloc.h>
 #include <sys/stat.h>
 #ifdef HAVE_FCNTL_H
 #include <fcntl.h>
 #endif
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
 #endif
 #ifdef HAVE_ZLIB_H
 #include <zlib.h>
 #endif

 #include "xml_tree.h"
 #include "xml_parser.h"
 #include "xml_entities.h"

 /*
  * A few macros needed to help building the parser.
  */

 #ifdef UNICODE
 /*
  * UNICODE version of the character-class macros.
  * The tables are still incomplete (TODO).
  */
 #define IS_CHAR(c) \
     (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
      (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF)))

 /*
  * Incomplete table (TODO).
  * NOTE(review): the range 0xaa..0x5b is empty, so that clause can never
  * match. A single code point (0xaa, mirroring the 0xba test) was
  * presumably intended - confirm against the XML BaseChar production
  * before changing it.
  */
 #define IS_BASECHAR(c) \
     ((((c) >= 0x41) && ((c) <= 0x5a)) || \
      (((c) >= 0x61) && ((c) <= 0x7a)) || \
      (((c) >= 0xaa) && ((c) <= 0x5b)) || \
      (((c) >= 0xc0) && ((c) <= 0xd6)) || \
      (((c) >= 0xd8) && ((c) <= 0xf6)) || \
      (((c) >= 0xf8) && ((c) <= 0xff)) || \
       ((c) == 0xba))

 /* Incomplete table (TODO): only the ASCII digits are recognised. */
 #define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))

 /* Incomplete table (TODO): no combining character is recognised. */
 #define IS_COMBINING(c) 0

 #define IS_IGNORABLE(c) \
     ((((c) >= 0x200c) && ((c) <= 0x200f)) || \
      (((c) >= 0x202a) && ((c) <= 0x202e)) || \
      (((c) >= 0x206a) && ((c) <= 0x206f)) || \
       ((c) == 0xfeff))

 /* The '||' after the 0x3005 test was missing, breaking UNICODE builds. */
 #define IS_EXTENDER(c) \
     (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \
      ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \
      ((c) == 0xec6) || ((c) == 0x3005) || \
      (((c) >= 0x3031) && ((c) <= 0x3035)) || \
      (((c) >= 0x309b) && ((c) <= 0x309e)) || \
      (((c) >= 0x30fc) && ((c) <= 0x30fe)) || \
      (((c) >= 0xff70) && ((c) <= 0xff9e)) || \
       ((c) == 0xff9f))

 #define IS_IDEOGRAPHIC(c) \
     ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) || \
      (((c) >= 0xf900) && ((c) <= 0xfa2d)) || \
      (((c) >= 0x3021) && ((c) <= 0x3029)) || \
       ((c) == 0x3007))

 #define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))

 /* XML white space: S ::= (#x20 | #x9 | #xD | #xA)+ */
 #define IS_BLANK(c) \
     (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
 #else
 /*
  * 8bits / ASCII version of the macros.
  */
 #define IS_CHAR(c) \
     (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || ((c) >= 0x20))

 /*
  * NOTE(review): the range 0xaa..0x5b is empty, so that clause can never
  * match. A single code point (0xaa, mirroring the 0xba test) was
  * presumably intended - confirm before changing it.
  */
 #define IS_BASECHAR(c) \
     ((((c) >= 0x41) && ((c) <= 0x5a)) || \
      (((c) >= 0x61) && ((c) <= 0x7a)) || \
      (((c) >= 0xaa) && ((c) <= 0x5b)) || \
      (((c) >= 0xc0) && ((c) <= 0xd6)) || \
      (((c) >= 0xd8) && ((c) <= 0xf6)) || \
      (((c) >= 0xf8) && ((c) <= 0xff)) || \
       ((c) == 0xba))

 #define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))

 #define IS_LETTER(c) IS_BASECHAR(c)

 #define IS_COMBINING(c) 0

 #define IS_IGNORABLE(c) 0

 #define IS_EXTENDER(c) ((c) == 0xb7)

 /* XML white space: S ::= (#x20 | #x9 | #xD | #xA)+ */
 #define IS_BLANK(c) \
     (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
 #endif


 /*
  * Cursor helpers. Each one takes a pointer lvalue, advances it in place
  * and expands to a single statement, so it is safe in unbraced if/else.
  *
  * There used to be a second, UNICODE-only SKIP_BLANKS that conflicted
  * with the definition below; the IS_BLANK() based one is now the only one.
  */

 /* Skip one end of line: CR, LF, CR LF or LF CR (0x0d / 0x0a). */
 #define SKIP_EOL(p) \
     do { \
         if (*(p) == 0x0d) { (p)++; if (*(p) == 0x0a) (p)++; } \
         else if (*(p) == 0x0a) { (p)++; if (*(p) == 0x0d) (p)++; } \
     } while (0)

 /* Skip any run of blank characters. */
 #define SKIP_BLANKS(p) \
     do { while (IS_BLANK(*(p))) (p)++; } while (0)

 /* Advance to the next '>' or to the first non-CHAR (end of input). */
 #define MOVETO_ENDTAG(p) \
     do { while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++; } while (0)

 /* Advance to the next '<' or to the first non-CHAR (end of input). */
 #define MOVETO_STARTTAG(p) \
     do { while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++; } while (0)

 /*
  * Forward declaration needed for recursive behaviour.
  */
 xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);

 /*
  * xmlHandleData : application specific filtering of a piece of text.
  *
  * The buffer is scanned for as long as IS_CHAR() holds. As soon as a
  * non-blank character is met the buffer is handed back untouched. If
  * the scan ends without meeting one (e.g. the blank-only pieces that
  * WebDav wants eliminated), the buffer is released with free() and NULL
  * is returned. A NULL input yields NULL.
  */

 CHAR *xmlHandleData(CHAR *in) {
     CHAR *scan;

     if (in == NULL)
         return(NULL);

     for (scan = in; IS_CHAR(*scan); scan++) {
         if (!IS_BLANK(*scan))
             return(in);
     }

     /* nothing but blanks: drop the buffer */
     free(in);
     return(NULL);
 }

 /*
  * xmlStrndup : a strndup for arrays of CHAR's
  *
  * cur: the input array (need not be terminated)
  * len: the number of CHAR's to copy
  *
  * Returns a freshly malloc()ed, zero terminated copy of the first len
  * CHAR's of cur, to be released with free(). Returns NULL when cur is
  * NULL, len is negative or the allocation fails.
  */

 CHAR *xmlStrndup(const CHAR *cur, int len) {
     CHAR *ret;
     size_t bytes;

     if ((cur == NULL) || (len < 0)) return(NULL);

     bytes = ((size_t) len + 1) * sizeof(CHAR);
     ret = malloc(bytes);
     if (ret == NULL) {
         /* the size is a size_t: print it through an explicit cast */
         fprintf(stderr, "malloc of %lu byte failed\n",
                 (unsigned long) bytes);
         return(NULL);
     }
     memcpy(ret, cur, (size_t) len * sizeof(CHAR));
     ret[len] = 0;
     return(ret);
 }

 /*
  * xmlStrdup : a strdup for CHAR's
  *
  * cur: a zero terminated array of CHAR's
  *
  * Returns a malloc()ed copy of cur (see xmlStrndup), or NULL when cur
  * is NULL or the allocation fails.
  *
  * The length is measured up to the terminating zero only. The previous
  * IS_CHAR() based scan silently truncated the copy at the first control
  * character.
  */

 CHAR *xmlStrdup(const CHAR *cur) {
     const CHAR *p = cur;

     if (cur == NULL) return(NULL);
     while (*p != 0) p++;
     return(xmlStrndup(cur, (int) (p - cur)));
 }

 /*
  * xmlStrcmp : a strcmp for CHAR's
  *
  * Returns 0 when both strings are equal, otherwise the difference
  * between the first pair of CHAR's that differ. A NULL string sorts
  * before any non-NULL one; two NULL strings are equal.
  *
  * The terminator is tested before advancing, so two empty strings no
  * longer cause a read past their terminating zero.
  */

 int xmlStrcmp(const CHAR *str1, const CHAR *str2) {
     if (str1 == str2) return(0);
     if (str1 == NULL) return(-1);
     if (str2 == NULL) return(1);

     while ((*str1 != 0) && (*str1 == *str2)) {
         str1++;
         str2++;
     }
     return(*str1 - *str2);
 }

 /*
  * xmlStrncmp : a strncmp for CHAR's
  *
  * Compares at most len CHAR's. Returns 0 when the compared prefixes are
  * equal (or len <= 0), otherwise the difference between the first pair
  * of CHAR's that differ. A NULL string sorts before any non-NULL one.
  *
  * The terminator is tested before advancing, so two strings shorter
  * than len that end together no longer cause a read past their
  * terminating zero.
  */

 int xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
     if (len <= 0) return(0);
     if (str1 == str2) return(0);
     if (str1 == NULL) return(-1);
     if (str2 == NULL) return(1);

     /* stop on the len-th CHAR, on a terminator or on a difference */
     while ((len > 1) && (*str1 != 0) && (*str1 == *str2)) {
         str1++;
         str2++;
         len--;
     }
     return(*str1 - *str2);
 }

 /*
  * xmlStrchr : a strchr for CHAR's
  *
  * Returns a pointer to the first occurrence of val in the zero
  * terminated string str, or NULL if val does not occur before the
  * terminator.
  */

 CHAR *xmlStrchr(const CHAR *str, CHAR val) {
     const CHAR *p;

     for (p = str; *p != 0; p++) {
         if (*p == val)
             return((CHAR *) p);
     }
     return(NULL);
 }

 /*
  * xmlParseName : parse an XML name at the current position.
  *
  * Name ::= (Letter | '_') (NameChar)*
  *
  * Returns a copy of the name made with xmlStrndup() and leaves ctxt->cur
  * on the first character after it. Returns NULL without moving the
  * cursor when the current character cannot start a name.
  */

 CHAR *xmlParseName(xmlParserCtxtPtr ctxt) {
     const CHAR *first;

     if ((ctxt->cur[0] != '_') && !IS_LETTER(ctxt->cur[0]))
         return(NULL);

     first = ctxt->cur;
     for (ctxt->cur++;
          (IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
          (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
          (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
          (IS_COMBINING(ctxt->cur[0])) || (IS_IGNORABLE(ctxt->cur[0])) ||
          (IS_EXTENDER(ctxt->cur[0]));
          ctxt->cur++)
         ;

     return(xmlStrndup(first, ctxt->cur - first));
 }

 /*
  * xmlParseQuotedString : parse a string enclosed in quotes or
  * doublequotes.
  *
  * Returns a copy (xmlStrndup) of the text between the delimiters and
  * leaves the cursor after the closing one. Returns NULL when the cursor
  * is not on a quote (cursor untouched) or when the closing delimiter is
  * not found (a message is printed and the cursor stays where the scan
  * stopped).
  */
 CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
     const CHAR *begin;
     CHAR *text;
     CHAR delim = ctxt->cur[0];

     if ((delim != '"') && (delim != '\''))
         return(NULL);

     ctxt->cur++;
     begin = ctxt->cur;
     while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != delim))
         ctxt->cur++;

     if (ctxt->cur[0] != delim) {
         if (delim == '"')
             fprintf(stderr, "String not closed \"%.50s\n", begin);
         else
             fprintf(stderr, "String not closed '%.50s\n", begin);
         return(NULL);
     }

     text = xmlStrndup(begin, ctxt->cur - begin);
     ctxt->cur++;
     return(text);
 }

 /*
  * xmlParserSkipComment : skip an XML (SGML) comment <!-- .... -->
  *
  * Does nothing unless the cursor is on "<!--". On success the cursor is
  * left after the closing "-->". When the comment is not terminated a
  * message is printed and the cursor is left just after the opening
  * "<!--" (no real recovery is attempted).
  *
  * TODO !!!! Save the comment in the tree !!!
  */
 void xmlParserSkipComment(xmlParserCtxtPtr ctxt) {
     const CHAR *start;
     const CHAR *q;      /* two characters behind the cursor */
     const CHAR *r;      /* one character behind the cursor */

     /*
      * An extra check may avoid errors and isn't that costly !
      */
     if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '!') ||
         (ctxt->cur[2] != '-') || (ctxt->cur[3] != '-')) return;

     ctxt->cur += 4;
     start = ctxt->cur;

     /*
      * The scan below looks at three consecutive characters. Make sure
      * the first two exist before stepping over them, otherwise the
      * cursor would run past the end of a truncated input.
      */
     if (IS_CHAR(start[0]) && IS_CHAR(start[1])) {
         q = start;
         r = start + 1;
         ctxt->cur = start + 2;
         while (IS_CHAR(ctxt->cur[0]) &&
                ((ctxt->cur[0] != '>') || (*r != '-') || (*q != '-'))) {
             ctxt->cur++;
             r++;
             q++;
         }
         if (IS_CHAR(ctxt->cur[0])) {
             ctxt->cur++;        /* step over the closing '>' */
             return;
         }
     }

     fprintf(stderr, "Comment not terminated <!--%.50s\n", start);
     ctxt->cur = start; /* !!! We shouldn't really try to recover !!! */
 }

 /*
  * xmlParseNamespace: parse specific '<?namespace ...' constructs.
  */

 void xmlParseNamespace(xmlParserCtxtPtr ctxt) {
     CHAR *href = NULL;
     CHAR *AS = NULL;
     int garbage = 0;

     /*
      * We just skipped "namespace" or "xml:namespace"
      */
     SKIP_BLANKS(ctxt->cur);

     while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '>')) {
 	/*
 	 * We can have "ns" or "prefix" attributes
 	 * Old encoding as 'href' or 'AS' attributes is still supported
 	 */
 	if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 's')) {
 	    garbage = 0;
 	    ctxt->cur += 2;
 	    SKIP_BLANKS(ctxt->cur);

 	    if (ctxt->cur[0] != '=') continue;
 	    ctxt->cur++;
 	    SKIP_BLANKS(ctxt->cur);

 	    href = xmlParseQuotedString(ctxt);
 	    SKIP_BLANKS(ctxt->cur);
 	} else if ((ctxt->cur[0] == 'h') && (ctxt->cur[1] == 'r') &&
 	    (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f')) {
 	    garbage = 0;
 	    ctxt->cur += 4;
 	    SKIP_BLANKS(ctxt->cur);

 	    if (ctxt->cur[0] != '=') continue;
 	    ctxt->cur++;
 	    SKIP_BLANKS(ctxt->cur);

 	    href = xmlParseQuotedString(ctxt);
 	    SKIP_BLANKS(ctxt->cur);
 	} else if ((ctxt->cur[0] == 'p') && (ctxt->cur[1] == 'r') &&
 	           (ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f') &&
 	           (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'x')) {
 	    garbage = 0;
 	    ctxt->cur += 6;
 	    SKIP_BLANKS(ctxt->cur);

 	    if (ctxt->cur[0] != '=') continue;
 	    ctxt->cur++;
 	    SKIP_BLANKS(ctxt->cur);

 	    AS = xmlParseQuotedString(ctxt);
 	    SKIP_BLANKS(ctxt->cur);
 	} else if ((ctxt->cur[0] == 'A') && (ctxt->cur[1] == 'S')) {
 	    garbage = 0;
 	    ctxt->cur += 2;
 	    SKIP_BLANKS(ctxt->cur);

 	    if (ctxt->cur[0] != '=') continue;
 	    ctxt->cur++;
 	    SKIP_BLANKS(ctxt->cur);

 	    AS = xmlParseQuotedString(ctxt);
 	    SKIP_BLANKS(ctxt->cur);
 	} else if ((ctxt->cur[0] == '?') && (ctxt->cur[1] == '>')) {
 	    garbage = 0;
 	    ctxt->cur ++;
 	} else {
             /*
 	     * Found garbage when parsing the namespace
 	     */
 	    if (!garbage) fprintf(stderr,
 	          "\nxmlParseNamespace found garbage: ");
             fprintf(stderr, "%c", ctxt->cur[0]);
             ctxt->cur++;
         }
     }

     MOVETO_ENDTAG(ctxt->cur);
     ctxt->cur++;

     /*
      * Register the DTD.
      */
     if (href != NULL)
         xmlNewDtd(ctxt->doc, href, AS);

     if (AS != NULL) free(AS);
     if (href != NULL) free(href);
 }

 /*
  * xmlParsePI: parse an XML Processing Instruction.
  *
  * Does nothing unless the cursor is on "<?". The WebDav specific
  * '<?namespace ...' and '<?xml:namespace ...' instructions are handed
  * to xmlParseNamespace(); any other instruction is reported and skipped
  * up to and including the next '>'.
  */

 void xmlParsePI(xmlParserCtxtPtr ctxt) {
     if ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '?')) return;

     /*
      * this is a Processing Instruction.
      */
     ctxt->cur += 2;

     /*
      * Special for WebDav, support for the Processing Instruction
      * '<?namespace ...' construct in the header of the XML document.
      */
     if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 'a') &&
         (ctxt->cur[2] == 'm') && (ctxt->cur[3] == 'e') &&
         (ctxt->cur[4] == 's') && (ctxt->cur[5] == 'p') &&
         (ctxt->cur[6] == 'a') && (ctxt->cur[7] == 'c') &&
         (ctxt->cur[8] == 'e')) {
         ctxt->cur += 9;
         xmlParseNamespace(ctxt);
     } else if ((ctxt->cur[0] == 'x') && (ctxt->cur[1] == 'm') &&
                (ctxt->cur[2] == 'l') && (ctxt->cur[3] == ':') &&
                (ctxt->cur[4] == 'n') && (ctxt->cur[5] == 'a') &&
                (ctxt->cur[6] == 'm') && (ctxt->cur[7] == 'e') &&
                (ctxt->cur[8] == 's') && (ctxt->cur[9] == 'p') &&
                (ctxt->cur[10] == 'a') && (ctxt->cur[11] == 'c') &&
                (ctxt->cur[12] == 'e')) {
         ctxt->cur += 13;
         xmlParseNamespace(ctxt);
     } else {
         /*
          * Unknown PI, ignore it ! Only the first 30 characters are
          * shown ("%.30s"; the former "%30s" printed the whole rest of
          * the buffer).
          */
         fprintf(stderr, "xmlParsePI : skipping unknown PI %.30s\n",
                 ctxt->cur);
         MOVETO_ENDTAG(ctxt->cur);
         /* do not step past the terminator when no '>' was found */
         if (ctxt->cur[0] == '>') ctxt->cur++;
     }
 }

 /*
  * xmlParseAttribute: parse one attribute and attach it to node.
  *
  * Attribute ::= Name Eq AttValue
  *
  * Returns without moving the cursor when the current character cannot
  * start a name. The parser is lax: blanks around '=' are accepted and
  * an attribute may have no value at all, in which case it is added
  * with a NULL value. A value that is not quoted is reported and left
  * unparsed.
  */

 void xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
     const CHAR *begin;
     CHAR *attr;
     CHAR *val = NULL;

     if ((ctxt->cur[0] != '_') && !IS_LETTER(ctxt->cur[0]))
         return;

     /* scan the attribute name */
     begin = ctxt->cur;
     for (ctxt->cur++;
          (IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
          (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
          (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
          (IS_COMBINING(ctxt->cur[0])) || (IS_IGNORABLE(ctxt->cur[0])) ||
          (IS_EXTENDER(ctxt->cur[0]));
          ctxt->cur++)
         ;
     attr = xmlStrndup(begin, ctxt->cur - begin);

     /* optional "= value" part */
     SKIP_BLANKS(ctxt->cur);
     if (ctxt->cur[0] == '=') {
         ctxt->cur++;
         SKIP_BLANKS(ctxt->cur);
         if ((ctxt->cur[0] == '\'') || (ctxt->cur[0] == '"')) {
             val = xmlParseQuotedString(ctxt);
         } else {
             fprintf(stderr, "Quotes were expected for attribute value %.20s\n",
                     begin);
         }
     }

     /* xmlNewProp() is given temporary strings which are released here */
     if (attr != NULL) {
         xmlNewProp(node, attr, val);
         free(attr);
     }
     if (val != NULL)
         free(val);
 }

 /*
  * xmlParseStartTag: parse a start of tag.
  *
  * With the namespace support needed for WebDav the element name may be
  * prefixed by a namespace prefix, so the grammar handled here is
  *
  * QName ::= (NSPart ':')? LocalPart
  * NSPart ::= Name
  * LocalPart ::= Name
  * STag ::= '<' QName (S Attribute)* S? '>'
  *
  * Returns the node built with xmlNewNode(), its attributes parsed, with
  * the cursor left on the '>' or "/>" ending the tag (or wherever the
  * attribute scan stopped). Returns NULL when the cursor is not on a
  * well formed tag name.
  *
  * NOTE(review): the copy of the element name is not freed here; whether
  * xmlNewNode() keeps or duplicates it is not visible in this file.
  */

 xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
     const CHAR *mark;
     CHAR *prefix, *local;
     xmlDtdPtr owner = NULL;
     xmlNodePtr node = NULL;

     if (ctxt->cur[0] != '<')
         return(NULL);
     ctxt->cur++;

     if ((ctxt->cur[0] != '_') && !IS_LETTER(ctxt->cur[0]))
         return(NULL);

     /* first name: stops on ':' so a namespace prefix can be detected */
     mark = ctxt->cur;
     for (ctxt->cur++;
          (IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
          (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
          (ctxt->cur[0] == '_') ||
          (IS_COMBINING(ctxt->cur[0])) || (IS_IGNORABLE(ctxt->cur[0])) ||
          (IS_EXTENDER(ctxt->cur[0]));
          ctxt->cur++)
         ;

     if (ctxt->cur[0] != ':') {
         local = xmlStrndup(mark, ctxt->cur - mark);
     } else {
         prefix = xmlStrndup(mark, ctxt->cur - mark);

         ctxt->cur++; /* skip the colon */
         if ((ctxt->cur[0] != '_') && !IS_LETTER(ctxt->cur[0])) {
             fprintf(stderr,
                "Start tag : no element name after namespace identifier %.20s\n",
                     mark);
             free(prefix);
             return(NULL);
         }

         /* local part: further ':' are accepted inside it */
         mark = ctxt->cur;
         for (ctxt->cur++;
              (IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
              (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
              (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
              (IS_COMBINING(ctxt->cur[0])) || (IS_IGNORABLE(ctxt->cur[0])) ||
              (IS_EXTENDER(ctxt->cur[0]));
              ctxt->cur++)
             ;
         local = xmlStrndup(mark, ctxt->cur - mark);

         /*
          * Search the DTD associated to the prefix.
          */
         owner = xmlSearchDtd(ctxt->doc, prefix);
         if (owner == NULL)
             fprintf(stderr, "Start tag : Couldn't find namespace %s\n", prefix);
         free(prefix);
     }

     node = xmlNewNode(owner, local, NULL);

     /*
      * Now parse the attributes, up to the end of the tag
      *
      * (S Attribute)* S?
      */
     SKIP_BLANKS(ctxt->cur);
     while ((IS_CHAR(ctxt->cur[0])) &&
            (ctxt->cur[0] != '>') &&
            !((ctxt->cur[0] == '/') && (ctxt->cur[1] == '>'))) {
         if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) {
             /* We should warn TODO !!! */
             ctxt->cur++;
         } else {
             xmlParseAttribute(ctxt, node);
         }
         SKIP_BLANKS(ctxt->cur);
     }

     return(node);
 }

 /*
  * xmlParseEndTag: parse an end of tag, note that the '</' part has
  * already been read.
  *
  * With the namespace support needed for WebDav the element name may be
  * prefixed by a namespace prefix, so the grammar handled here is
  *
  * QName ::= (NSPart ':')? LocalPart
  * NSPart ::= Name
  * LocalPart ::= Name
  * ETag ::= '</' QName S? '>'
  *
  * dtdPtr: receives the DTD found for the prefix, or NULL
  * tagPtr: receives a copy of the local name, to be freed by the caller
  *
  * Both outputs are NULL when no valid name is found. A missing '>' is
  * reported but the cursor is not moved any further.
  */

 void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlDtdPtr *dtdPtr, CHAR **tagPtr) {
     const CHAR *mark;
     CHAR *prefix, *local;
     xmlDtdPtr owner = NULL;

     *tagPtr = NULL;
     *dtdPtr = NULL;

     if ((ctxt->cur[0] != '_') && !IS_LETTER(ctxt->cur[0]))
         return;

     /* first name: stops on ':' so a namespace prefix can be detected */
     mark = ctxt->cur;
     for (ctxt->cur++;
          (IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
          (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
          (ctxt->cur[0] == '_') ||
          (IS_COMBINING(ctxt->cur[0])) || (IS_IGNORABLE(ctxt->cur[0])) ||
          (IS_EXTENDER(ctxt->cur[0]));
          ctxt->cur++)
         ;

     if (ctxt->cur[0] != ':') {
         local = xmlStrndup(mark, ctxt->cur - mark);
     } else {
         prefix = xmlStrndup(mark, ctxt->cur - mark);

         ctxt->cur++; /* skip the colon */
         if ((ctxt->cur[0] != '_') && !IS_LETTER(ctxt->cur[0])) {
             fprintf(stderr,
                 "End tag : no element name after namespace identifier %.20s\n",
                     mark);
             free(prefix);
             return;
         }

         /* local part: further ':' are accepted inside it */
         mark = ctxt->cur;
         for (ctxt->cur++;
              (IS_LETTER(ctxt->cur[0])) || (IS_DIGIT(ctxt->cur[0])) ||
              (ctxt->cur[0] == '.') || (ctxt->cur[0] == '-') ||
              (ctxt->cur[0] == '_') || (ctxt->cur[0] == ':') ||
              (IS_COMBINING(ctxt->cur[0])) || (IS_IGNORABLE(ctxt->cur[0])) ||
              (IS_EXTENDER(ctxt->cur[0]));
              ctxt->cur++)
             ;
         local = xmlStrndup(mark, ctxt->cur - mark);

         /*
          * Search the DTD associated to the prefix.
          */
         owner = xmlSearchDtd(ctxt->doc, prefix);
         if (owner == NULL)
             fprintf(stderr, "End tag : Couldn't find namespace %s\n", prefix);
         free(prefix);
     }

     *tagPtr = local;
     *dtdPtr = owner;

     /*
      * We should definitely be at the ending "S? '>'" part.
      *
      * Note : skipping to the next '>' on error is probably overkill,
      * especially in case the '>' is just missing. Otherwise add:
      *  MOVETO_ENDTAG(ctxt->cur);
      */
     SKIP_BLANKS(ctxt->cur);
     if (ctxt->cur[0] == '>')
         ctxt->cur++;
     else
         fprintf(stderr, "End tag : expected '>', got %.20s\n", ctxt->cur);
 }

 /*
  * xmlParseCDSect: escaped pure raw content.
  *
  * Called with the cursor just after "<![CDATA[". Returns a copy
  * (xmlStrndup) of the text up to, but not including, the closing "]]>"
  * and leaves the cursor after that "]]>". Returns NULL, after printing
  * a message, when the section is not terminated.
  */
 CHAR *xmlParseCDSect(xmlParserCtxtPtr ctxt) {
     const CHAR *r, *s, *base;
     CHAR *ret;

     base = ctxt->cur;
     if (!IS_CHAR(ctxt->cur[0])) {
         fprintf(stderr, "CData section not finished : %.20s\n", base);
         return(NULL);
     }
     r = ctxt->cur++;
     if (!IS_CHAR(ctxt->cur[0])) {
         fprintf(stderr, "CData section not finished : %.20s\n", base);
         return(NULL);
     }
     s = ctxt->cur++;

     /* r, s and the cursor slide together until they sit on "]]>" */
     while (IS_CHAR(ctxt->cur[0]) &&
            ((*r != ']') || (*s != ']') || (ctxt->cur[0] != '>'))) {
         r++;s++;ctxt->cur++;
     }
     if (!IS_CHAR(ctxt->cur[0])) {
         fprintf(stderr, "CData section not finished : %.20s\n", base);
         return(NULL);
     }

     /*
      * r points at the first ']' of the terminator: the content is
      * [base, r). The "]]" must not be part of the returned text and the
      * final '>' has to be consumed, otherwise it is parsed again as text.
      */
     ret = xmlStrndup(base, r - base);
     ctxt->cur++;

     return(ret);
 }

 /*
  * xmlParseContent: parse one item of content, a content is
  * (element | PCData | Reference | CDSect | PI | Comment)
  *
  * element : starts by '<'
  * PCData : any CHAR but '&' or '<'
  * Reference : starts by '&'
  * CDSect : starts by '<![CDATA['
  * PI : starts by '<?'
  * Comment : starts by '<!--'
  *
  * node: the element whose content is being parsed
  *
  * Returns a new child node (sub-element or text node) for the caller to
  * attach, or NULL when nothing has to be attached: PI, comment, blank
  * text, text stored directly as the content of node, or an error.
  * Every call moves the cursor forward unless it is at the end of input.
  */

 xmlNodePtr xmlParseContent(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
     const CHAR *q;
     CHAR *data = NULL;
     xmlNodePtr ret = NULL;

     /*
      * First case : a Processing Instruction. It is one complete content
      * item: return so the caller re-checks for '</' before going on
      * (falling through used to hand a following end tag to
      * xmlParseElement).
      */
     if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
         xmlParsePI(ctxt);
         return(NULL);
     }
     /*
      * Second case : a comment, skipped for now.
      */
     if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
         (ctxt->cur[2] == '-') && (ctxt->cur[3] == '-')) {
         xmlParserSkipComment(ctxt);
         return(NULL);
     }
     /*
      * Third case : a CDSection
      */
     if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
         (ctxt->cur[2] == '[') && (ctxt->cur[3] == 'C') &&
         (ctxt->cur[4] == 'D') && (ctxt->cur[5] == 'A') &&
         (ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'A') &&
         (ctxt->cur[8] == '[')) {
         ctxt->cur += 9;
         data = xmlParseCDSect(ctxt);
     }
     /*
      * Fourth case :  a sub-element.
      */
     else if (ctxt->cur[0] == '<') {
         ret = xmlParseElement(ctxt);
     }
     /*
      * Last case, text. Note that References are handled directly.
      */
     else {
         q = ctxt->cur;
         while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '<')) ctxt->cur++;

         if (!IS_CHAR(ctxt->cur[0])) {
             fprintf(stderr, "Truncated content : %.50s\n", q);
             return(NULL);
         }

         /*
          * Do the Entities decoding...
          * NOTE(review): the string returned by xmlDecodeEntities() is
          * duplicated and never freed here; confirm who owns it.
          */
         data = xmlStrdup(xmlDecodeEntities(ctxt->doc, q, ctxt->cur - q));
     }

     /*
      * Handle the data if any. If there is no child
      * add it as content, otherwise create a new node of type text.
      */
     if (data != NULL)
         data = xmlHandleData(data);
     if (data != NULL) {
         if (node->childs == NULL)
             xmlNodeSetContent(node, data);
         else
             ret = xmlNewText(data);
         free(data);
     }

     return(ret);
 }

 /*
  * xmlParseElement: parse an XML element
  *
  * Parses the start tag, then the content up to the matching end tag,
  * calling xmlParseContent() (and so, recursively, itself) for each
  * item. "</>" is accepted as a simplified end tag. A mismatch between
  * start and end tag is reported but the element is still returned.
  *
  * Returns the element node, or NULL when the start tag is malformed or
  * the input ends before the end tag.
  * NOTE(review): on those later error paths the partially built node is
  * not released here.
  */

 xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
     xmlNodePtr ret, child;
     const CHAR *openTag = ctxt->cur;
     const CHAR *closeTag;

     ret = xmlParseStartTag(ctxt);
     if (ret == NULL) {
         return(NULL);
     }

     /*
      * Check for an Empty Element.
      */
     if ((ctxt->cur[0] == '/') && (ctxt->cur[1] == '>')) {
         ctxt->cur += 2;
         return(ret);
     }
     if (ctxt->cur[0] == '>') ctxt->cur++;
     else {
         fprintf(stderr, "Couldn't find end of Start Tag %.30s\n", openTag);
         return(NULL);
     }

     /*
      * Parse the content of the element:
      * (element | PCData | Reference | CDSect | PI | Comment) *
      *
      * The loop stops upon detection of an end of tag '</'
      */
     while ((IS_CHAR(ctxt->cur[0])) &&
            ((ctxt->cur[0] != '<') || (ctxt->cur[1] != '/'))) {
         child = xmlParseContent(ctxt, ret);
         if (child != NULL)
             xmlAddChild(ret, child);
     }
     if (!IS_CHAR(ctxt->cur[0])) {
         fprintf(stderr, "Premature end of data in tag %.30s\n", openTag);
         return(NULL);
     }

     /*
      * parse the end of tag : '</' has been detected.
      * Remember where it starts so that the diagnostics below show the
      * end tag and not the start tag twice.
      */
     closeTag = ctxt->cur;
     ctxt->cur += 2;
     if (ctxt->cur[0] == '>') ctxt->cur++; /* simplified closing </> */
     else {
         CHAR *endTag = NULL;
         xmlDtdPtr endDtd = NULL;

         xmlParseEndTag(ctxt, &endDtd, &endTag);

         /*
          * Check that the Name in the ETag is the same as in the STag.
          * xmlParseEndTag() leaves endTag NULL on a malformed tag, which
          * counts as a mismatch rather than being passed to the compare.
          */
         if (endDtd != ret->dtd) {
             fprintf(stderr, "Start and End tags don't use the same DTD:\n");
             fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, closeTag);
         }
         if ((endTag == NULL) || (ret->name == NULL) ||
             (xmlStrcmp(ret->name, endTag) != 0)) {
             fprintf(stderr, "Start and End tags don't use the same name:\n");
             fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, closeTag);
         }

         if ( endTag != NULL )
           free(endTag);
     }

     return(ret);
 }

 /*
  * xmlParseXMLDecl: parse an XML declaration header
  *
  * Called with the cursor on "<?xml" (or "<?XML"). Creates ctxt->doc
  * with the version given by the version attribute, or with
  * XML_DEFAULT_VERSION when it is missing or malformed, then skips to
  * the end of the declaration. Blanks around the '=' are accepted.
  */

 void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
     CHAR *version = NULL;

     /*
      * We know that '<?xml' is here.
      */
     ctxt->cur += 5;

     /*
      * Parse the version info
      */
     SKIP_BLANKS(ctxt->cur);

     /*
      * We should have 'version' Eq here, with Eq ::= S? '=' S?
      */
     if ((ctxt->cur[0] == 'v') && (ctxt->cur[1] == 'e') &&
         (ctxt->cur[2] == 'r') && (ctxt->cur[3] == 's') &&
         (ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'o') &&
         (ctxt->cur[6] == 'n')) {
         ctxt->cur += 7;
         SKIP_BLANKS(ctxt->cur);
         if (ctxt->cur[0] == '=') {
             ctxt->cur++;
             SKIP_BLANKS(ctxt->cur);
             version = xmlParseQuotedString(ctxt);
         }
     }

     if (version == NULL)
         ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
     else {
         ctxt->doc = xmlNewDoc(version);
         free(version);
     }

     /*
      * We should check for Required Markup Declaration TODO !!!!
      */
     MOVETO_ENDTAG(ctxt->cur);
     /* do not step past the terminator when no '>' was found */
     if (ctxt->cur[0] == '>') ctxt->cur++;

 }

 /*
  * xmlParseMisc: parse an XML Misc optional field.
  * (Comment | PI | S)*
  *
  * Consumes any sequence of processing instructions, comments and blanks
  * found at the current position.
  */

 void xmlParseMisc(xmlParserCtxtPtr ctxt) {
     /*
      * A comment needs all of "<!--". The fourth character used to be
      * left unchecked, so "<!-x" entered the loop, was refused by
      * xmlParserSkipComment() and was retried forever.
      */
     while (((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) ||
            ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
             (ctxt->cur[2] == '-') && (ctxt->cur[3] == '-')) ||
            IS_BLANK(ctxt->cur[0])) {
         if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
             xmlParsePI(ctxt);
         } else if (IS_BLANK(ctxt->cur[0])) {
             ctxt->cur++;
         } else
             xmlParserSkipComment(ctxt);
     }
 }

 /*
  * xmlParseDocument : parse an XML document and build a tree.
  *
  * Parses the optional XML declaration, the Misc part of the prolog and
  * then the root element, storing the result in ctxt->doc.
  *
  * Returns 0 once parsing has been attempted (ctxt->doc->root is NULL
  * when the root element could not be parsed), or -1 when the document
  * could not be created.
  */

 int xmlParseDocument(xmlParserCtxtPtr ctxt) {
     /*
      * We should check for encoding here and plug-in some
      * conversion code TODO !!!!
      */

     /*
      * Wipe out everything which is before the first '<'
      */
     SKIP_BLANKS(ctxt->cur);

     /*
      * Check for the XMLDecl in the Prolog. The first drafts were using
      * <?XML and the final W3C REC now use <?xml ... : accept both.
      */
     if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') &&
         (((ctxt->cur[2] == 'x') && (ctxt->cur[3] == 'm') &&
           (ctxt->cur[4] == 'l')) ||
          ((ctxt->cur[2] == 'X') && (ctxt->cur[3] == 'M') &&
           (ctxt->cur[4] == 'L')))) {
         xmlParseXMLDecl(ctxt);
         /* SKIP_EOL(cur); */
         SKIP_BLANKS(ctxt->cur);
     } else {
         ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
     }

     /* without a document there is nowhere to store the tree */
     if (ctxt->doc == NULL) return(-1);

     /*
      * The Misc part of the Prolog
      * (Comment | PI | S) *
      */
     xmlParseMisc(ctxt);

     /*
      * Time to start parsing
      */
     ctxt->doc->root = xmlParseElement(ctxt);

     return(0);
 }

 /*
  * xmlParseDoc : parse an XML in-memory document and build a tree.
  *
  * cur: the document text; parsing stops at the first non-CHAR
  *
  * Uses a temporary parser context which is released before returning.
  * Returns the resulting document, or NULL when cur is NULL or the
  * context cannot be allocated.
  */

 xmlDocPtr xmlParseDoc(CHAR *cur) {
     xmlParserCtxtPtr parser;
     xmlDocPtr doc;

     if (cur == NULL)
         return(NULL);

     parser = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
     if (parser == NULL) {
         perror("malloc");
         return(NULL);
     }

     xmlInitParserCtxt(parser);
     parser->cur = cur;
     parser->base = cur;

     xmlParseDocument(parser);
     doc = parser->doc;

     /* the context is only needed while parsing */
     free(parser->nodes);
     free(parser);

     return(doc);
 }

 /*
  * xmlParseFile : parse an XML file and build a tree.
  *
  * filename: the file to read (through zlib when HAVE_ZLIB_H is defined)
  *
  * The whole file is loaded in a temporary buffer, parsed, and the
  * buffer is released. Returns the resulting document, or NULL when the
  * file cannot be read or memory is exhausted. The buffer and the file
  * handle are released on every path.
  */

 xmlDocPtr xmlParseFile(const char *filename) {
     xmlDocPtr ret;
 #ifdef HAVE_ZLIB_H
     gzFile input;
 #else
     int input;
 #endif
     int res;
     size_t size;        /* maximum number of bytes read in one go */
     struct stat buf;
     char *buffer;
     xmlParserCtxtPtr ctxt;

     if (filename == NULL) return(NULL);
     if (stat(filename, &buf) < 0) return(NULL);

 #ifdef HAVE_ZLIB_H
     /*
      * The file may be compressed: guess a 20x expansion and double the
      * guess for as long as the data fills it completely. Never start
      * from zero, or doubling would not make any progress.
      */
     size = (buf.st_size > 0) ? ((size_t) buf.st_size * 20) : 1024;
 retry_bigger:
 #else
     size = (size_t) buf.st_size;
 #endif
     buffer = malloc(size + 100);
     if (buffer == NULL) {
         perror("malloc");
         return(NULL);
     }
     /* clear the whole allocation, not just sizeof(pointer) bytes */
     memset(buffer, 0, size + 100);

 #ifdef HAVE_ZLIB_H
     input = gzopen (filename, "r");
     if (input == NULL) {
         fprintf (stderr, "Cannot read file %s :\n", filename);
         perror ("gzopen failed");
         free(buffer);
         return(NULL);
     }
     res = gzread(input, buffer, (unsigned) size);
     if (res < 0) {
         fprintf (stderr, "Cannot read file %s :\n", filename);
         perror ("gzread failed");
         gzclose(input);
         free(buffer);
         return(NULL);
     }
     gzclose(input);
     if ((size_t) res >= size) {
         /* buffer completely filled: the data may have been cut */
         free(buffer);
         size *= 2;
         goto retry_bigger;
     }
 #else
     input = open (filename, O_RDONLY);
     if (input < 0) {
         fprintf (stderr, "Cannot read file %s :\n", filename);
         perror ("open failed");
         free(buffer);
         return(NULL);
     }
     res = read(input, buffer, size);
     if (res < 0) {
         fprintf (stderr, "Cannot read file %s :\n", filename);
         perror ("read failed");
         close(input);
         free(buffer);
         return(NULL);
     }
     close(input);
 #endif

     /* terminate after what was actually read */
     buffer[res] = '\0';

     ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
     if (ctxt == NULL) {
         perror("malloc");
         free(buffer);
         return(NULL);
     }

     xmlInitParserCtxt(ctxt);
     ctxt->filename = filename;
     ctxt->base = buffer;
     ctxt->cur = buffer;

     xmlParseDocument(ctxt);
     ret = ctxt->doc;
     free(buffer);
     free(ctxt->nodes);
     free(ctxt);

     return(ret);
 }

 /*
  * xmlParseMemory : parse an XML memory block and build a tree.
  *
  * buffer: the block, which does not need to be zero terminated
  * size: the number of bytes in the block
  *
  * The block is copied into a terminated temporary buffer, so the
  * caller's data is neither modified nor truncated (its last byte used
  * to be overwritten by the terminator). The copy is released after
  * parsing, as xmlParseFile() does with its own buffer.
  *
  * Returns the resulting document, or NULL when buffer is NULL, size is
  * not positive or memory is exhausted.
  */

 xmlDocPtr xmlParseMemory(char *buffer, int size) {
     xmlDocPtr ret;
     xmlParserCtxtPtr ctxt;
     char *copy;

     if ((buffer == NULL) || (size <= 0)) return(NULL);

     copy = malloc((size_t) size + 1);
     if (copy == NULL) {
         perror("malloc");
         return(NULL);
     }
     memcpy(copy, buffer, (size_t) size);
     copy[size] = '\0';

     ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
     if (ctxt == NULL) {
         perror("malloc");
         free(copy);
         return(NULL);
     }

     xmlInitParserCtxt(ctxt);
     ctxt->base = copy;
     ctxt->cur = copy;

     xmlParseDocument(ctxt);
     ret = ctxt->doc;
     free(ctxt->nodes);
     free(ctxt);
     free(copy);

     return(ret);
 }


 /*
  * Initialize parser context
  *
  * Resets every field of ctxt and allocates the node stack (max_depth
  * entries, all NULL). If that allocation fails a message is printed and
  * the context is left with a NULL stack and a max_depth of 0. Any
  * previous content of ctxt is overwritten without being released.
  */
 void xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
 {
     int i;

     if (ctxt == NULL) return;

     ctxt->filename = NULL;
     ctxt->base = NULL;
     ctxt->cur = NULL;
     ctxt->line = 1;
     ctxt->col = 1;
     ctxt->doc = NULL;
     ctxt->depth = 0;
     ctxt->max_depth = 10;
     ctxt->nodes = (xmlNodePtr *) malloc(ctxt->max_depth * sizeof(xmlNodePtr));
     if (ctxt->nodes == NULL) {
         /* the size is a size_t: print it through an explicit cast */
         fprintf(stderr, "malloc of %lu byte failed\n",
                 (unsigned long) (ctxt->max_depth * sizeof(xmlNodePtr)));
         ctxt->max_depth = 0;
     } else {
         for (i = 0;i < ctxt->max_depth;i++)
             ctxt->nodes[i] = NULL;
     }
 }


 /*
  * Clear and reinitialize context.
  *
  * Currently this only calls xmlInitParserCtxt(), which resets every
  * field and allocates a new node stack.
  * NOTE(review): nothing is released here - a node stack already held by
  * ctx is overwritten, not freed. Freeing it would require knowing that
  * ctx has been initialized before; confirm with the callers.
  */
 void xmlClearParserCtxt(xmlParserCtxtPtr ctx)
 {
     xmlInitParserCtxt(ctx);
 }


 /*
  * Prepare the parser context for parsing a new buffer. The context is
  * first reset through xmlClearParserCtxt(), then pointed at the start
  * of buffer. The buffer parameter must not be NULL, but the filename
  * parameter can be.
  */
 void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
                              const char* filename)
 {
     /* start from a freshly initialised context */
     xmlClearParserCtxt(ctxt);

     ctxt->filename = filename;
     ctxt->cur = buffer;
     ctxt->base = buffer;
 }


 /*
  * Report an error: msg is written as is on stderr. The context is not
  * used yet. A NULL msg is ignored.
  */
 void xmlReportError(xmlParserCtxtPtr ctx, const CHAR* msg)
 {
   (void) ctx;
   if (msg == NULL) return;
   /* CHAR is not necessarily plain char: fputs() wants a const char * */
   fputs((const char *) msg, stderr);
 }