- doc/encoding.html doc/xml.html: added I18N doc
- encoding.[ch] HTMLtree.[ch] parser.c HTMLparser.c: I18N encoding
  improvements, both parser and filters, added ASCII & HTML,
  fixed the ISO-Latin-1 one
- xmllint.c testHTML.c: added/made visible --encode
- debugXML.c : cleanup
- most .c files: applied patches due to warning on Windows and
  when using Sun Pro cc compiler
- xpath.c : cleanup memleaks
- nanoftp.c : added a TESTING preprocessor flag for standalone
  compile so that people can report bugs more easily
- nanohttp.c : ditched socklen_t which was a portability mess
  and replaced it with unsigned int.
- tree.[ch]: added xmlHasProp()
- TODO: updated
- test/ : added more test for entities, NS, encoding, HTML, wap
- configure.in: preparing for 2.2.0 release
Daniel
diff --git a/ChangeLog b/ChangeLog
index fb47706..c5019df 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,23 @@
+Fri Jul 14 16:12:20 MEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
+
+	* doc/encoding.html doc/xml.html: added I18N doc
+	* encoding.[ch] HTMLtree.[ch] parser.c HTMLparser.c: I18N encoding
+	  improvements, both parser and filters, added ASCII & HTML,
+	  fixed the ISO-Latin-1 one
+	* xmllint.c testHTML.c: added/made visible --encode
+	* debugXML.c : cleanup
+	* most .c files: applied patches due to warning on Windows and
+	  when using Sun Pro cc compiler
+	* xpath.c : cleanup memleaks
+	* nanoftp.c : added a TESTING preprocessor flag for standalone
+	  compile so that people can report bugs more easily
+	* nanohttp.c : ditched socklen_t which was a portability mess
+	  and replaced it with unsigned int.
+	* tree.[ch]: added xmlHasProp()
+	* TODO: updated
+	* test/ : added more test for entities, NS, encoding, HTML, wap
+	* configure.in: preparing for 2.2.0 release
+
 Mon Jul 10 16:17:18 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
 
 	* nanoftp.c: fixed the way the control connection is handled
diff --git a/HTMLparser.c b/HTMLparser.c
index ff33148..0877f4c 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -16,7 +16,7 @@
 #ifdef LIBXML_HTML_ENABLED
 
 #include <stdio.h>
-#include <string.h> /* for memset() only */
+#include <string.h>
 #ifdef HAVE_CTYPE_H
 #include <ctype.h>
 #endif
@@ -41,6 +41,7 @@
 #include <libxml/HTMLparser.h>
 #include <libxml/entities.h>
 #include <libxml/encoding.h>
+#include <libxml/parser.h>
 #include <libxml/valid.h>
 #include <libxml/parserInternals.h>
 #include <libxml/xmlIO.h>
@@ -48,7 +49,7 @@
 
 #define HTML_MAX_NAMELEN 1000
 #define INPUT_CHUNK     50
-#define HTML_PARSER_BIG_BUFFER_SIZE 1024
+#define HTML_PARSER_BIG_BUFFER_SIZE 1000
 #define HTML_PARSER_BUFFER_SIZE 100
 
 /* #define DEBUG */
@@ -68,7 +69,7 @@
 scope int html##name##Push(htmlParserCtxtPtr ctxt, type value) {	\
     if (ctxt->name##Nr >= ctxt->name##Max) {				\
 	ctxt->name##Max *= 2;						\
-        ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab,		\
+        ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab,		\
 	             ctxt->name##Max * sizeof(ctxt->name##Tab[0]));	\
         if (ctxt->name##Tab == NULL) {					\
 	    fprintf(stderr, "realloc failed !\n");			\
@@ -124,8 +125,6 @@
  *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
  */
 
-#define CUR ((int) (*ctxt->input->cur))
-    
 #define UPPER (toupper(*ctxt->input->cur))
 
 #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val)
@@ -142,10 +141,173 @@
 
 #define CURRENT ((int) (*ctxt->input->cur))
 
-#define NEXT htmlNextChar(ctxt);
-
 #define SKIP_BLANKS htmlSkipBlankChars(ctxt);
 
+#if 0
+#define CUR ((int) (*ctxt->input->cur))
+#define NEXT htmlNextChar(ctxt);
+#else
+/* Imported from XML */
+
+/* #define CUR (ctxt->token ? ctxt->token : (int) (*ctxt->input->cur)) */
+#define CUR ((int) (*ctxt->input->cur))
+#define NEXT xmlNextChar(ctxt);ctxt->nbChars++;
+
+#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
+#define NXT(val) ctxt->input->cur[(val)]
+#define CUR_PTR ctxt->input->cur
+
+
+#define NEXTL(l)							\
+    if (*(ctxt->input->cur) == '\n') {					\
+	ctxt->input->line++; ctxt->input->col = 1;			\
+    } else ctxt->input->col++;						\
+    ctxt->token = 0; ctxt->input->cur += l; ctxt->nbChars++;
+    
+/************
+    \
+    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
+    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
+ ************/
+
+#define CUR_CHAR(l) htmlCurrentChar(ctxt, &l);
+#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);
+
+#define COPY_BUF(l,b,i,v)						\
+    if (l == 1) b[i++] = (xmlChar) v;					\
+    else i += xmlCopyChar(l,&b[i],v);
+#endif
+
+/**
+ * htmlCurrentChar:
+ * @ctxt:  the HTML parser context
+ * @len:  pointer to the length of the char read
+ *
+ * The current char value, if using UTF-8 this may actually span multiple
+ * bytes in the input buffer. A pending ctxt->token is returned as is,
+ * with @len set to 0.
+ * If the charset is not UTF-8 and a byte >= 0x80 is found, the input is
+ * switched to the ISO-Latin-1 encoding converter automatically.
+ *
+ * Returns the current char value and its length
+ */
+
+int
+htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
+    if (ctxt->instate == XML_PARSER_EOF) {
+	*len = 0;	/* don't leave @len undefined at end of input */
+	return(0);
+    }
+
+    if (ctxt->token != 0) {
+	*len = 0;
+	return(ctxt->token);
+    }
+    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
+	/*
+	 * We are supposed to handle UTF8, check it's valid
+	 * From rfc2044: encoding of the Unicode values on UTF-8:
+	 *
+	 * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
+	 * 0000 0000-0000 007F   0xxxxxxx
+	 * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
+	 * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
+	 *
+	 * cur is re-read after each grow in case the buffer was relocated
+	 */
+	const unsigned char *cur = ctxt->input->cur;
+	unsigned char c;
+	unsigned int val;
+
+	c = *cur;
+	if (c & 0x80) {
+	    if (cur[1] == 0) {
+		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+		cur = ctxt->input->cur;
+	    }
+	    if ((cur[1] & 0xc0) != 0x80)
+		goto encoding_error;
+	    if ((c & 0xe0) == 0xe0) {
+		if (cur[2] == 0) {
+		    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+		    cur = ctxt->input->cur;
+		}
+		if ((cur[2] & 0xc0) != 0x80)
+		    goto encoding_error;
+		if ((c & 0xf0) == 0xf0) {
+		    if (cur[3] == 0) {
+			xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+			cur = ctxt->input->cur;
+		    }
+		    if (((c & 0xf8) != 0xf0) ||
+			((cur[3] & 0xc0) != 0x80))
+			goto encoding_error;
+		    /* 4-byte code */
+		    *len = 4;
+		    val = (cur[0] & 0x7) << 18;
+		    val |= (cur[1] & 0x3f) << 12;
+		    val |= (cur[2] & 0x3f) << 6;
+		    val |= cur[3] & 0x3f;
+		} else {
+		    /* 3-byte code */
+		    *len = 3;
+		    val = (cur[0] & 0xf) << 12;
+		    val |= (cur[1] & 0x3f) << 6;
+		    val |= cur[2] & 0x3f;
+		}
+	    } else {
+		/* 2-byte code */
+		*len = 2;
+		val = (cur[0] & 0x1f) << 6;
+		val |= cur[1] & 0x3f;
+	    }
+	    if (!IS_CHAR(val)) {
+		if ((ctxt->sax != NULL) &&
+		    (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+				     "Char 0x%X out of allowed range\n", val);
+		ctxt->errNo = XML_ERR_INVALID_ENCODING;
+		ctxt->wellFormed = 0;
+		ctxt->disableSAX = 1;
+	    }
+	    return(val);
+	} else {
+	    /* 1-byte code */
+	    *len = 1;
+	    return((int) *ctxt->input->cur);
+	}
+    }
+    /*
+     * Assume a fixed length (1 byte) encoding compatible with ASCII
+     */
+    *len = 1;
+    if ((int) *ctxt->input->cur < 0x80)
+	return((int) *ctxt->input->cur);
+
+    /* Humm this is bad, do an automatic flow conversion */
+    xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
+    ctxt->charset = XML_CHAR_ENCODING_UTF8;
+    return(xmlCurrentChar(ctxt, len));
+
+encoding_error:
+    /*
+     * An UTF8 error probably means that the input encoding didn't get
+     * properly advertised in the declaration header. Report the error and
+     * switch to ISO-Latin-1 (if you don't like this policy, declare it !)
+     */
+    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
+	ctxt->sax->error(ctxt->userData,
+			 "Input is not proper UTF-8, indicate encoding !\n");
+	ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
+			ctxt->input->cur[0], ctxt->input->cur[1],
+			ctxt->input->cur[2], ctxt->input->cur[3]);
+    }
+    ctxt->errNo = XML_ERR_INVALID_ENCODING;
+    ctxt->charset = XML_CHAR_ENCODING_8859_1;
+    *len = 1;
+    return((int) *ctxt->input->cur);
+}
+
 /**
  * htmlNextChar:
  * @ctxt:  the HTML parser context
@@ -443,8 +605,8 @@
 
 /**
  * htmlCheckAutoClose:
- * @new:  The new tag name
- * @old:  The old tag name
+ * @newtag:  The new tag name
+ * @oldtag:  The old tag name
  *
  * Checks wether the new tag is one of the registered valid tags for closing old.
  * Initialize the htmlStartCloseIndex for fast lookup of closing tags names.
@@ -452,7 +614,7 @@
  * Returns 0 if no, 1 if yes.
  */
 int
-htmlCheckAutoClose(const xmlChar *new, const xmlChar *old) {
+htmlCheckAutoClose(const xmlChar *newtag, const xmlChar *oldtag) {
     int i, index;
     char **close;
 
@@ -462,13 +624,13 @@
     for (index = 0; index < 100;index++) {
         close = htmlStartCloseIndex[index];
 	if (close == NULL) return(0);
-	if (!xmlStrcmp(BAD_CAST *close, new)) break;
+	if (!xmlStrcmp(BAD_CAST *close, newtag)) break;
     }
 
     i = close - htmlStartClose;
     i++;
     while (htmlStartClose[i] != NULL) {
-        if (!xmlStrcmp(BAD_CAST htmlStartClose[i], old)) {
+        if (!xmlStrcmp(BAD_CAST htmlStartClose[i], oldtag)) {
 	    return(1);
 	}
 	i++;
@@ -477,24 +639,73 @@
 }
 
 /**
+ * htmlAutoCloseOnClose:
+ * @ctxt:  an HTML parser context
+ * @newtag:  The new tag name
+ *
+ * The HTML DTD allows an ending tag to implicitly close other tags.
+ */
+void
+htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
+    htmlElemDescPtr info;
+    xmlChar *oldname;
+    int i;
+
+#ifdef DEBUG
+    fprintf(stderr,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr);
+    for (i = 0;i < ctxt->nameNr;i++) 
+        fprintf(stderr,"%d : %s\n", i, ctxt->nameTab[i]);
+#endif
+
+    for (i = (ctxt->nameNr - 1);i >= 0;i--) {
+        if (!xmlStrcmp(newtag, ctxt->nameTab[i])) break;
+    }
+    if (i < 0) return;
+
+    while (xmlStrcmp(newtag, ctxt->name)) {
+	info = htmlTagLookup(ctxt->name);
+	if ((info == NULL) || (info->endTag == 1)) {
+#ifdef DEBUG
+	    fprintf(stderr,"htmlAutoCloseOnClose: %s closes %s\n", newtag, ctxt->name);
+#endif
+        } else {
+	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		ctxt->sax->error(ctxt->userData,
+		 "Opening and ending tag mismatch: %s and %s\n",
+		                 newtag, ctxt->name);
+	    ctxt->wellFormed = 0;
+	}
+	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
+	    ctxt->sax->endElement(ctxt->userData, ctxt->name);
+	oldname = htmlnamePop(ctxt);
+	if (oldname != NULL) {
+#ifdef DEBUG
+	    fprintf(stderr,"htmlAutoCloseOnClose: popped %s\n", oldname);
+#endif
+	    xmlFree(oldname);
+	}	
+    }
+}
+
+/**
  * htmlAutoClose:
  * @ctxt:  an HTML parser context
- * @new:  The new tag name or NULL
+ * @newtag:  The new tag name or NULL
  *
  * The HTmL DtD allows a tag to implicitely close other tags.
  * The list is kept in htmlStartClose array. This function is
  * called when a new tag has been detected and generates the
  * appropriates closes if possible/needed.
- * If new is NULL this mean we are at the end of the resource
+ * If newtag is NULL this mean we are at the end of the resource
  * and we should check 
  */
 void
-htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar *new) {
+htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
     xmlChar *oldname;
-    while ((new != NULL) && (ctxt->name != NULL) && 
-           (htmlCheckAutoClose(new, ctxt->name))) {
+    while ((newtag != NULL) && (ctxt->name != NULL) && 
+           (htmlCheckAutoClose(newtag, ctxt->name))) {
 #ifdef DEBUG
-	fprintf(stderr,"htmlAutoClose: %s closes %s\n", new, ctxt->name);
+	fprintf(stderr,"htmlAutoClose: %s closes %s\n", newtag, ctxt->name);
 #endif
 	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
 	    ctxt->sax->endElement(ctxt->userData, ctxt->name);
@@ -506,7 +717,12 @@
 	    xmlFree(oldname);
         }
     }
-    while ((new == NULL) && (ctxt->name != NULL) &&
+    if (newtag == NULL) {
+	htmlAutoCloseOnClose(ctxt, BAD_CAST"head");
+	htmlAutoCloseOnClose(ctxt, BAD_CAST"body");
+	htmlAutoCloseOnClose(ctxt, BAD_CAST"html");
+    }
+    while ((newtag == NULL) && (ctxt->name != NULL) &&
 	   ((!xmlStrcmp(ctxt->name, BAD_CAST"head")) ||
 	    (!xmlStrcmp(ctxt->name, BAD_CAST"body")) ||
 	    (!xmlStrcmp(ctxt->name, BAD_CAST"html")))) {
@@ -579,66 +795,17 @@
 }
 
 /**
- * htmlAutoCloseOnClose:
- * @ctxt:  an HTML parser context
- * @new:  The new tag name
- *
- * The HTmL DtD allows an ending tag to implicitely close other tags.
- */
-void
-htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *new) {
-    htmlElemDescPtr info;
-    xmlChar *oldname;
-    int i;
-
-#ifdef DEBUG
-    fprintf(stderr,"Close of %s stack: %d elements\n", new, ctxt->nameNr);
-    for (i = 0;i < ctxt->nameNr;i++) 
-        fprintf(stderr,"%d : %s\n", i, ctxt->nameTab[i]);
-#endif
-
-    for (i = (ctxt->nameNr - 1);i >= 0;i--) {
-        if (!xmlStrcmp(new, ctxt->nameTab[i])) break;
-    }
-    if (i < 0) return;
-
-    while (xmlStrcmp(new, ctxt->name)) {
-	info = htmlTagLookup(ctxt->name);
-	if ((info == NULL) || (info->endTag == 1)) {
-#ifdef DEBUG
-	    fprintf(stderr,"htmlAutoCloseOnClose: %s closes %s\n", new, ctxt->name);
-#endif
-        } else {
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		ctxt->sax->error(ctxt->userData,
-		 "Opening and ending tag mismatch: %s and %s\n",
-		                 new, ctxt->name);
-	    ctxt->wellFormed = 0;
-	}
-	if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
-	    ctxt->sax->endElement(ctxt->userData, ctxt->name);
-	oldname = htmlnamePop(ctxt);
-	if (oldname != NULL) {
-#ifdef DEBUG
-	    fprintf(stderr,"htmlAutoCloseOnClose: popped %s\n", oldname);
-#endif
-	    xmlFree(oldname);
-	}	
-    }
-}
-
-/**
  * htmlCheckImplied:
  * @ctxt:  an HTML parser context
- * @new:  The new tag name
+ * @newtag:  The new tag name
  *
  * The HTmL DtD allows a tag to exists only implicitely
  * called when a new tag has been detected and generates the
  * appropriates implicit tags if missing
  */
 void
-htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *new) {
-    if (!xmlStrcmp(new, BAD_CAST"html"))
+htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
+    if (!xmlStrcmp(newtag, BAD_CAST"html"))
 	return;
     if (ctxt->nameNr <= 0) {
 #ifdef DEBUG
@@ -648,15 +815,15 @@
 	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
 	    ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
     }
-    if ((!xmlStrcmp(new, BAD_CAST"body")) || (!xmlStrcmp(new, BAD_CAST"head")))
+    if ((!xmlStrcmp(newtag, BAD_CAST"body")) || (!xmlStrcmp(newtag, BAD_CAST"head")))
         return;
     if (ctxt->nameNr <= 1) {
-	if ((!xmlStrcmp(new, BAD_CAST"script")) ||
-	    (!xmlStrcmp(new, BAD_CAST"style")) ||
-	    (!xmlStrcmp(new, BAD_CAST"meta")) ||
-	    (!xmlStrcmp(new, BAD_CAST"link")) ||
-	    (!xmlStrcmp(new, BAD_CAST"title")) ||
-	    (!xmlStrcmp(new, BAD_CAST"base"))) {
+	if ((!xmlStrcmp(newtag, BAD_CAST"script")) ||
+	    (!xmlStrcmp(newtag, BAD_CAST"style")) ||
+	    (!xmlStrcmp(newtag, BAD_CAST"meta")) ||
+	    (!xmlStrcmp(newtag, BAD_CAST"link")) ||
+	    (!xmlStrcmp(newtag, BAD_CAST"title")) ||
+	    (!xmlStrcmp(newtag, BAD_CAST"base"))) {
 	    /* 
 	     * dropped OBJECT ... i you put it first BODY will be
 	     * assumed !
@@ -1006,6 +1173,114 @@
     return(NULL);
 }
 
+/**
+ * UTF8ToHtml:
+ * @out:  a pointer to an array of bytes to store the result
+ * @outlen:  the length of @out
+ * @in:  a pointer to an array of UTF-8 chars
+ * @inlen:  the length of @in
+ *
+ * Take a block of UTF-8 chars in and try to convert it to an ASCII
+ * plus HTML entities block of chars out.
+ *
+ * Returns 0 if success, -2 if the transcoding fails (malformed UTF-8 or
+ *     a char which is neither ASCII nor in html40EntitiesTable).
+ * The value of @inlen after return is the number of octets consumed.
+ * The value of @outlen after return is the number of octets produced.
+ */
+int
+UTF8ToHtml(unsigned char* out, int *outlen,
+              const unsigned char* in, int *inlen) {
+    const unsigned char* processed = in;
+    const unsigned char* outend;
+    const unsigned char* outstart = out;
+    const unsigned char* instart = in;
+    const unsigned char* inend;
+    unsigned int c, d;
+    int trailing;
+
+    if (in == NULL) {
+	/* initialization nothing to do */
+	*outlen = 0;
+	*inlen = 0;
+	return(0);
+    }
+    inend = in + (*inlen);
+    outend = out + (*outlen);
+    while (in < inend) {
+	d = *in++;
+	if      (d < 0x80)  { c= d; trailing= 0; }
+	else if (d < 0xC0) {
+	    /* trailing byte in leading position */
+	    goto transcoding_failed;
+	} else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
+	else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
+	else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
+	else {
+	    /* no chance for this in Ascii */
+	    goto transcoding_failed;
+	}
+
+	if (inend - in < trailing) {
+	    /* truncated sequence: stop and leave it unconsumed */
+	    break;
+	}
+
+	for ( ; trailing; trailing--) {
+	    d = *in++;
+	    if ((d & 0xC0) != 0x80) {
+		/* malformed continuation byte */
+		goto transcoding_failed;
+	    }
+	    c <<= 6;
+	    c |= d & 0x3F;
+	}
+
+	/* assertion: c is a single UCS-4 value */
+	if (c < 0x80) {
+	    if (out >= outend)
+		break;
+	    *out++ = c;
+	} else {
+	    unsigned int i;
+	    int j, len;
+	    /* Try to lookup a predefined HTML entity for it */
+	    for (i = 0;i < (sizeof(html40EntitiesTable)/
+			    sizeof(html40EntitiesTable[0]));i++) {
+		if (html40EntitiesTable[i].value == c) {
+#ifdef DEBUG
+		    fprintf(stderr,"Found entity %s\n",
+			    html40EntitiesTable[i].name);
+#endif
+		    goto found_ent;
+		}
+		if (html40EntitiesTable[i].value > c)
+		    break;
+	    }
+
+	    /* no chance for this in Ascii */
+	    goto transcoding_failed;
+found_ent:
+	    len = strlen(html40EntitiesTable[i].name);
+	    if (outend - out <= 2 + len)
+		break;
+	    *out++ = '&';
+	    for (j = 0;j < len;j++)
+		*out++ = html40EntitiesTable[i].name[j];
+	    *out++ = ';';
+	}
+	processed = in;
+    }
+    *outlen = out - outstart;
+    *inlen = processed - instart;
+    return(0);
+
+transcoding_failed:
+    *outlen = out - outstart;
+    *inlen = processed - instart;
+    return(-2);
+}
+
 
 /**
  * htmlDecodeEntities:
@@ -1025,15 +1300,23 @@
 xmlChar *
 htmlDecodeEntities(htmlParserCtxtPtr ctxt, int len,
                   xmlChar end, xmlChar  end2, xmlChar end3) {
-    xmlChar *buffer = NULL;
-    int buffer_size = 0;
-    xmlChar *out = NULL;
     xmlChar *name = NULL;
-
-    xmlChar *cur = NULL;
+    xmlChar *buffer = NULL;
+    unsigned int buffer_size = 0;
+    unsigned int nbchars = 0;
     htmlEntityDescPtr ent;
-    int nbchars = 0;
     unsigned int max = (unsigned int) len;
+    int c,l;
+
+    if (ctxt->depth > 40) {
+	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+	    ctxt->sax->error(ctxt->userData,
+		"Detected entity reference loop\n");
+	ctxt->wellFormed = 0;
+	ctxt->disableSAX = 1;
+	ctxt->errNo = XML_ERR_ENTITY_LOOP;
+	return(NULL);
+    }
 
     /*
      * allocate a translation buffer.
@@ -1041,68 +1324,52 @@
     buffer_size = HTML_PARSER_BIG_BUFFER_SIZE;
     buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
     if (buffer == NULL) {
-	perror("htmlDecodeEntities: malloc failed");
+	perror("xmlDecodeEntities: malloc failed");
 	return(NULL);
     }
-    out = buffer;
 
     /*
      * Ok loop until we reach one of the ending char or a size limit.
      */
-    while ((nbchars < (int) max) && (CUR != end) &&
-           (CUR != end2) && (CUR != end3)) {
+    c = CUR_CHAR(l);
+    while ((nbchars < max) && (c != end) &&
+           (c != end2) && (c != end3)) {
 
-        if (CUR == '&') {
-	    if (NXT(1) == '#') {
-		int val = htmlParseCharRef(ctxt);
-		/* invalid for UTF-8 variable encoding !!!!! */
-		*out++ = val;
-		nbchars += 3; /* !!!! */
-	    } else {
-		ent = htmlParseEntityRef(ctxt, &name);
-		if (name != NULL) {
-		    if ((ent == NULL) || (ent->value <= 0) ||
-		        (ent->value >= 255)) {
-		        *out++ = '&';
-		        cur = name;
-			while (*cur != 0) {
-			    if (out - buffer > buffer_size - 100) {
-				int index = out - buffer;
+	if (c == 0) break;
+        if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
+	    int val = htmlParseCharRef(ctxt);
+	    COPY_BUF(0,buffer,nbchars,val);
+	    NEXTL(l);
+	} else if ((c == '&') && (ctxt->token != '&')) {
+	    ent = htmlParseEntityRef(ctxt, &name);
+	    if (name != NULL) {
+		if (ent != NULL) {
+		    int val = ent->value;
+		    COPY_BUF(0,buffer,nbchars,val);
+		    NEXTL(l);
+		} else {
+		    const xmlChar *cur = name;
 
-				growBuffer(buffer);
-				out = &buffer[index];
-			    }
-			    *out++ = *cur++;
-			}
-		        *out++ = ';';
-		    } else {
-			/* invalid for UTF-8 variable encoding !!!!! */
-			*out++ = (xmlChar)ent->value;
-			if (out - buffer > buffer_size - 100) {
-			    int index = out - buffer;
-
-			    growBuffer(buffer);
-			    out = &buffer[index];
-			}
+		    buffer[nbchars++] = '&';
+		    if (nbchars > buffer_size - HTML_PARSER_BUFFER_SIZE) {
+			growBuffer(buffer);
 		    }
-		    nbchars += 2 + xmlStrlen(name);
-		    xmlFree(name);
+		    while (*cur != 0) {
+			buffer[nbchars++] = *cur++;
+		    }
+		    buffer[nbchars++] = ';';
 		}
 	    }
 	} else {
-	    /*  invalid for UTF-8 , use COPY(out); !!!!! */
-	    *out++ = CUR;
-	    nbchars++;
-	    if (out - buffer > buffer_size - 100) {
-	      int index = out - buffer;
-	      
+	    COPY_BUF(l,buffer,nbchars,c);
+	    NEXTL(l);
+	    if (nbchars > buffer_size - HTML_PARSER_BUFFER_SIZE) {
 	      growBuffer(buffer);
-	      out = &buffer[index];
 	    }
-	    NEXT;
 	}
+	c = CUR_CHAR(l);
     }
-    *out++ = 0;
+    buffer[nbchars++] = 0;
     return(buffer);
 }
 
@@ -1152,6 +1419,7 @@
 	ctxt->errNo = XML_ERR_NO_MEMORY;
 	return(NULL);
     }
+    memset(input, 0, sizeof(htmlParserInput));
     input->filename = NULL;
     input->directory = NULL;
     input->base = NULL;
@@ -1161,6 +1429,7 @@
     input->col = 1;
     input->buf = NULL;
     input->free = NULL;
+    input->version = NULL;
     input->consumed = 0;
     input->length = 0;
     return(input);
@@ -1191,6 +1460,7 @@
     for (i = 0;i < len;i++)
         if (!(IS_BLANK(str[i]))) return(0);
 
+    if (CUR == 0) return(1);
     if (CUR != '<') return(0);
     if (ctxt->node == NULL) return(0);
     lastChild = xmlGetLastChild(ctxt->node);
@@ -1427,8 +1697,22 @@
 	if ((stop == 0) && (IS_BLANK(CUR))) break;
         if (CUR == '&') {
 	    if (NXT(1) == '#') {
-		int val = htmlParseCharRef(ctxt);
-		*out++ = val;
+		unsigned int c;
+		int bits;
+
+		c = htmlParseCharRef(ctxt);
+		if      (c <    0x80)
+		        { *out++  = c;                bits= -6; }
+		else if (c <   0x800)
+		        { *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
+		else if (c < 0x10000)
+		        { *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
+		else                 
+		        { *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
+	 
+		for ( ; bits >= 0; bits-= 6) {
+		    *out++  = ((c >> bits) & 0x3F) | 0x80;
+		}
 	    } else {
 		ent = htmlParseEntityRef(ctxt, &name);
 		if (name == NULL) {
@@ -1439,8 +1723,7 @@
 			growBuffer(buffer);
 			out = &buffer[index];
 		    }
-		} else if ((ent == NULL) || (ent->value <= 0) ||
-		           (ent->value >= 255)) {
+		} else if (ent == NULL) {
 		    *out++ = '&';
 		    cur = name;
 		    while (*cur != 0) {
@@ -1454,23 +1737,53 @@
 		    }
 		    xmlFree(name);
 		} else {
-		    *out++ = ent->value;
+		    unsigned int c;
+		    int bits;
+
 		    if (out - buffer > buffer_size - 100) {
 			int index = out - buffer;
 
 			growBuffer(buffer);
 			out = &buffer[index];
 		    }
+		    c = ent->value;	/* no xmlChar cast: it would truncate values above 0xFF */
+		    if      (c <    0x80)
+			{ *out++  = c;                bits= -6; }
+		    else if (c <   0x800)
+			{ *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
+		    else if (c < 0x10000)
+			{ *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
+		    else                 
+			{ *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
+	     
+		    for ( ; bits >= 0; bits-= 6) {
+			*out++  = ((c >> bits) & 0x3F) | 0x80;
+		    }
 		    xmlFree(name);
 		}
 	    }
 	} else {
-	    *out++ = CUR;
+	    unsigned int c;
+	    int bits;
+
 	    if (out - buffer > buffer_size - 100) {
-	      int index = out - buffer;
-	      
-	      growBuffer(buffer);
-	      out = &buffer[index];
+		int index = out - buffer;
+
+		growBuffer(buffer);
+		out = &buffer[index];
+	    }
+	    c = CUR;
+	    if      (c <    0x80)
+		    { *out++  = c;                bits= -6; }
+	    else if (c <   0x800)
+		    { *out++  =((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
+	    else if (c < 0x10000)
+		    { *out++  =((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
+	    else                 
+		    { *out++  =((c >> 18) & 0x07) | 0xF0;  bits= 12; }
+     
+	    for ( ; bits >= 0; bits-= 6) {
+		*out++  = ((c >> bits) & 0x3F) | 0x80;
 	    }
 	    NEXT;
 	}
@@ -1729,60 +2042,49 @@
 
 void
 htmlParseCharData(htmlParserCtxtPtr ctxt, int cdata) {
-    xmlChar *buf = NULL;
-    int len = 0;
-    int size = HTML_PARSER_BUFFER_SIZE;
-    xmlChar q;
+    xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 5];
+    int nbchar = 0;
+    int cur, l;
 
-    buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
-    if (buf == NULL) {
-	fprintf(stderr, "malloc of %d byte failed\n", size);
-	return;
+    SHRINK;
+    cur = CUR_CHAR(l);
+    while (((cur != '<') || (ctxt->token == '<')) &&
+           ((cur != '&') || (ctxt->token == '&')) && 
+	   (IS_CHAR(cur))) {
+	COPY_BUF(l,buf,nbchar,cur);
+	if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
+	    /*
+	     * Ok the segment is to be consumed as chars.
+	     */
+	    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
+		if (areBlanks(ctxt, buf, nbchar)) {
+		    if (ctxt->sax->ignorableWhitespace != NULL)
+			ctxt->sax->ignorableWhitespace(ctxt->userData,
+			                               buf, nbchar);
+		} else {
+		    if (ctxt->sax->characters != NULL)
+			ctxt->sax->characters(ctxt->userData, buf, nbchar);
+		}
+	    }
+	    nbchar = 0;
+	}
+	NEXTL(l);
+	cur = CUR_CHAR(l);
     }
-
-    q = CUR;
-    while ((IS_CHAR(q)) && (q != '<') &&
-           (q != '&')) {
-	if ((q == ']') && (NXT(1) == ']') &&
-	    (NXT(2) == '>')) {
-	    if (cdata) break;
-	    else {
-		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-		    ctxt->sax->error(ctxt->userData,
-		       "Sequence ']]>' not allowed in content\n");
-		ctxt->wellFormed = 0;
+    if (nbchar != 0) {
+	/*
+	 * Ok the segment is to be consumed as chars.
+	 */
+	if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
+	    if (areBlanks(ctxt, buf, nbchar)) {
+		if (ctxt->sax->ignorableWhitespace != NULL)
+		    ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
+	    } else {
+		if (ctxt->sax->characters != NULL)
+		    ctxt->sax->characters(ctxt->userData, buf, nbchar);
 	    }
 	}
-	if (len + 1 >= size) {
-	    size *= 2;
-	    buf = xmlRealloc(buf, size * sizeof(xmlChar));
-	    if (buf == NULL) {
-		fprintf(stderr, "realloc of %d byte failed\n", size);
-		return;
-	    }
-	}
-	buf[len++] = q;
-        NEXT;
-	q = CUR;
     }
-    if (len == 0) {
-	xmlFree(buf);
-	return;
-    }
-
-    /*
-     * Ok the buffer is to be consumed as chars.
-     */
-    if (ctxt->sax != NULL) {
-	if (areBlanks(ctxt, buf, len)) {
-	    if (ctxt->sax->ignorableWhitespace != NULL)
-		ctxt->sax->ignorableWhitespace(ctxt->userData, buf, len);
-	} else {
-	    if (ctxt->sax->characters != NULL)
-		ctxt->sax->characters(ctxt->userData, buf, len);
-        }
-    }
-    xmlFree(buf);
 }
 
 /**
@@ -1889,7 +2191,7 @@
            ((s != '>') || (r != '-') || (q != '-'))) {
 	if (len + 1 >= size) {
 	    size *= 2;
-	    buf = xmlRealloc(buf, size * sizeof(xmlChar));
+	    buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
 	    if (buf == NULL) {
 		fprintf(stderr, "realloc of %d byte failed\n", size);
 		return;
@@ -2130,8 +2432,6 @@
     if ((ctxt == NULL) || (attvalue == NULL))
 	return;
 
-fprintf(stderr, "htmlCheckEncoding: \"%s\"\n", attvalue);
-
     encoding = xmlStrstr(attvalue, BAD_CAST"charset=");
     if (encoding == NULL) 
 	encoding = xmlStrstr(attvalue, BAD_CAST"Charset=");
@@ -2164,6 +2464,7 @@
 	 */
 	if (enc != XML_CHAR_ENCODING_ERROR) {
 	    xmlSwitchEncoding(ctxt, enc);
+	    ctxt->charset = XML_CHAR_ENCODING_UTF8;
 	} else {
 	    /*
 	     * fallback for unknown encodings
@@ -2492,36 +2793,60 @@
 void
 htmlParseReference(htmlParserCtxtPtr ctxt) {
     htmlEntityDescPtr ent;
-    xmlChar out[2];
+    xmlChar out[6];
     xmlChar *name;
-    int val;
     if (CUR != '&') return;
 
     if (NXT(1) == '#') {
-	val = htmlParseCharRef(ctxt);
-	/* invalid for UTF-8 variable encoding !!!!! */
-	out[0] = val;
-	out[1] = 0;
+	unsigned int c;
+	int bits, i = 0;
+
+	c = htmlParseCharRef(ctxt);
+        if      (c <    0x80) { out[i++]= c;                bits= -6; }
+        else if (c <   0x800) { out[i++]=((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
+        else if (c < 0x10000) { out[i++]=((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
+        else                  { out[i++]=((c >> 18) & 0x07) | 0xF0;  bits= 12; }
+ 
+        for ( ; bits >= 0; bits-= 6) {
+            out[i++]= ((c >> bits) & 0x3F) | 0x80;
+        }
+	out[i] = 0;
+
 	if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
-	    ctxt->sax->characters(ctxt->userData, out, 1);
+	    ctxt->sax->characters(ctxt->userData, out, i);
     } else {
 	ent = htmlParseEntityRef(ctxt, &name);
 	if (name == NULL) {
 	    ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
 	    return;
 	}
-	if ((ent == NULL) || (ent->value <= 0) || (ent->value >= 255)) {
+	if ((ent == NULL) || (ent->value <= 0)) {
 	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL)) {
 		ctxt->sax->characters(ctxt->userData, BAD_CAST "&", 1);
 		ctxt->sax->characters(ctxt->userData, name, xmlStrlen(name));
 		/* ctxt->sax->characters(ctxt->userData, BAD_CAST ";", 1); */
 	    }
 	} else {
-	    /* invalid for UTF-8 variable encoding !!!!! */
-	    out[0] = ent->value;
-	    out[1] = 0;
+	    unsigned int c;
+	    int bits, i = 0;
+
+	    c = ent->value;
+	    if      (c <    0x80)
+	            { out[i++]= c;                bits= -6; }
+	    else if (c <   0x800)
+	            { out[i++]=((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
+	    else if (c < 0x10000)
+	            { out[i++]=((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
+	    else                 
+	            { out[i++]=((c >> 18) & 0x07) | 0xF0;  bits= 12; }
+     
+	    for ( ; bits >= 0; bits-= 6) {
+		out[i++]= ((c >> bits) & 0x3F) | 0x80;
+	    }
+	    out[i] = 0;
+
 	    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
-		ctxt->sax->characters(ctxt->userData, out, 1);
+		ctxt->sax->characters(ctxt->userData, out, i);
 	}
 	xmlFree(name);
     }
@@ -2761,10 +3086,12 @@
     }	
 
     if (!IS_CHAR(CUR)) {
+	/************
 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 	    ctxt->sax->error(ctxt->userData,
 	         "Premature end of data in tag %s\n", currentNode);
 	ctxt->wellFormed = 0;
+	 *************/
 
 	/*
 	 * end of parsing of this node.
@@ -3458,9 +3785,17 @@
 		if ((avail == 1) && (terminate)) {
 		    cur = in->cur[0];
 		    if ((cur != '<') && (cur != '&')) {
-			if ((ctxt->sax != NULL) &&
-			    (ctxt->sax->characters != NULL))
-			ctxt->sax->characters(ctxt->userData, &cur, 1);
+			if (ctxt->sax != NULL) {
+			    if (IS_BLANK(cur)) {
+				if (ctxt->sax->ignorableWhitespace != NULL)
+				    ctxt->sax->ignorableWhitespace(
+					    ctxt->userData, &cur, 1);
+			    } else {
+				if (ctxt->sax->characters != NULL)
+				    ctxt->sax->characters(
+					    ctxt->userData, &cur, 1);
+			    }
+			}
 			ctxt->token = 0;
 			ctxt->checkIndex = 0;
 			NEXT;
@@ -3599,6 +3934,14 @@
 		fprintf(stderr, "HPP: entering START_TAG\n");
 #endif
 		break;
+	    case XML_PARSER_SYSTEM_LITERAL:
+		fprintf(stderr, "HPP: internal error, state == XML_PARSER_SYSTEM_LITERAL\n");
+		ctxt->instate = XML_PARSER_CONTENT;
+		ctxt->checkIndex = 0;
+#ifdef DEBUG_PUSH
+		fprintf(stderr, "HPP: entering CONTENT\n");
+#endif
+		break;
 	}
     }
 done:    
diff --git a/HTMLparser.h b/HTMLparser.h
index 44d9c27..b04e3b0 100644
--- a/HTMLparser.h
+++ b/HTMLparser.h
@@ -81,6 +81,10 @@
 					 void *userData);
 htmlDocPtr		htmlParseFile	(const char *filename,
 					 const char *encoding);
+int			UTF8ToHtml	(unsigned char* out,
+					 int *outlen,
+					 const unsigned char* in,
+					 int *inlen);
 
 /**
  * Interfaces for the Push mode
diff --git a/HTMLtree.c b/HTMLtree.c
index d981ec0..d8c5dc6 100644
--- a/HTMLtree.c
+++ b/HTMLtree.c
@@ -32,6 +32,305 @@
 #include <libxml/entities.h>
 #include <libxml/valid.h>
 
+/************************************************************************
+ *									*
+ *   		Getting/Setting encoding meta tags			*
+ *									*
+ ************************************************************************/
+
+/**
+ * htmlGetMetaEncoding:
+ * @doc:  the document
+ * 
+ * Encoding definition lookup in the Meta tags
+ *
+ * Returns the current encoding as flagged in the HTML source
+ */
+const xmlChar *
+htmlGetMetaEncoding(htmlDocPtr doc) {
+    htmlNodePtr cur;
+    const xmlChar *content;
+    const xmlChar *encoding;
+
+    if (doc == NULL)
+	return(NULL);
+    cur = doc->children;
+
+    /*
+     * Search the html
+     */
+    while (cur != NULL) {
+	if (cur->name != NULL) {
+	    if (!xmlStrcmp(cur->name, BAD_CAST"html"))
+		break;
+	    if (!xmlStrcmp(cur->name, BAD_CAST"head"))
+		goto found_head;
+	    if (!xmlStrcmp(cur->name, BAD_CAST"meta"))
+		goto found_meta;
+	}
+	cur = cur->next;
+    }
+    if (cur == NULL)
+	return(NULL);
+    cur = cur->children;
+
+    /*
+     * Search the head
+     */
+    while (cur != NULL) {
+	if (cur->name != NULL) {
+	    if (!xmlStrcmp(cur->name, BAD_CAST"head"))
+		break;
+	    if (!xmlStrcmp(cur->name, BAD_CAST"meta"))
+		goto found_meta;
+	}
+	cur = cur->next;
+    }
+    if (cur == NULL)
+	return(NULL);
+found_head:
+    cur = cur->children;
+
+    /*
+     * Search the meta elements
+     */
+found_meta:
+    while (cur != NULL) {
+	if (cur->name != NULL) {
+	    if (!xmlStrcmp(cur->name, BAD_CAST"meta")) {
+		xmlAttrPtr attr = cur->properties;
+		int http;
+		const xmlChar *value;
+
+		content = NULL;
+		http = 0;
+		while (attr != NULL) {
+		    if ((attr->children != NULL) &&
+		        (attr->children->type == XML_TEXT_NODE) &&
+		        (attr->children->next == NULL)) {
+#ifndef XML_USE_BUFFER_CONTENT
+			value = attr->children->content;
+#else
+			value = xmlBufferContent(attr->children->content);
+#endif
+			if (((!xmlStrcmp(attr->name, BAD_CAST"http-equiv")) ||
+			     (!xmlStrcmp(attr->name, BAD_CAST"Http-Equiv")) ||
+			     (!xmlStrcmp(attr->name, BAD_CAST"HTTP-EQUIV"))) &&
+			    ((!xmlStrcmp(value, BAD_CAST"Content-Type")) ||
+			     (!xmlStrcmp(value, BAD_CAST"content-type")) ||
+			     (!xmlStrcmp(value, BAD_CAST"CONTENT-TYPE"))))
+			    http = 1;
+			else if ((value != NULL) &&
+				 ((!xmlStrcmp(attr->name, BAD_CAST"content")) ||
+				  (!xmlStrcmp(attr->name, BAD_CAST"Content")) ||
+				  (!xmlStrcmp(attr->name, BAD_CAST"CONTENT"))))
+			    content = value;
+			if ((http != 0) && (content != NULL))
+			    goto found_content;
+		    }
+		    attr = attr->next;
+		}
+	    }
+	}
+	cur = cur->next;
+    }
+    return(NULL);
+
+found_content:
+    encoding = xmlStrstr(content, BAD_CAST"charset=");
+    if (encoding == NULL) 
+	encoding = xmlStrstr(content, BAD_CAST"Charset=");
+    if (encoding == NULL) 
+	encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
+    if (encoding != NULL) {
+	encoding += 8;
+    } else {
+	encoding = xmlStrstr(content, BAD_CAST"charset =");
+	if (encoding == NULL) 
+	    encoding = xmlStrstr(content, BAD_CAST"Charset =");
+	if (encoding == NULL) 
+	    encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
+	if (encoding != NULL)
+	    encoding += 9;
+    }
+    if (encoding != NULL) {
+	while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
+    }
+    return(encoding);
+}
+
+/**
+ * htmlSetMetaEncoding:
+ * @doc:  the document
+ * @encoding:  the encoding string
+ * 
+ * Sets the current encoding in the Meta tags
+ * NOTE: this will not change the document content encoding, just
+ * the META flag associated.
+ *
+ * Returns 0 in case of success and -1 in case of error
+ */
+int
+htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
+    htmlNodePtr cur, meta;
+    const xmlChar *content;
+    char newcontent[100];
+
+
+    if (doc == NULL)
+	return(-1);
+
+    if (encoding != NULL) {
+#ifndef HAVE_SNPRINTF
+	sprintf(newcontent, "text/html; charset=%s", encoding);
+#else /* HAVE_SNPRINTF */
+	snprintf(newcontent, 99, "text/html; charset=%s", encoding);
+#endif /* HAVE_SNPRINTF */
+	newcontent[99] = 0;
+    }
+
+    cur = doc->children;
+
+    /*
+     * Search the html
+     */
+    while (cur != NULL) {
+	if (cur->name != NULL) {
+	    if (!xmlStrcmp(cur->name, BAD_CAST"html"))
+		break;
+	    if (!xmlStrcmp(cur->name, BAD_CAST"body")) {
+		if (encoding == NULL)
+		    return(0);
+		meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
+		xmlAddPrevSibling(cur, meta);
+		cur = meta;
+		meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
+		xmlAddChild(cur, meta);
+		xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
+		xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
+		return(0);
+	    }
+	    if (!xmlStrcmp(cur->name, BAD_CAST"head"))
+		goto found_head;
+	    if (!xmlStrcmp(cur->name, BAD_CAST"meta"))
+		goto found_meta;
+	}
+	cur = cur->next;
+    }
+    if (cur == NULL)
+	return(-1);
+    cur = cur->children;
+
+    /*
+     * Search the head
+     */
+    while (cur != NULL) {
+	if (cur->name != NULL) {
+	    if (!xmlStrcmp(cur->name, BAD_CAST"head"))
+		break;
+	    if (!xmlStrcmp(cur->name, BAD_CAST"body")) {
+		if (encoding == NULL)
+		    return(0);
+		meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL);
+		xmlAddPrevSibling(cur, meta);
+		cur = meta;
+		meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
+		xmlAddChild(cur, meta);
+		xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
+		xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
+		return(0);
+	    }
+	    if (!xmlStrcmp(cur->name, BAD_CAST"meta"))
+		goto found_meta;
+	}
+	cur = cur->next;
+    }
+    if (cur == NULL)
+	return(-1);
+found_head:
+    if (cur->children == NULL) {
+	if (encoding == NULL)
+	    return(0);
+	meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
+	xmlAddChild(cur, meta);
+	xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
+	xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
+	return(0);
+    }
+    cur = cur->children;
+
+found_meta:
+    if (encoding != NULL) {
+	/*
+	 * Create a new Meta element with the right attributes
+	 */
+
+	meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
+	xmlAddPrevSibling(cur, meta);
+	xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
+	xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
+    }
+
+    /*
+     * Search and destroy all the remaining meta elements carrying
+     * encoding information
+     */
+    while (cur != NULL) {
+	if (cur->name != NULL) {
+	    if (!xmlStrcmp(cur->name, BAD_CAST"meta")) {
+		xmlAttrPtr attr = cur->properties;
+		int http;
+		const xmlChar *value;
+
+		content = NULL;
+		http = 0;
+		while (attr != NULL) {
+		    if ((attr->children != NULL) &&
+		        (attr->children->type == XML_TEXT_NODE) &&
+		        (attr->children->next == NULL)) {
+#ifndef XML_USE_BUFFER_CONTENT
+			value = attr->children->content;
+#else
+			value = xmlBufferContent(attr->children->content);
+#endif
+			if (((!xmlStrcmp(attr->name, BAD_CAST"http-equiv")) ||
+			     (!xmlStrcmp(attr->name, BAD_CAST"Http-Equiv")) ||
+			     (!xmlStrcmp(attr->name, BAD_CAST"HTTP-EQUIV"))) &&
+			    ((!xmlStrcmp(value, BAD_CAST"Content-Type")) ||
+			     (!xmlStrcmp(value, BAD_CAST"content-type")) ||
+			     (!xmlStrcmp(value, BAD_CAST"CONTENT-TYPE"))))
+			    http = 1;
+			else if ((value != NULL) &&
+				 ((!xmlStrcmp(attr->name, BAD_CAST"content")) ||
+				  (!xmlStrcmp(attr->name, BAD_CAST"Content")) ||
+				  (!xmlStrcmp(attr->name, BAD_CAST"CONTENT"))))
+			    content = value;
+			if ((http != 0) && (content != NULL))
+			    break;
+		    }
+		    attr = attr->next;
+		}
+		if ((http != 0) && (content != NULL)) {
+		    meta = cur;
+		    cur = cur->next;
+		    xmlUnlinkNode(meta);
+                    xmlFreeNode(meta);
+		    continue;
+		}
+
+	    }
+	}
+	cur = cur->next;
+    }
+    return(0);
+}
+
+/************************************************************************
+ *									*
+ *   		Dumping HTML tree content to a simple buffer		*
+ *									*
+ ************************************************************************/
+
 static void
 htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
 
@@ -168,7 +467,6 @@
 	if (cur->content != NULL) {
             xmlChar *buffer;
 
-	    /* uses the HTML encoding routine !!!!!!!!!! */
 #ifndef XML_USE_BUFFER_CONTENT
             buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
 #else
@@ -319,7 +617,7 @@
         htmlNodeListDump(buf, cur, cur->children);
     }
     xmlBufferWriteChar(buf, "\n");
-    cur->type = type;
+    cur->type = (xmlElementType) type;
 }
 
 /**
@@ -357,59 +655,470 @@
 }
 
 
+/************************************************************************
+ *									*
+ *   		Dumping HTML tree content to an I/O output buffer	*
+ *									*
+ ************************************************************************/
+
+static void
+htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding);
+
+/**
+ * htmlDtdDumpOutput:
+ * @buf:  the HTML buffer output
+ * @doc:  the document
+ * 
+ * Dump the HTML document DTD, if any.
+ */
+static void
+htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, const char *encoding) {
+    xmlDtdPtr cur = doc->intSubset;
+
+    if (cur == NULL) {
+        fprintf(stderr, "htmlDtdDump : no internal subset\n");
+	return;
+    }
+    xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
+    xmlOutputBufferWriteString(buf, (const char *)cur->name);
+    if (cur->ExternalID != NULL) {
+	xmlOutputBufferWriteString(buf, " PUBLIC ");
+	xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
+	if (cur->SystemID != NULL) {
+	    xmlOutputBufferWriteString(buf, " ");
+	    xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
+	} 
+    }  else if (cur->SystemID != NULL) {
+	xmlOutputBufferWriteString(buf, " SYSTEM ");
+	xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
+    }
+    xmlOutputBufferWriteString(buf, ">\n");
+}
+
+/**
+ * htmlAttrDumpOutput:
+ * @buf:  the HTML buffer output
+ * @doc:  the document
+ * @cur:  the attribute pointer
+ *
+ * Dump an HTML attribute
+ */
+static void
+htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
+    xmlChar *value;
+
+    if (cur == NULL) {
+        fprintf(stderr, "htmlAttrDump : property == NULL\n");
+	return;
+    }
+    xmlOutputBufferWriteString(buf, " ");
+    xmlOutputBufferWriteString(buf, (const char *)cur->name);
+    if (cur->children != NULL) {
+	value = xmlNodeListGetString(doc, cur->children, 0);
+	if (value) {
+	    xmlOutputBufferWriteString(buf, "=");
+	    xmlBufferWriteQuotedString(buf->buffer, value);
+	    xmlFree(value);
+	} else  {
+	    xmlOutputBufferWriteString(buf, "=\"\"");
+	}
+    }
+}
+
+/**
+ * htmlAttrListDumpOutput:
+ * @buf:  the HTML buffer output
+ * @doc:  the document
+ * @cur:  the first attribute pointer
+ *
+ * Dump a list of HTML attributes
+ */
+static void
+htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
+    if (cur == NULL) {
+        fprintf(stderr, "htmlAttrListDump : property == NULL\n");
+	return;
+    }
+    while (cur != NULL) {
+        htmlAttrDumpOutput(buf, doc, cur, encoding);
+	cur = cur->next;
+    }
+}
+
+
+void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+	                xmlNodePtr cur, const char *encoding);
+
+/**
+ * htmlNodeListDumpOutput:
+ * @buf:  the HTML buffer output
+ * @doc:  the document
+ * @cur:  the first node
+ *
+ * Dump an HTML node list, recursive behaviour, children are printed too.
+ */
+static void
+htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
+    if (cur == NULL) {
+        fprintf(stderr, "htmlNodeListDump : node == NULL\n");
+	return;
+    }
+    while (cur != NULL) {
+        htmlNodeDumpOutput(buf, doc, cur, encoding);
+	cur = cur->next;
+    }
+}
+
+/**
+ * htmlNodeDumpOutput:
+ * @buf:  the HTML buffer output
+ * @doc:  the document
+ * @cur:  the current node
+ *
+ * Dump an HTML node, recursive behaviour, children are printed too.
+ */
+void
+htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) {
+    htmlElemDescPtr info;
+
+    if (cur == NULL) {
+        fprintf(stderr, "htmlNodeDump : node == NULL\n");
+	return;
+    }
+    /*
+     * Special cases.
+     */
+    if (cur->type == XML_DTD_NODE)
+	return;
+    if (cur->type == XML_HTML_DOCUMENT_NODE) {
+	htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
+	return;
+    }
+    if (cur->type == HTML_TEXT_NODE) {
+	if (cur->content != NULL) {
+            xmlChar *buffer;
+
+#ifndef XML_USE_BUFFER_CONTENT
+            buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
+#else
+            buffer = xmlEncodeEntitiesReentrant(doc, 
+                                                xmlBufferContent(cur->content));
+#endif 
+	    if (buffer != NULL) {
+		xmlOutputBufferWriteString(buf, (const char *)buffer);
+		xmlFree(buffer);
+	    }
+	}
+	return;
+    }
+    if (cur->type == HTML_COMMENT_NODE) {
+	if (cur->content != NULL) {
+	    xmlOutputBufferWriteString(buf, "<!--");
+#ifndef XML_USE_BUFFER_CONTENT
+	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
+#else
+	    xmlOutputBufferWriteString(buf, xmlBufferContent(cur->content));
+#endif
+	    xmlOutputBufferWriteString(buf, "-->");
+	}
+	return;
+    }
+    if (cur->type == HTML_ENTITY_REF_NODE) {
+        xmlOutputBufferWriteString(buf, "&");
+	xmlOutputBufferWriteString(buf, (const char *)cur->name);
+        xmlOutputBufferWriteString(buf, ";");
+	return;
+    }
+
+    /*
+     * Get specific HTML info for that node.
+     */
+    info = htmlTagLookup(cur->name);
+
+    xmlOutputBufferWriteString(buf, "<");
+    xmlOutputBufferWriteString(buf, (const char *)cur->name);
+    if (cur->properties != NULL)
+        htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
+
+    if ((info != NULL) && (info->empty)) {
+        xmlOutputBufferWriteString(buf, ">");
+	if (cur->next != NULL) {
+	    if ((cur->next->type != HTML_TEXT_NODE) &&
+		(cur->next->type != HTML_ENTITY_REF_NODE))
+		xmlOutputBufferWriteString(buf, "\n");
+	}
+	return;
+    }
+    if ((cur->content == NULL) && (cur->children == NULL)) {
+        if ((info != NULL) && (info->endTag != 0))
+	    xmlOutputBufferWriteString(buf, ">");
+	else {
+	    xmlOutputBufferWriteString(buf, "></");
+	    xmlOutputBufferWriteString(buf, (const char *)cur->name);
+	    xmlOutputBufferWriteString(buf, ">");
+	}
+	if (cur->next != NULL) {
+	    if ((cur->next->type != HTML_TEXT_NODE) &&
+		(cur->next->type != HTML_ENTITY_REF_NODE))
+		xmlOutputBufferWriteString(buf, "\n");
+	}
+	return;
+    }
+    xmlOutputBufferWriteString(buf, ">");
+    if (cur->content != NULL) {
+#if 0
+	xmlChar *buffer;
+
+#ifndef XML_USE_BUFFER_CONTENT
+    buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
+#else
+    buffer = xmlEncodeEntitiesReentrant(doc, 
+                                        xmlBufferContent(cur->content));
+#endif
+	if (buffer != NULL) {
+	    xmlOutputBufferWriteString(buf, buffer);
+	    xmlFree(buffer);
+	}
+#else
+	    /*
+	     * Uses the OutputBuffer property to automatically convert
+	     * invalids to charrefs
+	     */
+
+#ifndef XML_USE_BUFFER_CONTENT
+            xmlOutputBufferWriteString(buf, (const char *) cur->content);
+#else
+            xmlOutputBufferWriteString(buf, 
+		           (const char *) xmlBufferContent(cur->content));
+#endif 
+#endif 
+    }
+    if (cur->children != NULL) {
+        if ((cur->children->type != HTML_TEXT_NODE) &&
+	    (cur->children->type != HTML_ENTITY_REF_NODE) &&
+	    (cur->children != cur->last))
+	    xmlOutputBufferWriteString(buf, "\n");
+	htmlNodeListDumpOutput(buf, doc, cur->children, encoding);
+        if ((cur->last->type != HTML_TEXT_NODE) &&
+	    (cur->last->type != HTML_ENTITY_REF_NODE) &&
+	    (cur->children != cur->last))
+	    xmlOutputBufferWriteString(buf, "\n");
+    }
+    if (!htmlIsAutoClosed(doc, cur)) {
+	xmlOutputBufferWriteString(buf, "</");
+	xmlOutputBufferWriteString(buf, (const char *)cur->name);
+	xmlOutputBufferWriteString(buf, ">");
+    }
+    if (cur->next != NULL) {
+        if ((cur->next->type != HTML_TEXT_NODE) &&
+	    (cur->next->type != HTML_ENTITY_REF_NODE))
+	    xmlOutputBufferWriteString(buf, "\n");
+    }
+}
+
+/**
+ * htmlDocContentDumpOutput:
+ * @buf:  the HTML buffer output
+ * @cur:  the document
+ *
+ * Dump an HTML document.
+ */
+static void
+htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding) {
+    int type;
+
+    /*
+     * force to output the stuff as HTML, especially for entities
+     */
+    type = cur->type;
+    cur->type = XML_HTML_DOCUMENT_NODE;
+    if (cur->intSubset != NULL)
+        htmlDtdDumpOutput(buf, cur, NULL);
+    else {
+	/* Default to HTML-4.0 transitional @@@@ */
+	xmlOutputBufferWriteString(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
+
+    }
+    if (cur->children != NULL) {
+        htmlNodeListDumpOutput(buf, cur, cur->children, encoding);
+    }
+    xmlOutputBufferWriteString(buf, "\n");
+    cur->type = (xmlElementType) type;
+}
+
+
+/************************************************************************
+ *									*
+ *		Saving functions front-ends				*
+ *									*
+ ************************************************************************/
+
 /**
  * htmlDocDump:
  * @f:  the FILE*
  * @cur:  the document
  *
  * Dump an HTML document to an open FILE.
+ *
+ * returns: the number of bytes written or -1 in case of failure.
  */
-void
+int
 htmlDocDump(FILE *f, xmlDocPtr cur) {
-    xmlBufferPtr buf;
+    xmlOutputBufferPtr buf;
+    xmlCharEncodingHandlerPtr handler = NULL;
+    const char *encoding;
+    int ret;
 
     if (cur == NULL) {
 #ifdef DEBUG_TREE
         fprintf(stderr, "htmlDocDump : document == NULL\n");
 #endif
-	return;
+	return(-1);
     }
-    buf = xmlBufferCreate();
-    if (buf == NULL) return;
-    htmlDocContentDump(buf, cur);
-    xmlBufferDump(f, buf);
-    xmlBufferFree(buf);
+
+    encoding = (const char *) htmlGetMetaEncoding(cur);
+
+    if (encoding != NULL) {
+	xmlCharEncoding enc;
+
+	enc = xmlParseCharEncoding(encoding);
+	if (enc != cur->charset) {
+	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
+		/*
+		 * Not supported yet
+		 */
+		return(-1);
+	    }
+
+	    handler = xmlFindCharEncodingHandler(encoding);
+	    if (handler == NULL)
+		return(-1);
+	}
+    }
+
+    /*
+     * Fallback to HTML or ASCII when the encoding is unspecified
+     */
+    if (handler == NULL)
+	handler = xmlFindCharEncodingHandler("HTML");
+    if (handler == NULL)
+	handler = xmlFindCharEncodingHandler("ascii");
+
+    buf = xmlOutputBufferCreateFile(f, handler);
+    if (buf == NULL) return(-1);
+    htmlDocContentDumpOutput(buf, cur, NULL);
+
+    ret = xmlOutputBufferClose(buf);
+    return(ret);
 }
 
 /**
  * htmlSaveFile:
- * @filename:  the filename
+ * @filename:  the filename (or URL)
  * @cur:  the document
  *
- * Dump an HTML document to a file.
- * 
+ * Dump an HTML document to a file. If @filename is "-" the stdout file is
+ * used.
  * returns: the number of byte written or -1 in case of failure.
  */
 int
 htmlSaveFile(const char *filename, xmlDocPtr cur) {
-    xmlBufferPtr buf;
-    FILE *output = NULL;
+    xmlOutputBufferPtr buf;
+    xmlCharEncodingHandlerPtr handler = NULL;
+    const char *encoding;
     int ret;
 
+    encoding = (const char *) htmlGetMetaEncoding(cur);
+
+    if (encoding != NULL) {
+	xmlCharEncoding enc;
+
+	enc = xmlParseCharEncoding(encoding);
+	if (enc != cur->charset) {
+	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
+		/*
+		 * Not supported yet
+		 */
+		return(-1);
+	    }
+
+	    handler = xmlFindCharEncodingHandler(encoding);
+	    if (handler == NULL)
+		return(-1);
+	}
+    }
+
+    /*
+     * Fallback to HTML or ASCII when the encoding is unspecified
+     */
+    if (handler == NULL)
+	handler = xmlFindCharEncodingHandler("HTML");
+    if (handler == NULL)
+	handler = xmlFindCharEncodingHandler("ascii");
+
     /* 
      * save the content to a temp buffer.
      */
-    buf = xmlBufferCreate();
+    buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
     if (buf == NULL) return(0);
-    htmlDocContentDump(buf, cur);
 
-    output = fopen(filename, "w");
-    if (output == NULL) return(-1);
-    ret = xmlBufferDump(output, buf);
-    fclose(output);
+    htmlDocContentDumpOutput(buf, cur, NULL);
 
-    xmlBufferFree(buf);
-    return(ret * sizeof(xmlChar));
+    ret = xmlOutputBufferClose(buf);
+    return(ret);
 }
 
+/**
+ * htmlSaveFileEnc:
+ * @filename:  the filename
+ * @cur:  the document
+ *
+ * Dump an HTML document to a file using a given encoding.
+ * 
+ * returns: the number of bytes written or -1 in case of failure.
+ */
+int
+htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
+    xmlOutputBufferPtr buf;
+    xmlCharEncodingHandlerPtr handler = NULL;
+    int ret;
+
+    if (encoding != NULL) {
+	xmlCharEncoding enc;
+
+	enc = xmlParseCharEncoding(encoding);
+	if (enc != cur->charset) {
+	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
+		/*
+		 * Not supported yet
+		 */
+		return(-1);
+	    }
+
+	    handler = xmlFindCharEncodingHandler(encoding);
+	    if (handler == NULL)
+		return(-1);
+            htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
+	}
+    }
+
+    /*
+     * Fallback to HTML or ASCII when the encoding is unspecified
+     */
+    if (handler == NULL)
+	handler = xmlFindCharEncodingHandler("HTML");
+    if (handler == NULL)
+	handler = xmlFindCharEncodingHandler("ascii");
+
+    /* 
+     * create the output buffer for the target file and dump the content.
+     */
+    buf = xmlOutputBufferCreateFilename(filename, handler, 0);
+    if (buf == NULL) return(0);
+
+    htmlDocContentDumpOutput(buf, cur, encoding);
+
+    ret = xmlOutputBufferClose(buf);
+    return(ret);
+}
 #endif /* LIBXML_HTML_ENABLED */
diff --git a/HTMLtree.h b/HTMLtree.h
index d41d8d9..feff3a4 100644
--- a/HTMLtree.h
+++ b/HTMLtree.h
@@ -23,12 +23,27 @@
 #define HTML_ENTITY_REF_NODE	XML_ENTITY_REF_NODE
 #define HTML_COMMENT_NODE	XML_COMMENT_NODE
 
-void htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size);
-void htmlDocDump(FILE *f, xmlDocPtr cur);
-int htmlSaveFile(const char *filename, xmlDocPtr cur);
-void htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
-void htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur);
-htmlDocPtr htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID);
+htmlDocPtr	htmlNewDoc		(const xmlChar *URI,
+					 const xmlChar *ExternalID);
+const xmlChar *	htmlGetMetaEncoding	(htmlDocPtr doc);
+int		htmlSetMetaEncoding	(htmlDocPtr doc,
+					 const xmlChar *encoding);
+void		htmlDocDumpMemory	(xmlDocPtr cur,
+					 xmlChar**mem,
+					 int *size);
+int		htmlDocDump		(FILE *f,
+					 xmlDocPtr cur);
+int		htmlSaveFile		(const char *filename,
+					 xmlDocPtr cur);
+void		htmlNodeDump		(xmlBufferPtr buf,
+					 xmlDocPtr doc,
+					 xmlNodePtr cur);
+void		htmlNodeDumpFile	(FILE *out,
+					 xmlDocPtr doc,
+					 xmlNodePtr cur);
+int		htmlSaveFileEnc		(const char *filename,
+					 xmlDocPtr cur,
+					 const char *encoding);
 
 #ifdef __cplusplus
 }
diff --git a/SAX.c b/SAX.c
index c352a04..80922fd 100644
--- a/SAX.c
+++ b/SAX.c
@@ -14,13 +14,14 @@
 #endif
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <libxml/xmlmemory.h>
 #include <libxml/tree.h>
 #include <libxml/parser.h>
 #include <libxml/parserInternals.h>
 #include <libxml/valid.h>
 #include <libxml/entities.h>
-#include "xml-error.h"
+#include <libxml/xml-error.h>
 #include <libxml/debugXML.h>
 #include <libxml/xmlIO.h>
 #include <libxml/SAX.h>
@@ -206,7 +207,7 @@
 	int oldwellFormed;
 	xmlParserInputPtr input = NULL;
 	xmlCharEncoding enc;
-	xmlCharEncoding oldcharset;
+	int oldcharset;
 
 	/*
 	 * Ask the Entity resolver to load the damn thing
@@ -426,10 +427,12 @@
     name = xmlSplitQName(ctxt, fullname, &prefix);
     if (ctxt->inSubset == 1)
 	attr = xmlAddAttributeDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, elem,
-                               name, prefix, type, def, defaultValue, tree);
+	       name, prefix, (xmlAttributeType) type,
+	       (xmlAttributeDefault) def, defaultValue, tree);
     else if (ctxt->inSubset == 2)
 	attr = xmlAddAttributeDecl(&ctxt->vctxt, ctxt->myDoc->extSubset, elem,
-                               name, prefix, type, def, defaultValue, tree);
+	   name, prefix, (xmlAttributeType) type, 
+	   (xmlAttributeDefault) def, defaultValue, tree);
     else {
 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 	    ctxt->sax->error(ctxt, 
@@ -470,10 +473,10 @@
     
     if (ctxt->inSubset == 1)
 	elem = xmlAddElementDecl(&ctxt->vctxt, ctxt->myDoc->intSubset,
-                             name, type, content);
+                             name, (xmlElementTypeVal) type, content);
     else if (ctxt->inSubset == 2)
 	elem = xmlAddElementDecl(&ctxt->vctxt, ctxt->myDoc->extSubset,
-                             name, type, content);
+                             name, (xmlElementTypeVal) type, content);
     else {
 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 	    ctxt->sax->error(ctxt, 
diff --git a/SAX.h b/SAX.h
index a3bd102..3c0f4cb 100644
--- a/SAX.h
+++ b/SAX.h
@@ -16,7 +16,7 @@
 #include <libxml/xlink.h>
 
 #ifdef __cplusplus
-#define extern "C" {
+extern "C" {
 #endif
 const xmlChar *	getPublicId			(void *ctx);
 const xmlChar *	getSystemId			(void *ctx);
diff --git a/TODO b/TODO
index 8b1bc46..f9ef8ca 100644
--- a/TODO
+++ b/TODO
@@ -6,8 +6,6 @@
 TODO:
 =====
 
-- If the internal encoding is not UTF8 saving to a given encoding doesn't
-  work
 - problem when parsing hrefs with & with the HTML parser (IRC ac)
 - DOM needs
   xmlAttrPtr xmlNewDocProp(xmlDocPtr doc, const xmlChar *name, const xmlChar *value)
@@ -26,11 +24,8 @@
   http://www.w3.org/XML/xml-19980210-errata ... bummmer 
 - Handle undefined namespaces in entity contents better ... at least
   issue a warning
-- Issue warning when using non-absolute namespaces URI.
 - fix --disable-corba configure switch handling, and use XML_WITHOUT_CORBA
   not WITHOUT_CORBA flag
-- the html parser should add <head> and <body> if they don't exist
-  started, not finished.
 
 TODO:
 =====
@@ -96,8 +91,16 @@
 Done:
 =====
 
+- If the internal encoding is not UTF8 saving to a given encoding doesn't
+  work => fix to force UTF8 encoding ...
+  done, added documentation too
+- Add an ASCII I/O encoder (asciiToUTF8 and UTF8Toascii)
+- Issue warning when using non-absolute namespaces URI.
+- the html parser should add <head> and <body> if they don't exist
+  started, not finished.
+  Done, the automatic closing is added and 3 testcases were inserted
 - Command to force the parser to stop parsing and ignore the rest of the file.
-  xmlStopParser() should allow this
+  xmlStopParser() should allow this, mostly untested
 - support for HTML empty attributes like <hr noshade>
 - plugged iconv() in for support of a large set of encodings.
 - xmlSwitchToEncoding() rewrite done
diff --git a/configure.in b/configure.in
index 7c25323..0ae4e05 100644
--- a/configure.in
+++ b/configure.in
@@ -4,8 +4,8 @@
 AM_CONFIG_HEADER(config.h)
 
 LIBXML_MAJOR_VERSION=2
-LIBXML_MINOR_VERSION=1
-LIBXML_MICRO_VERSION=1
+LIBXML_MINOR_VERSION=2
+LIBXML_MICRO_VERSION=0
 LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION
 LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION
 
diff --git a/debugXML.c b/debugXML.c
index 88158d8..5b21110 100644
--- a/debugXML.c
+++ b/debugXML.c
@@ -17,6 +17,7 @@
 #ifdef LIBXML_DEBUG_ENABLED
 
 #include <stdio.h>
+#include <string.h>
 #ifdef HAVE_STDLIB_H
 #include <stdlib.h>
 #endif
@@ -39,6 +40,8 @@
     for (i = 0;i < 40;i++)
         if (str[i] == 0) return;
 	else if (IS_BLANK(str[i])) fputc(' ', output);
+	else if (str[i] >= 0x80)
+	     fprintf(output, "#%X", str[i]);
 	else fputc(str[i], output);
     fprintf(output, "...");
 }
@@ -221,9 +224,11 @@
 	fprintf(output, "PBM: not a Elem\n");
 	return;
     }
-    if (elem->name != NULL)
-	fprintf(output, "ELEMDECL(%s)", elem->name);
-    else
+    if (elem->name != NULL) {
+	fprintf(output, "ELEMDECL(");
+	xmlDebugDumpString(output, elem->name);
+	fprintf(output, ")");
+    } else
 	fprintf(output, "PBM ELEMDECL noname!!!");
     switch (elem->etype) {
 	case XML_ELEMENT_TYPE_EMPTY: 
@@ -288,9 +293,11 @@
 	fprintf(output, "PBM: not a Entity decl\n");
 	return;
     }
-    if (ent->name != NULL)
-	fprintf(output, "ENTITYDECL(%s)", ent->name);
-    else
+    if (ent->name != NULL) {
+	fprintf(output, "ENTITYDECL(");
+	xmlDebugDumpString(output, ent->name);
+	fprintf(output, ")");
+    } else
 	fprintf(output, "PBM ENTITYDECL noname!!!");
     switch (ent->etype) {
 	case XML_INTERNAL_GENERAL_ENTITY: 
@@ -434,7 +441,9 @@
 
     fprintf(output, shift);
 
-    fprintf(output, "ATTRIBUTE %s\n", attr->name);
+    fprintf(output, "ATTRIBUTE ");
+    xmlDebugDumpString(output, attr->name);
+    fprintf(output, "\n");
     if (attr->children != NULL) 
         xmlDebugDumpNodeList(output, attr->children, depth + 1);
 
@@ -479,10 +488,12 @@
 	case XML_ELEMENT_NODE:
 	    fprintf(output, shift);
 	    fprintf(output, "ELEMENT ");
-	    if (node->ns != NULL)
-	        fprintf(output, "%s:%s\n", node->ns->prefix, node->name);
-	    else
-	        fprintf(output, "%s\n", node->name);
+	    if (node->ns != NULL) {
+		xmlDebugDumpString(output, node->ns->prefix);
+	        fprintf(output, ":");
+	    }
+	    xmlDebugDumpString(output, node->name);
+	    fprintf(output, "\n");
 	    break;
 	case XML_ATTRIBUTE_NODE:
 	    fprintf(output, shift);
diff --git a/doc/html/book1.html b/doc/html/book1.html
index e066da6..e69de29 100644
--- a/doc/html/book1.html
+++ b/doc/html/book1.html
@@ -1,276 +0,0 @@
-<HTML
-><HEAD
-><TITLE
->Gnome XML Library Reference Manual</TITLE
-><META
-NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.33"><LINK
-REL="NEXT"
-TITLE="Libxml Programming Notes"
-HREF="libxml-notes.html"></HEAD
-><BODY
-BGCOLOR="#FFFFFF"
-TEXT="#000000"
-><DIV
-CLASS="BOOK"
-><DIV
-CLASS="TITLEPAGE"
-><TABLE
-WIDTH="100%"
-BORDER="0"
-BGCOLOR="#000000"
-CELLPADDING="1"
-CELLSPACING="0"
-><TR
-><TH
-ALIGN="center"
-VALIGN="center"
-><FONT
-COLOR="#FFFFFF"
-SIZE="7"
-><P
-CLASS="TITLE"
-><A
-NAME="AEN2"
->Gnome XML Library Reference Manual</A
-></P
-></FONT
-></TH
-></TR
-></TABLE
-><H3
-CLASS="AUTHOR"
->Daniel Veillard</H3
-><DIV
-CLASS="AFFILIATION"
-><DIV
-CLASS="ADDRESS"
-><P
-CLASS="LITERALLAYOUT"
->	&nbsp;&nbsp;&nbsp;&nbsp;Daniel.Veillard@w3.org<br>
-	&nbsp;&nbsp;</P
-></DIV
-></DIV
-><P
-CLASS="COPYRIGHT"
->Copyright © 1999 by <SPAN
-CLASS="HOLDER"
->Daniel Veillard</SPAN
-></P
-><DIV
-><DIV
-CLASS="ABSTRACT"
-><P
-></P
-><P
->This manual documents the interfaces of the libxml
-      library and has some short notes to help get you up to speed
-      with using the library.</P
-><P
-></P
-></DIV
-></DIV
-><DIV
-CLASS="LEGALNOTICE"
-><P
-></P
-><P
->Permission is granted to make and distribute verbatim
-      copies of this manual provided the copyright notice and this
-      permission notice are preserved on all copies.</P
-><P
->Permission is granted to copy and distribute modified
-      versions of this manual under the conditions for verbatim
-      copying, provided also that the entire resulting derived work is
-      distributed under the terms of a permission notice identical to
-      this one.</P
-><P
->Permission is granted to copy and distribute translations
-      of this manual into another language, under the above conditions
-      for modified versions.</P
-><P
-></P
-></DIV
-></DIV
-><DIV
-CLASS="TOC"
-><DL
-><DT
-><B
->Table of Contents</B
-></DT
-><DT
-><A
-HREF="libxml-notes.html"
->Libxml Programming Notes</A
-></DT
-><DT
-><A
-HREF="libxml-lib.html"
->Libxml Library Reference</A
-></DT
-><DD
-><DL
-><DT
-><A
-HREF="gnome-xml-parser.html"
->parser</A
-> &#8212; </DT
-><DT
-><A
-HREF="gnome-xml-sax.html"
->SAX</A
-> &#8212; </DT
-><DT
-><A
-HREF="gnome-xml-tree.html"
->tree</A
-> &#8212; </DT
-><DT
-><A
-HREF="gnome-xml-entities.html"
->entities</A
-> &#8212; </DT
-><DT
-><A
-HREF="gnome-xml-valid.html"
->valid</A
-> &#8212; </DT
-><DT
-><A
-HREF="gnome-xml-uri.html"
->uri</A
-> &#8212; </DT
-><DT
-><A
-HREF="gnome-xml-xml-error.html"
->xml-error</A
-> &#8212; </DT
-><DT
-><A
-HREF="gnome-xml-htmlparser.html"
->HTMLparser</A
-> &#8212; </DT
-><DT
-><A
-HREF="gnome-xml-htmltree.html"
->HTMLtree</A
-> &#8212; </DT
-><DT
-><A
-HREF="gnome-xml-xpath.html"
->xpath</A
-> &#8212; </DT
-><DT
-><A
-HREF="gnome-xml-nanohttp.html"
->nanohttp</A
-> &#8212; </DT
-><DT
-><A
-HREF="gnome-xml-nanoftp.html"
->nanoftp</A
-> &#8212; </DT
-><DT
-><A
-HREF="gnome-xml-xmlio.html"
->xmlIO</A
-> &#8212; </DT
-><DT
-><A
-HREF="gnome-xml-parserinternals.html"
->parserInternals</A
-> &#8212; </DT
-><DT
-><A
-HREF="gnome-xml-encoding.html"
->encoding</A
-> &#8212; </DT
-><DT
-><A
-HREF="gnome-xml-debugxml.html"
->debugXML</A
-> &#8212; </DT
-><DT
-><A
-HREF="gnome-xml-xmlmemory.html"
->xmlmemory</A
-> &#8212; </DT
-></DL
-></DD
-></DL
-></DIV
-></DIV
-><DIV
-CLASS="NAVFOOTER"
-><BR
-CLEAR="all"><BR><TABLE
-WIDTH="100%"
-BORDER="0"
-BGCOLOR="#000000"
-CELLPADDING="1"
-CELLSPACING="0"
-><TR
-><TD
-WIDTH="25%"
-BGCOLOR="#C00000"
-ALIGN="left"
->&nbsp;</TD
-><TD
-WIDTH="25%"
-BGCOLOR="#0000C0"
-ALIGN="center"
-><FONT
-COLOR="#FFFFFF"
-SIZE="3"
-><B
->&nbsp;</B
-></FONT
-></TD
-><TD
-WIDTH="25%"
-BGCOLOR="#00C000"
-ALIGN="center"
-><FONT
-COLOR="#FFFFFF"
-SIZE="3"
-><B
->&nbsp;</B
-></FONT
-></TD
-><TD
-WIDTH="25%"
-BGCOLOR="#C00000"
-ALIGN="right"
-><A
-HREF="libxml-notes.html"
-><FONT
-COLOR="#FFFFFF"
-SIZE="3"
-><B
->Next Page &#62;&#62;&#62;</B
-></FONT
-></A
-></TD
-></TR
-><TR
-><TD
-COLSPAN="2"
-ALIGN="left"
->&nbsp;</TD
-><TD
-COLSPAN="2"
-ALIGN="right"
-><FONT
-COLOR="#FFFFFF"
-SIZE="3"
-><B
->Libxml Programming Notes</B
-></FONT
-></TD
-></TR
-></TABLE
-></DIV
-></BODY
-></HTML
->
\ No newline at end of file
diff --git a/doc/html/libxml-notes.html b/doc/html/libxml-notes.html
index 985cbef..9c59104 100644
--- a/doc/html/libxml-notes.html
+++ b/doc/html/libxml-notes.html
@@ -4,7 +4,7 @@
 >Libxml Programming Notes</TITLE
 ><META
 NAME="GENERATOR"
-CONTENT="Modular DocBook HTML Stylesheet Version 1.33"><LINK
+CONTENT="Modular DocBook HTML Stylesheet Version 1.52"><LINK
 REL="HOME"
 TITLE="Gnome XML Library Reference Manual"
 HREF="book1.html"><LINK
@@ -15,8 +15,12 @@
 TITLE="Libxml Library Reference"
 HREF="libxml-lib.html"></HEAD
 ><BODY
+CLASS="CHAPTER"
 BGCOLOR="#FFFFFF"
 TEXT="#000000"
+LINK="#0000FF"
+VLINK="#840084"
+ALINK="#0000FF"
 ><DIV
 CLASS="NAVHEADER"
 ><TABLE
diff --git a/encoding.c b/encoding.c
index dd36720..3d99734 100644
--- a/encoding.c
+++ b/encoding.c
@@ -43,6 +43,9 @@
 #endif
 #include <libxml/encoding.h>
 #include <libxml/xmlmemory.h>
+#ifdef LIBXML_HTML_ENABLED
+#include <libxml/HTMLparser.h>
+#endif
 
 xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
 xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
@@ -178,6 +181,140 @@
 }
 
 /**
+ * asciiToUTF8:
+ * @out:  a pointer to an array of bytes to store the result
+ * @outlen:  the length of @out
+ * @in:  a pointer to an array of ASCII chars
+ * @inlen:  the length of @in
+ *
+ * Take a block of ASCII chars in and try to convert it to an UTF-8
+ * block of chars out.
+ * Returns 0 if success, or -1 otherwise
+ * The value of @inlen after return is the number of octets consumed
+ *     if the return value is non-negative, else unpredictable.
+ * The value of @outlen after return is the number of octets produced.
+ */
+int
+asciiToUTF8(unsigned char* out, int *outlen,
+              const unsigned char* in, int *inlen) {
+    unsigned char* outstart = out;
+    const unsigned char* base = in;
+    const unsigned char* processed = in;
+    unsigned char* outend = out + *outlen;
+    const unsigned char* inend;
+    unsigned int c;
+    int bits;
+
+    inend = in + (*inlen);
+    while ((in < inend) && (out - outstart + 5 < *outlen)) {
+	c= *in++;
+
+	/* assertion: c is a single UCS-4 value */
+        if (out >= outend)
+	    break;
+        if      (c <    0x80) {  *out++=  c;                bits= -6; }
+        else { 
+	    *outlen = out - outstart;
+	    *inlen = processed - base;
+	    return(-1);
+	}
+ 
+        for ( ; bits >= 0; bits-= 6) {
+            if (out >= outend)
+	        break;
+            *out++= ((c >> bits) & 0x3F) | 0x80;
+        }
+	processed = (const unsigned char*) in;
+    }
+    *outlen = out - outstart;
+    *inlen = processed - base;
+    return(0);
+}
+
+/**
+ * UTF8Toascii:
+ * @out:  a pointer to an array of bytes to store the result
+ * @outlen:  the length of @out
+ * @in:  a pointer to an array of UTF-8 chars
+ * @inlen:  the length of @in
+ *
+ * Take a block of UTF-8 chars in and try to convert it to an ASCII
+ * block of chars out.
+ *
+ * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
+ * The value of @inlen after return is the number of octets consumed
+ *     if the return value is non-negative, else unpredictable.
+ * The value of @outlen after return is the number of octets produced.
+ */
+int
+UTF8Toascii(unsigned char* out, int *outlen,
+              const unsigned char* in, int *inlen) {
+    const unsigned char* processed = in;
+    const unsigned char* outend;
+    const unsigned char* outstart = out;
+    const unsigned char* instart = in;
+    const unsigned char* inend;
+    unsigned int c, d;
+    int trailing;
+
+    if (in == NULL) {
+        /*
+	 * initialization nothing to do
+	 */
+	*outlen = 0;
+	*inlen = 0;
+	return(0);
+    }
+    inend = in + (*inlen);
+    outend = out + (*outlen);
+    while (in < inend) {
+	d = *in++;
+	if      (d < 0x80)  { c= d; trailing= 0; }
+	else if (d < 0xC0) {
+	    /* trailing byte in leading position */
+	    *outlen = out - outstart;
+	    *inlen = processed - instart;
+	    return(-2);
+        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
+        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
+        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
+	else {
+	    /* no chance for this in Ascii */
+	    *outlen = out - outstart;
+	    *inlen = processed - instart;
+	    return(-2);
+	}
+
+	if (inend - in < trailing) {
+	    break;
+	} 
+
+	for ( ; trailing; trailing--) {
+	    if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
+		break;
+	    c <<= 6;
+	    c |= d & 0x3F;
+	}
+
+	/* assertion: c is a single UCS-4 value */
+	if (c < 0x80) {
+	    if (out >= outend)
+		break;
+	    *out++ = c;
+	} else {
+	    /* no chance for this in Ascii */
+	    *outlen = out - outstart;
+	    *inlen = processed - instart;
+	    return(-2);
+	}
+	processed = in;
+    }
+    *outlen = out - outstart;
+    *inlen = processed - instart;
+    return(0);
+}
+
+/**
  * isolat1ToUTF8:
  * @out:  a pointer to an array of bytes to store the result
  * @outlen:  the length of @out
@@ -195,28 +332,32 @@
 isolat1ToUTF8(unsigned char* out, int *outlen,
               const unsigned char* in, int *inlen) {
     unsigned char* outstart = out;
+    const unsigned char* base = in;
     const unsigned char* processed = in;
     unsigned char* outend = out + *outlen;
-    const unsigned char* inend = in + *inlen;
-    unsigned char c;
+    const unsigned char* inend;
+    unsigned int c;
+    int bits;
 
-    while (in < inend) {
-        c= *in++;
-        if (c < 0x80) {
+    inend = in + (*inlen);
+    while ((in < inend) && (out - outstart + 5 < *outlen)) {
+	c= *in++;
+
+	/* assertion: c is a single UCS-4 value */
+        if (out >= outend)
+	    break;
+        if      (c <    0x80) {  *out++=  c;                bits= -6; }
+        else                  {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
+ 
+        for ( ; bits >= 0; bits-= 6) {
             if (out >= outend)
-		break;
-            *out++ = c;
+	        break;
+            *out++= ((c >> bits) & 0x3F) | 0x80;
         }
-        else {
-            if (out + 1 >= outend)  break;
-            *out++ = 0xC0 | (c >> 6);
-            *out++ = 0x80 | (0x3F & c);
-        }
-	processed = in;
+	processed = (const unsigned char*) in;
     }
     *outlen = out - outstart;
-    *inlen = processed - in;
-
+    *inlen = processed - base;
     return(0);
 }
 
@@ -229,7 +370,6 @@
  *
  * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
  * block of chars out.
- * TODO: UTF8Toisolat1 need a fallback mechanism ...
  *
  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
  * The value of @inlen after return is the number of octets consumed
@@ -239,34 +379,68 @@
 int
 UTF8Toisolat1(unsigned char* out, int *outlen,
               const unsigned char* in, int *inlen) {
-    unsigned char* outstart = out;
     const unsigned char* processed = in;
-    unsigned char* outend = out + *outlen;
-    const unsigned char* inend = in + *inlen;
-    unsigned char c;
+    const unsigned char* outend;
+    const unsigned char* outstart = out;
+    const unsigned char* instart = in;
+    const unsigned char* inend;
+    unsigned int c, d;
+    int trailing;
 
+    if (in == NULL) {
+        /*
+	 * initialization nothing to do
+	 */
+	*outlen = 0;
+	*inlen = 0;
+	return(0);
+    }
+    inend = in + (*inlen);
+    outend = out + (*outlen);
     while (in < inend) {
-        c= *in++;
-        if (c < 0x80) {
-            if (out >= outend)  return(-1);
-            *out++= c;
-        }
-	else if (in == inend) {
-            break;
-	}
-	else if (((c & 0xFC) == 0xC0) && ((*in & 0xC0) == 0x80)) {
-	    /* a two byte utf-8 and can be encoding as isolate1 */
-            *out++= ((c & 0x03) << 6) | (*in++ & 0x3F);
-	}
-	else {
+	d = *in++;
+	if      (d < 0x80)  { c= d; trailing= 0; }
+	else if (d < 0xC0) {
+	    /* trailing byte in leading position */
 	    *outlen = out - outstart;
-	    *inlen = processed - in;
+	    *inlen = processed - instart;
+	    return(-2);
+        } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
+        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
+        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
+	else {
+	    /* no chance for this in IsoLat1 */
+	    *outlen = out - outstart;
+	    *inlen = processed - instart;
+	    return(-2);
+	}
+
+	if (inend - in < trailing) {
+	    break;
+	} 
+
+	for ( ; trailing; trailing--) {
+	    if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
+		break;
+	    c <<= 6;
+	    c |= d & 0x3F;
+	}
+
+	/* assertion: c is a single UCS-4 value */
+	if (c <= 0xFF) {
+	    if (out >= outend)
+		break;
+	    *out++ = c;
+	} else {
+	    /* no chance for this in IsoLat1 */
+	    *outlen = out - outstart;
+	    *inlen = processed - instart;
 	    return(-2);
 	}
 	processed = in;
     }
     *outlen = out - outstart;
-    *inlen = processed - in;
+    *inlen = processed - instart;
     return(0);
 }
 
@@ -367,7 +541,6 @@
  *
  * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
  * block of chars out.
- * TODO: UTF8ToUTF16LE need a fallback mechanism ...
  *
  * Returns the number of byte written, or -1 by lack of space, or -2
  *     if the transcoding failed. 
@@ -410,7 +583,7 @@
       if      (d < 0x80)  { c= d; trailing= 0; }
       else if (d < 0xC0) {
           /* trailing byte in leading position */
-	  *outlen = out - outstart;
+	  *outlen = (out - outstart) * 2;
 	  *inlen = processed - in;
 	  return(-2);
       } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
@@ -418,7 +591,7 @@
       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
       else {
 	/* no chance for this in UTF-16 */
-	*outlen = out - outstart;
+	*outlen = (out - outstart) * 2;
 	*inlen = processed - in;
 	return(-2);
       }
@@ -578,7 +751,6 @@
  *
  * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
  * block of chars out.
- * TODO: UTF8ToUTF16BE need a fallback mechanism ...
  *
  * Returns the number of byte written, or -1 by lack of space, or -2
  *     if the transcoding failed. 
@@ -861,6 +1033,8 @@
             return("Shift-JIS");
         case XML_CHAR_ENCODING_EUC_JP:
             return("EUC-JP");
+        case XML_CHAR_ENCODING_ASCII:
+            return("ASCII");
     }
     return(NULL);
 }
@@ -974,6 +1148,10 @@
     xmlUTF16BEHandler = 
           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
+    xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
+#ifdef LIBXML_HTML_ENABLED
+    xmlNewCharEncodingHandler("HTML", NULL, UTF8ToHtml);
+#endif
 }
 
 /**
@@ -1081,16 +1259,51 @@
             handler = xmlFindCharEncodingHandler("UCS2");
             if (handler != NULL) return(handler);
 	    break;
+
+	    /*
+	     * We used to keep ISO Latin encodings native in the
+	     * generated data. This led to so many problems that
+	     * this has been removed. One can still change this
+	     * back by registering no-op encoders for those
+	     */
         case XML_CHAR_ENCODING_8859_1:
+	    handler = xmlFindCharEncodingHandler("ISO-8859-1");
+	    if (handler != NULL) return(handler);
+	    break;
         case XML_CHAR_ENCODING_8859_2:
+	    handler = xmlFindCharEncodingHandler("ISO-8859-2");
+	    if (handler != NULL) return(handler);
+	    break;
         case XML_CHAR_ENCODING_8859_3:
+	    handler = xmlFindCharEncodingHandler("ISO-8859-3");
+	    if (handler != NULL) return(handler);
+	    break;
         case XML_CHAR_ENCODING_8859_4:
+	    handler = xmlFindCharEncodingHandler("ISO-8859-4");
+	    if (handler != NULL) return(handler);
+	    break;
         case XML_CHAR_ENCODING_8859_5:
+	    handler = xmlFindCharEncodingHandler("ISO-8859-5");
+	    if (handler != NULL) return(handler);
+	    break;
         case XML_CHAR_ENCODING_8859_6:
+	    handler = xmlFindCharEncodingHandler("ISO-8859-6");
+	    if (handler != NULL) return(handler);
+	    break;
         case XML_CHAR_ENCODING_8859_7:
+	    handler = xmlFindCharEncodingHandler("ISO-8859-7");
+	    if (handler != NULL) return(handler);
+	    break;
         case XML_CHAR_ENCODING_8859_8:
+	    handler = xmlFindCharEncodingHandler("ISO-8859-8");
+	    if (handler != NULL) return(handler);
+	    break;
         case XML_CHAR_ENCODING_8859_9:
-	    return(NULL);
+	    handler = xmlFindCharEncodingHandler("ISO-8859-9");
+	    if (handler != NULL) return(handler);
+	    break;
+
+
         case XML_CHAR_ENCODING_2022_JP:
             handler = xmlFindCharEncodingHandler("ISO-2022-JP");
             if (handler != NULL) return(handler);
@@ -1161,7 +1374,8 @@
     icv_in = iconv_open("UTF-8", name);
     icv_out = iconv_open(name, "UTF-8");
     if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
-	    enc = xmlMalloc(sizeof(xmlCharEncodingHandler));
+	    enc = (xmlCharEncodingHandlerPtr)
+	          xmlMalloc(sizeof(xmlCharEncodingHandler));
 	    if (enc == NULL) {
 	        iconv_close(icv_in);
 	        iconv_close(icv_out);
@@ -1506,6 +1720,10 @@
 	if (ret == -1) ret = -3;
     }
 #endif /* LIBXML_ICONV_ENABLED */
+    else {
+	fprintf(stderr, "xmlCharEncOutFunc: no output function !\n");
+	return(-1);
+    }
 
     if (ret >= 0) output += ret;
 
@@ -1528,7 +1746,7 @@
 #endif
         case -2: {
 	    int len = in->use;
-	    const char *utf = (const char *) in->content;
+	    const xmlChar *utf = (const xmlChar *) in->content;
 	    int cur;
 
 	    cur = xmlGetUTF8Char(utf, &len);
@@ -1546,7 +1764,7 @@
 		 * and continue the transcoding phase, hoping the error
 		 * did not mangle the encoder state.
 		 */
-		sprintf(charref, "&#x%X;", cur);
+		sprintf((char *) charref, "&#x%X;", cur);
 		xmlBufferShrink(in, len);
 		xmlBufferAddHead(in, charref, -1);
 
diff --git a/encoding.h b/encoding.h
index ce0ab75..5b6af9f 100644
--- a/encoding.h
+++ b/encoding.h
@@ -70,7 +70,8 @@
     XML_CHAR_ENCODING_8859_9=	18,/* ISO-8859-9 */
     XML_CHAR_ENCODING_2022_JP=  19,/* ISO-2022-JP */
     XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */
-    XML_CHAR_ENCODING_EUC_JP=   21 /* EUC-JP */
+    XML_CHAR_ENCODING_EUC_JP=   21,/* EUC-JP */
+    XML_CHAR_ENCODING_ASCII=    22 /* pure ASCII */
 } xmlCharEncoding;
 
 /**
diff --git a/entities.c b/entities.c
index c541d67..7505943 100644
--- a/entities.c
+++ b/entities.c
@@ -128,7 +128,7 @@
      * fill the structure.
      */
     ret->name = xmlStrdup(name);
-    ret->etype = type;
+    ret->etype = (xmlEntityType) type;
     if (ExternalID != NULL)
 	ret->ExternalID = xmlStrdup(ExternalID);
     if (SystemID != NULL)
@@ -754,9 +754,6 @@
  * Contrary to xmlEncodeEntities, this routine is reentrant, and result
  * must be deallocated.
  *
- * TODO !!!! Once moved to UTF-8 internal encoding, the encoding of non-ascii
- *           get erroneous.
- *
  * Returns A newly allocated string with the substitution done.
  */
 xmlChar *
@@ -832,20 +829,7 @@
 	     */
 	    *out++ = *cur;
 	} else if (*cur >= 0x80) {
-	    if (html) {
-		char buf[15], *ptr;
-
-		/*
-		 * TODO: improve by searching in html40EntitiesTable
-		 */
-#ifdef HAVE_SNPRINTF
-		snprintf(buf, 9, "&#%d;", *cur);
-#else
-		sprintf(buf, "&#%d;", *cur);
-#endif
-		ptr = buf;
-		while (*ptr != 0) *out++ = *ptr++;
-	    } else if (doc->encoding != NULL) {
+	    if ((doc->encoding != NULL) || (html)) {
 		/*
 		 * TODO !!!
 		 */
@@ -900,6 +884,7 @@
 #else
 		    sprintf(buf, "&#%d;", *cur);
 #endif
+		    buf[9] = 0;
 		    ptr = buf;
 		    while (*ptr != 0) *out++ = *ptr++;
 		    cur++;
@@ -909,11 +894,11 @@
 		 * We could do multiple things here. Just save as a char ref
 		 */
 #ifdef HAVE_SNPRINTF
-		snprintf(buf, 14, "&#x%X;", val);
+		snprintf(buf, 9, "&#x%X;", val);
 #else
 		sprintf(buf, "&#x%X;", val);
 #endif
-		buf[14] = 0;
+		buf[9] = 0;
 		ptr = buf;
 		while (*ptr != 0) *out++ = *ptr++;
 		cur += l;
@@ -927,6 +912,7 @@
 #else
 	    sprintf(buf, "&#%d;", *cur);
 #endif
+	    buf[9] = 0;
             ptr = buf;
 	    while (*ptr != 0) *out++ = *ptr++;
 	}
diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h
index 44d9c27..b04e3b0 100644
--- a/include/libxml/HTMLparser.h
+++ b/include/libxml/HTMLparser.h
@@ -81,6 +81,10 @@
 					 void *userData);
 htmlDocPtr		htmlParseFile	(const char *filename,
 					 const char *encoding);
+int			UTF8ToHtml	(unsigned char* out,
+					 int *outlen,
+					 const unsigned char* in,
+					 int *inlen);
 
 /**
  * Interfaces for the Push mode
diff --git a/include/libxml/HTMLtree.h b/include/libxml/HTMLtree.h
index d41d8d9..feff3a4 100644
--- a/include/libxml/HTMLtree.h
+++ b/include/libxml/HTMLtree.h
@@ -23,12 +23,27 @@
 #define HTML_ENTITY_REF_NODE	XML_ENTITY_REF_NODE
 #define HTML_COMMENT_NODE	XML_COMMENT_NODE
 
-void htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size);
-void htmlDocDump(FILE *f, xmlDocPtr cur);
-int htmlSaveFile(const char *filename, xmlDocPtr cur);
-void htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
-void htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur);
-htmlDocPtr htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID);
+htmlDocPtr	htmlNewDoc		(const xmlChar *URI,
+					 const xmlChar *ExternalID);
+const xmlChar *	htmlGetMetaEncoding	(htmlDocPtr doc);
+int		htmlSetMetaEncoding	(htmlDocPtr doc,
+					 const xmlChar *encoding);
+void		htmlDocDumpMemory	(xmlDocPtr cur,
+					 xmlChar**mem,
+					 int *size);
+int		htmlDocDump		(FILE *f,
+					 xmlDocPtr cur);
+int		htmlSaveFile		(const char *filename,
+					 xmlDocPtr cur);
+void		htmlNodeDump		(xmlBufferPtr buf,
+					 xmlDocPtr doc,
+					 xmlNodePtr cur);
+void		htmlNodeDumpFile	(FILE *out,
+					 xmlDocPtr doc,
+					 xmlNodePtr cur);
+int		htmlSaveFileEnc		(const char *filename,
+					 xmlDocPtr cur,
+					 const char *encoding);
 
 #ifdef __cplusplus
 }
diff --git a/include/libxml/SAX.h b/include/libxml/SAX.h
index a3bd102..3c0f4cb 100644
--- a/include/libxml/SAX.h
+++ b/include/libxml/SAX.h
@@ -16,7 +16,7 @@
 #include <libxml/xlink.h>
 
 #ifdef __cplusplus
-#define extern "C" {
+extern "C" {
 #endif
 const xmlChar *	getPublicId			(void *ctx);
 const xmlChar *	getSystemId			(void *ctx);
diff --git a/include/libxml/encoding.h b/include/libxml/encoding.h
index ce0ab75..5b6af9f 100644
--- a/include/libxml/encoding.h
+++ b/include/libxml/encoding.h
@@ -70,7 +70,8 @@
     XML_CHAR_ENCODING_8859_9=	18,/* ISO-8859-9 */
     XML_CHAR_ENCODING_2022_JP=  19,/* ISO-2022-JP */
     XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */
-    XML_CHAR_ENCODING_EUC_JP=   21 /* EUC-JP */
+    XML_CHAR_ENCODING_EUC_JP=   21,/* EUC-JP */
+    XML_CHAR_ENCODING_ASCII=    22 /* pure ASCII */
 } xmlCharEncoding;
 
 /**
diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h
index e7e6fa0..f0f7561 100644
--- a/include/libxml/parserInternals.h
+++ b/include/libxml/parserInternals.h
@@ -572,6 +572,16 @@
 						 xmlParserInputPtr value);
 xmlParserInputPtr	inputPop		(xmlParserCtxtPtr ctxt);
 
+/*
+ * Core functions shared with the HTML parser
+ */
+int			xmlCurrentChar		(xmlParserCtxtPtr ctxt,
+						 int *len);
+int			xmlCopyChar		(int len,
+						 xmlChar *out,
+						 int val);
+void			xmlNextChar		(xmlParserCtxtPtr ctxt);
+void			xmlParserInputShrink	(xmlParserInputPtr in);
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/libxml/tree.h b/include/libxml/tree.h
index 6c68dc3..00f4ee6 100644
--- a/include/libxml/tree.h
+++ b/include/libxml/tree.h
@@ -549,6 +549,8 @@
 					 const xmlChar *value);
 xmlChar *	xmlGetProp		(xmlNodePtr node,
 					 const xmlChar *name);
+xmlAttrPtr	xmlHasProp		(xmlNodePtr node,
+					 const xmlChar *name);
 xmlChar *	xmlGetNsProp		(xmlNodePtr node,
 					 const xmlChar *name,
 					 const xmlChar *nameSpace);
diff --git a/include/libxml/xlink.h b/include/libxml/xlink.h
index 68a35fe..37a5415 100644
--- a/include/libxml/xlink.h
+++ b/include/libxml/xlink.h
@@ -16,7 +16,7 @@
 #include <libxml/tree.h>
 
 #ifdef __cplusplus
-#define extern "C" {
+extern "C" {
 #endif
 /**
  * Various defines for the various Link properties.
diff --git a/nanoftp.c b/nanoftp.c
index 8d490e0..96fa272 100644
--- a/nanoftp.c
+++ b/nanoftp.c
@@ -4,12 +4,22 @@
  *  Reference: RFC 959
  */
 
+#ifdef TESTING
+#define STANDALONE
+#define HAVE_STDLIB_H
+#define HAVE_UNISTD_H
+#define HAVE_SYS_SOCKET_H
+#define HAVE_NETINET_IN_H
+#define HAVE_NETDB_H
+#define HAVE_SYS_TIME_H
+#else /* STANDALONE */
 #ifdef WIN32
 #define INCLUDE_WINSOCK
 #include "win32config.h"
 #else
 #include "config.h"
 #endif
+#endif /* STANDALONE */
 
 #include "xmlversion.h"
 
diff --git a/nanohttp.c b/nanohttp.c
index 33d0b7a..51f1e92 100644
--- a/nanohttp.c
+++ b/nanohttp.c
@@ -656,7 +656,7 @@
     }
 
     if ( FD_ISSET(s, &wfd) ) {
-	socklen_t len;
+	unsigned int len; /* was socklen_t barfed on some systems :-( */
 	len = sizeof(status);
 	if (getsockopt(s, SOL_SOCKET, SO_ERROR, &status, &len) < 0 ) {
 	    /* Solaris error code */
diff --git a/parser.c b/parser.c
index 974dc8d..1bcc7e6 100644
--- a/parser.c
+++ b/parser.c
@@ -13,7 +13,7 @@
 #endif
 
 #include <stdio.h>
-#include <string.h> /* for memset() only */
+#include <string.h>
 #ifdef HAVE_CTYPE_H
 #include <ctype.h>
 #endif
@@ -306,7 +306,7 @@
 scope int name##Push(xmlParserCtxtPtr ctxt, type value) {		\
     if (ctxt->name##Nr >= ctxt->name##Max) {				\
 	ctxt->name##Max *= 2;						\
-        ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab,		\
+        ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab,		\
 	             ctxt->name##Max * sizeof(ctxt->name##Tab[0]));	\
         if (ctxt->name##Tab == NULL) {					\
 	    fprintf(stderr, "realloc failed !\n");			\
@@ -337,7 +337,7 @@
 int spacePush(xmlParserCtxtPtr ctxt, int val) {
     if (ctxt->spaceNr >= ctxt->spaceMax) {
 	ctxt->spaceMax *= 2;
-        ctxt->spaceTab = (void *) xmlRealloc(ctxt->spaceTab,
+        ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
 	             ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
         if (ctxt->spaceTab == NULL) {
 	    fprintf(stderr, "realloc failed !\n");
@@ -449,7 +449,7 @@
      *   the single character #xA. 
      */
     if (ctxt->token != 0) ctxt->token = 0;
-    else {
+    else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
 	if ((*ctxt->input->cur == 0) &&
 	    (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
 	    (ctxt->instate != XML_PARSER_COMMENT)) {
@@ -540,9 +540,16 @@
 	    if (*ctxt->input->cur == 0)
 		xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
 	}
+    } else {
+	ctxt->input->cur++;
+	ctxt->nbChars++;
+	if (*ctxt->input->cur == 0)
+	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
     }
-    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
-    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
+    if ((*ctxt->input->cur == '%') && (!ctxt->html))
+	xmlParserHandlePEReference(ctxt);
+    if ((*ctxt->input->cur == '&')&& (!ctxt->html))
+	xmlParserHandleReference(ctxt);
     if ((*ctxt->input->cur == 0) &&
         (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
 	    xmlPopInput(ctxt);
@@ -2373,6 +2380,10 @@
 		/* let's assume it's UTF-8 without the XML decl */
 		ctxt->charset = XML_CHAR_ENCODING_UTF8;
 		return(0);
+	    case XML_CHAR_ENCODING_ASCII:
+		/* default encoding, no conversion should be needed */
+		ctxt->charset = XML_CHAR_ENCODING_UTF8;
+		return(0);
 	    case XML_CHAR_ENCODING_UTF8:
 		/* default encoding, no conversion should be needed */
 		ctxt->charset = XML_CHAR_ENCODING_UTF8;
@@ -2427,7 +2438,10 @@
 	    case XML_CHAR_ENCODING_8859_8:
 	    case XML_CHAR_ENCODING_8859_9:
 		/*
-		 * Keep the internal content in the document encoding
+		 * We used to keep the internal content in the document
+		 * encoding; however, this turned out to be unmaintainable,
+		 * so xmlGetCharEncodingHandler() now returns non-null
+		 * values for these encodings.
 		 */
 		if ((ctxt->inputNr == 1) &&
 		    (ctxt->encoding == NULL) &&
@@ -2625,7 +2639,7 @@
     xmlChar *ret;
     
     if ((cur == NULL) || (len < 0)) return(NULL);
-    ret = xmlMalloc((len + 1) * sizeof(xmlChar));
+    ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
     if (ret == NULL) {
         fprintf(stderr, "malloc of %ld byte failed\n",
 	        (len + 1) * (long)sizeof(xmlChar));
@@ -2671,7 +2685,7 @@
     xmlChar *ret;
     
     if ((cur == NULL) || (len < 0)) return(NULL);
-    ret = xmlMalloc((len + 1) * sizeof(xmlChar));
+    ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
     if (ret == NULL) {
         fprintf(stderr, "malloc of %ld byte failed\n",
 	        (len + 1) * (long)sizeof(xmlChar));
@@ -2872,7 +2886,7 @@
         return(xmlStrndup(add, len));
 
     size = xmlStrlen(cur);
-    ret = xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
+    ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
     if (ret == NULL) {
         fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
 	        (size + len + 1) * (long)sizeof(xmlChar));
@@ -3113,7 +3127,7 @@
  * @name:  an XML parser context
  * @prefix:  a xmlChar ** 
  *
- * parse an XML qualified name string
+ * parse a UTF-8 encoded XML qualified name string
  *
  * [NS 5] QName ::= (Prefix ':')? LocalPart
  *
@@ -3131,7 +3145,7 @@
     int len = 0;
     xmlChar *ret = NULL;
     const xmlChar *cur = name;
-    int c,l;
+    int c;
 
     *prefix = NULL;
 
@@ -3144,36 +3158,23 @@
     if (cur[0] == ':')
 	return(xmlStrdup(name));
 
-    c = CUR_SCHAR(cur, l);
-    if (!IS_LETTER(c) && (c != '_')) return(NULL);
-
-    while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
-           (c == '.') || (c == '-') ||
-	   (c == '_') ||
-	   (IS_COMBINING(c)) ||
-	   (IS_EXTENDER(c))) {
-	COPY_BUF(l,buf,len,c);
-	cur += l;
-	c = CUR_SCHAR(cur, l);
+    c = *cur++;
+    while ((c != 0) && (c != ':')) {
+	buf[len++] = c;
+	c = *cur++;
     }
     
     ret = xmlStrndup(buf, len);
 
     if (c == ':') {
-	cur += l;
-	c = CUR_SCHAR(cur, l);
-	if (!IS_LETTER(c) && (c != '_')) return(ret);
+	c = *cur++;
+	if (c == 0) return(ret);
         *prefix = ret;
 	len = 0;
 
-	while ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
-	       (c == '.') || (c == '-') ||
-	       (c == '_') ||
-	       (IS_COMBINING(c)) ||
-	       (IS_EXTENDER(c))) {
-	    COPY_BUF(l,buf,len,c);
-	    cur += l;
-	    c = CUR_SCHAR(cur, l);
+	while (c != 0) {
+	    buf[len++] = c;
+	    c = *cur++;
 	}
 	
 	ret = xmlStrndup(buf, len);
@@ -3181,6 +3182,7 @@
 
     return(ret);
 }
+
 /**
  * xmlNamespaceParseNSDef:
  * @ctxt:  an XML parser context
@@ -3237,7 +3239,7 @@
 	while (IS_CHAR(c) && (c != '"')) {
 	    if (len + 5 >= size) {
 		size *= 2;
-		buf = xmlRealloc(buf, size * sizeof(xmlChar));
+		buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
 		if (buf == NULL) {
 		    fprintf(stderr, "realloc of %d byte failed\n", size);
 		    return(NULL);
@@ -3263,7 +3265,7 @@
 	while (IS_CHAR(c) && (c != '\'')) {
 	    if (len + 1 >= size) {
 		size *= 2;
-		buf = xmlRealloc(buf, size * sizeof(xmlChar));
+		buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
 		if (buf == NULL) {
 		    fprintf(stderr, "realloc of %d byte failed\n", size);
 		    return(NULL);
@@ -3675,7 +3677,7 @@
     while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) {
 	if (len + 5 >= size) {
 	    size *= 2;
-	    buf = xmlRealloc(buf, size * sizeof(xmlChar));
+	    buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
 	    if (buf == NULL) {
 		fprintf(stderr, "realloc of %d byte failed\n", size);
 		return(NULL);
@@ -3841,7 +3843,18 @@
     c = CUR_CHAR(l);
     while (((NXT(0) != limit) && (c != '<')) || (ctxt->token != 0)) {
 	if (c == 0) break;
-        if ((c == '&') && (NXT(1) == '#')) {
+	if (ctxt->token == '&') {
+	    static xmlChar buffer[6] = "&#38;";
+
+	    if (len > buf_size - 10) {
+		growBuffer(buf);
+	    }
+	    current = &buffer[0];
+	    while (*current != 0) {
+		buf[len++] = *current++;
+	    }
+	    ctxt->token = 0;
+	} else if ((c == '&') && (NXT(1) == '#')) {
 	    int val = xmlParseCharRef(ctxt);
 	    COPY_BUF(l,buf,len,val);
 	    NEXTL(l);
@@ -3978,10 +3991,10 @@
     while ((IS_CHAR(cur)) && (cur != stop)) {
 	if (len + 5 >= size) {
 	    size *= 2;
-	    buf = xmlRealloc(buf, size * sizeof(xmlChar));
+	    buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
 	    if (buf == NULL) {
 		fprintf(stderr, "realloc of %d byte failed\n", size);
-		ctxt->instate = state;
+		ctxt->instate = (xmlParserInputState) state;
 		return(NULL);
 	    }
 	}
@@ -3995,7 +4008,7 @@
 	}
     }
     buf[len] = 0;
-    ctxt->instate = state;
+    ctxt->instate = (xmlParserInputState) state;
     if (!IS_CHAR(cur)) {
 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 	    ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
@@ -4052,7 +4065,7 @@
     while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
 	if (len + 1 >= size) {
 	    size *= 2;
-	    buf = xmlRealloc(buf, size * sizeof(xmlChar));
+	    buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
 	    if (buf == NULL) {
 		fprintf(stderr, "realloc of %d byte failed\n", size);
 		return(NULL);
@@ -4324,7 +4337,7 @@
 	}
 	if (len + 5 >= size) {
 	    size *= 2;
-	    buf = xmlRealloc(buf, size * sizeof(xmlChar));
+	    buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
 	    if (buf == NULL) {
 		fprintf(stderr, "realloc of %d byte failed\n", size);
 		ctxt->instate = state;
@@ -4502,7 +4515,7 @@
 		   ((cur != '?') || (NXT(1) != '>'))) {
 		if (len + 5 >= size) {
 		    size *= 2;
-		    buf = xmlRealloc(buf, size * sizeof(xmlChar));
+		    buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
 		    if (buf == NULL) {
 			fprintf(stderr, "realloc of %d byte failed\n", size);
 			ctxt->instate = state;
@@ -7774,7 +7787,7 @@
            ((r != ']') || (s != ']') || (cur != '>'))) {
 	if (len + 5 >= size) {
 	    size *= 2;
-	    buf = xmlRealloc(buf, size * sizeof(xmlChar));
+	    buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
 	    if (buf == NULL) {
 		fprintf(stderr, "realloc of %d byte failed\n", size);
 		return;
@@ -8099,7 +8112,7 @@
 	   (cur == ':') || (cur == '-')) {
 	if (len + 1 >= size) {
 	    size *= 2;
-	    buf = xmlRealloc(buf, size * sizeof(xmlChar));
+	    buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
 	    if (buf == NULL) {
 		fprintf(stderr, "realloc of %d byte failed\n", size);
 		return(NULL);
@@ -8222,7 +8235,7 @@
 	       (cur == '-')) {
 	    if (len + 1 >= size) {
 		size *= 2;
-		buf = xmlRealloc(buf, size * sizeof(xmlChar));
+		buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
 		if (buf == NULL) {
 		    fprintf(stderr, "realloc of %d byte failed\n", size);
 		    return(NULL);
@@ -8345,7 +8358,9 @@
 		    xmlSwitchToEncoding(ctxt, handler);
 		} else {
 		    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-		    xmlFree(encoding);
+		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+			ctxt->sax->error(ctxt->userData,
+			     "Unsupported encoding %s\n", encoding);
 		    return(NULL);
 		}
 	    }
diff --git a/parserInternals.h b/parserInternals.h
index e7e6fa0..f0f7561 100644
--- a/parserInternals.h
+++ b/parserInternals.h
@@ -572,6 +572,16 @@
 						 xmlParserInputPtr value);
 xmlParserInputPtr	inputPop		(xmlParserCtxtPtr ctxt);
 
+/*
+ * Core functions shared with the HTML parser
+ */
+int			xmlCurrentChar		(xmlParserCtxtPtr ctxt,
+						 int *len);
+int			xmlCopyChar		(int len,
+						 xmlChar *out,
+						 int val);
+void			xmlNextChar		(xmlParserCtxtPtr ctxt);
+void			xmlParserInputShrink	(xmlParserInputPtr in);
 #ifdef __cplusplus
 }
 #endif
diff --git a/result/HTML/fp40.htm b/result/HTML/fp40.htm
index 15bc076..95a5187 100644
--- a/result/HTML/fp40.htm
+++ b/result/HTML/fp40.htm
@@ -8,7 +8,7 @@
 <body>
 <font face="Verdana">
 <h1><a name="top">Microsoft FrontPage 2000 Server Extensions, UNIX</a></h1>
-<font size="2"><i>&#169; Copyright Microsoft Corporation, 1999&#160;</i></font>
+<font size="2"><i>&copy; Copyright Microsoft Corporation, 1999&nbsp;</i></font>
 <p>The FrontPage Server Extensions are a set of programs on the Web server that support: 
 
 </p>
@@ -17,11 +17,11 @@
 <li>Administering FrontPage webs</li>
 <li>Browse-time FrontPage web functionality</li>
 </ul>
-<h2>Contents&#160;</h2>
+<h2>Contents&nbsp;</h2>
 <a href="#relnotes">Release Notes</a>
 <br>
 <a href="#moreinfo">Resources for More Information</a>
-<p>&#160;</p>
+<p>&nbsp;</p>
 <hr>
 <h2><a name="relnotes">Release Notes</a></h2>
 <p>This section provides complementary or late-breaking 
@@ -41,7 +41,7 @@
 configuration files (access.conf, srm.conf), add the following lines to http.conf:</p>
 </font>
 <blockquote><font face="Courier New">
-ResourceConfig /dev/null&#160;<br>
+ResourceConfig /dev/null&nbsp;<br>
 AccessConfig /dev/null</font></blockquote>
 <font face="Verdana">
 <p>If you have some settings stored in secondary configuration files, move them to http.conf.</p>
@@ -100,7 +100,7 @@
 answering inquiries, so you can write your question in your own words. To begin, go to
 <a href="http://support.microsoft.com/support/">http://support.microsoft.com/support/</a>.</p>
 <p align="right"><font size="1"><a href="#moreinfo">Top of Section</a></font></p>
-<p>&#160;</p>
+<p>&nbsp;</p>
 </font>
 </body>
 </html>
diff --git a/result/HTML/wired.html b/result/HTML/wired.html
index d4439bb..6a523fb 100644
--- a/result/HTML/wired.html
+++ b/result/HTML/wired.html
@@ -80,7 +80,7 @@
 <td bgcolor="#FF0000" align="left" valign="center"><nobr>
 <img src="http://static.wired.com/news/images/spacer.gif" width="344" height="1">
 <br>
-<font size="1" face="Verdana, Arial, Geneva, sans-serif" color="#FFFFFF">&#160;&#160;&#160;<b>updated 10:15 a.m.&#160;&#160;15.Oct.99.PDT</b>
+<font size="1" face="Verdana, Arial, Geneva, sans-serif" color="#FFFFFF">&nbsp;&nbsp;&nbsp;<b>updated 10:15 a.m.&nbsp;&nbsp;15.Oct.99.PDT</b>
 </font>
 </nobr></td>
 </tr>
@@ -132,14 +132,14 @@
 <input type="hidden" name="LIST" value="wn_ascii">
 <input type="hidden" name="SOURCE" value="other">
 <input type="hidden" name="ACTION" value="subscribe">
-<input type="TEXT" name="from" size="10" value="enter email">&#160;
+<input type="TEXT" name="from" size="10" value="enter email">&nbsp;
 </form></td>
 <td valign="top" bgcolor="#99FF99"><input type="SUBMIT" name="SUBMIT" value="GO"></td>
 </tr></table></tr>
 <tr><td bgcolor="#FF0000"><font face="Verdana, Arial, Helvetica, sans-serif" color="#FFFFFF"><b><font size="1">STOCKS</font></b></font></td></tr>
 <tr><td bgcolor="#99FF99"><font face="Verdana, Arial, Helvetica, sans-serif" size="1">Get Quote:</font></td></tr>
 <tr><td bgcolor="#99FF99" marginwidth="0" marginheight="0"><form method="get" action="http://r.wired.com/r/10020/http://stocks.wired.com/stocks_quotes.asp">
-<input type="TEXT" name="Symbol" size="12">&#160;<input type="SUBMIT" name="submit" value="GO">
+<input type="TEXT" name="Symbol" size="12">&nbsp;<input type="SUBMIT" name="submit" value="GO">
 </form></td></tr>
 <!-- BEGIN BUTTON ADS --><tr><td bgcolor="#CCFFCC">
 <font size="1" face="Verdana, Arial, Helvetica, sans-serif" color="#000000">Financial Services</font>
@@ -286,17 +286,17 @@
 <br>
 <!-- IBD_SUBJECT: Homeless, but ID'd, in Seattle --><font face="Arial, Helvetica, sans-serif" size="5"><b><a href="/news/politics/0,1283,31911,00.html">Homeless, but ID'd, in Seattle</a></b></font>
 <br>
-<font size="1" face="Verdana, Arial, Geneva, sans-serif" color="#FF0000">8:15 a.m.</font>&#160;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The city council approves a plan to track the homeless by a numbering system, saying it'll improve services. The implications worry privacy advocates, naturally. By Craig Bicknell.</font>
+<font size="1" face="Verdana, Arial, Geneva, sans-serif" color="#FF0000">8:15 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The city council approves a plan to track the homeless by a numbering system, saying it'll improve services. The implications worry privacy advocates, naturally. By Craig Bicknell.</font>
 <br>
-<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/politics/0,1283,,00.html">in&#160;Politics</a></i></font>
+<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/politics/0,1283,,00.html">in&nbsp;Politics</a></i></font>
 <br>
 <table bgcolor="#F0F0F0" cellpadding="0" cellspacing="0" border="0" width="147" align="RIGHT">
 <!-- Commentary Frag Begin --><tr>
-<td bgcolor="#000000">&#160;</td>
+<td bgcolor="#000000">&nbsp;</td>
 <td bgcolor="#000000"><font size="1" face="Verdana, Arial, Helvetica, sans-serif" color="#FFFFFF"><b>HITS &amp; MISC.</b></font></td>
 </tr>
 <tr>
-<td>&#160;</td>
+<td>&nbsp;</td>
 <td>
 <img src="http://static.wired.com/news/images/spacer.gif" height="5" width="5" alt="">
 <br>
@@ -317,11 +317,11 @@
 </td>
 </tr>
 <!-- Commentary Frag End --><tr>
-<td align="left" bgcolor="#000000">&#160;</td>
+<td align="left" bgcolor="#000000">&nbsp;</td>
 <td bgcolor="#000000"><font size="1" face="Verdana, Arial, Helvetica, sans-serif" color="#FFFFFF"><b>CURRENT HOO-HA</b></font></td>
 </tr>
 <tr>
-<td>&#160;</td>
+<td>&nbsp;</td>
 <td>
 <img src="http://static.wired.com/news/images/spacer.gif" height="5" width="5" alt="">
 <br>
@@ -418,7 +418,7 @@
 <br>
 <br>
 <font face="Arial, Helvetica, sans-serif" size="2"><b><i><a href="/news/special_reports/1,1293,,00.html">More Hoo-Ha</a></i></b></font>
-<br>&#160;<br>
+<br>&nbsp;<br>
 </font>
 </font>
 </font>
@@ -430,19 +430,19 @@
 </td>
 </tr>
 <!-- start of Gen News --><tr>
-<td bgcolor="#000000">&#160;</td>
+<td bgcolor="#000000">&nbsp;</td>
 <td bgcolor="#000000"><font size="1" face="Verdana, Arial, Helvetica, sans-serif" color="#FFFFFF"><b>MEANWHILE...</b></font></td>
 </tr>
 <tr>
-<td>&#160;</td>
+<td>&nbsp;</td>
 <td align="left" valign="top">
 <img src="http://static.wired.com/news/images/spacer.gif" height="5" width="5" alt="">
 <br>
-<!-- 31942 --><font size="2" face="Arial, Helvetica, sans-serif" color="#000000"><b>F&#252;hrer Furor</b></font>
+<!-- 31942 --><font size="2" face="Arial, Helvetica, sans-serif" color="#000000"><b>F&uuml;hrer Furor</b></font>
 <br>
 <font size="1" face="Arial, Geneva, sans-serif" color="#000000">
 <p>

-Contruction workers in Berlin opened an old wound in the German psyche this week when they accidentally stumbled across Adolf Hitler's bunker while excavating near the Brandenburg Gate. The bunker, just south of the Gate, was where Hitler and his closest associates barricaded themselves as the Red Army approached Berlin in the waning days of World War II. It is also where the F&#252;hrer and his bride, Eva Braun, committed suicide rather than fall into the hands of the Russians. Although the bunker's location has never been a mystery, it has been sealed off since the end of the war to keep neo-Nazis from turning it into a shrine.

+Contruction workers in Berlin opened an old wound in the German psyche this week when they accidentally stumbled across Adolf Hitler's bunker while excavating near the Brandenburg Gate. The bunker, just south of the Gate, was where Hitler and his closest associates barricaded themselves as the Red Army approached Berlin in the waning days of World War II. It is also where the F&uuml;hrer and his bride, Eva Braun, committed suicide rather than fall into the hands of the Russians. Although the bunker's location has never been a mystery, it has been sealed off since the end of the war to keep neo-Nazis from turning it into a shrine.

 <br>
 </p>
 <li>More from <a href="http://www.lycos.com/news/flash/hitlerbunker.html?v=wn1015&amp;lpv=1">Lycos</a>
@@ -454,7 +454,7 @@
 </tr>
 <!-- end of Gen News -->
 </table>
-<font size="1">&#160;<br>
+<font size="1">&nbsp;<br>
 </font>
 <br>
 <font face="Verdana, Arial, Geneva, sans-serif" size="2"><b><i>Other Top Stories</i></b></font>
@@ -463,67 +463,67 @@
 <br>
 <!-- SQL query here --><!-- IBD_SUBJECT:Wall Street Keeps Reeling --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/reuters/0,1349,31934,00.html">Wall Street Keeps Reeling</a></b></font>
 <br>
-<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">10:15 a.m.</font>&#160;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The Dow and Nasdaq suffer sizeable losses during the first half of Friday trading. Why? Wholesale prices are the highest this decade, and Greenspan is concerned about stock prices.</font>
+<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">10:15 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The Dow and Nasdaq suffer sizeable losses during the first half of Friday trading. Why? Wholesale prices are the highest this decade, and Greenspan is concerned about stock prices.</font>
 <br>
-<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/reuters/0,1349,,00.html">in&#160;Reuters</a></i></font>
+<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/reuters/0,1349,,00.html">in&nbsp;Reuters</a></i></font>
 <br>
 <br>
 <!-- IBD_SUBJECT:The Market's Madness --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/reuters/0,1349,31935,00.html">The Market's Madness</a></b></font>
 <br>
-<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">9:10 a.m.</font>&#160;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The bulls and the bears are in the midst of a Battle Royale, and all this turbulence is not a healthy thing. So say the experts.</font>
+<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">9:10 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The bulls and the bears are in the midst of a Battle Royale, and all this turbulence is not a healthy thing. So say the experts.</font>
 <br>
-<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/reuters/0,1349,,00.html">in&#160;Reuters</a></i></font>
+<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/reuters/0,1349,,00.html">in&nbsp;Reuters</a></i></font>
 <br>
 <br>
 <!-- IBD_SUBJECT:'Want a Loan? What's Your Race?' --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/politics/0,1283,31533,00.html">'Want a Loan? What's Your Race?'</a></b></font>
 <br>
-<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&#160;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The Federal Reserve is in the middle of changing banking regulations to let banks collect data on the race, sex, religion, and national origin of their customers. By Declan McCullagh. </font>
+<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The Federal Reserve is in the middle of changing banking regulations to let banks collect data on the race, sex, religion, and national origin of their customers. By Declan McCullagh. </font>
 <br>
-<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/politics/0,1283,,00.html">in&#160;Politics</a></i></font>
+<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/politics/0,1283,,00.html">in&nbsp;Politics</a></i></font>
 <br>
 <br>
 <!-- IBD_SUBJECT:Music Regs: A Bagful of Noise --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/business/0,1367,31832,00.html">Music Regs: A Bagful of Noise</a></b></font>
 <br>
-<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&#160;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The struggle to come up with a digital music standard that would minimize download piracy is pushing right up against the holiday gift-giving season. By Jennifer Sullivan.</font>
+<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The struggle to come up with a digital music standard that would minimize download piracy is pushing right up against the holiday gift-giving season. By Jennifer Sullivan.</font>
 <br>
-<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/business/0,1367,,00.html">in&#160;Business</a></i></font>
+<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/business/0,1367,,00.html">in&nbsp;Business</a></i></font>
 <br>
 <br>
 <!-- IBD_SUBJECT:Can't Beat 'Em? Green 'Em --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/technology/0,1282,31927,00.html">Can't Beat 'Em? Green 'Em</a></b></font>
 <br>
-<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&#160;<font face="Verdana, Arial, Geneva, sans-serif" size="2">High-tech companies are notoriously environmentally unfriendly, and a growing number of &quot;Greenies&quot; are trying to change things from the inside ... with varying results. By Chris Gaither.</font>
+<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">High-tech companies are notoriously environmentally unfriendly, and a growing number of &quot;Greenies&quot; are trying to change things from the inside ... with varying results. By Chris Gaither.</font>
 <br>
-<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/technology/0,1282,,00.html">in&#160;Technology</a></i></font>
+<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/technology/0,1282,,00.html">in&nbsp;Technology</a></i></font>
 <br>
 <br>
 <!-- IBD_SUBJECT:Y2K Cloud Over MS Office --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/business/0,1367,31932,00.html">Y2K Cloud Over MS Office</a></b></font>
 <br>
-<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&#160;<font face="Verdana, Arial, Geneva, sans-serif" size="2">Windows NT sales remain strong, but corporate clients are wary of upgrading to MS Office 2000. Analysts say that means strong, but not stunning, Microsoft earnings. </font>
+<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">Windows NT sales remain strong, but corporate clients are wary of upgrading to MS Office 2000. Analysts say that means strong, but not stunning, Microsoft earnings. </font>
 <br>
-<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/business/0,1367,,00.html">in&#160;Business</a></i></font>
+<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/business/0,1367,,00.html">in&nbsp;Business</a></i></font>
 <br>
 <br>
 <font color="#FF0000" face="Verdana, Arial, Geneva, sans-serif" size="1">Med-Tech</font>
 <br>
 <!-- IBD_SUBJECT:Biochips for Custom Chemo --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/technology/0,1282,31914,00.html">Biochips for Custom Chemo</a></b></font>
 <br>
-<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&#160;<font face="Verdana, Arial, Geneva, sans-serif" size="2">Different cancer patients need different medicine, but doctors can rarely determine the best match. New biochip technology promises chemotherapy tailored to a tumor's genetic make-up. By Kristen Philipkoski.</font>
+<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">Different cancer patients need different medicine, but doctors can rarely determine the best match. New biochip technology promises chemotherapy tailored to a tumor's genetic make-up. By Kristen Philipkoski.</font>
 <br>
-<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/technology/0,1282,,00.html">in&#160;Technology</a></i></font>
+<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/technology/0,1282,,00.html">in&nbsp;Technology</a></i></font>
 <br>
 <br>
 <!-- IBD_SUBJECT:High Stakes in Priceline Suit --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/business/0,1367,31916,00.html">High Stakes in Priceline Suit</a></b></font>
 <br>
-<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&#160;<font face="Verdana, Arial, Geneva, sans-serif" size="2">It's not just another round of Redmond-bashing. A Priceline.com lawsuit against Microsoft's Expedia.com may have a big impact on how Net companies protect their business models. By Joanna Glasner.</font>
+<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">It's not just another round of Redmond-bashing. A Priceline.com lawsuit against Microsoft's Expedia.com may have a big impact on how Net companies protect their business models. By Joanna Glasner.</font>
 <br>
-<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/business/0,1367,,00.html">in&#160;Business</a></i></font>
+<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/business/0,1367,,00.html">in&nbsp;Business</a></i></font>
 <br>
 <br>
 <!-- IBD_SUBJECT:Biodiversity Merges Online --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/technology/0,1282,31918,00.html">Biodiversity Merges Online</a></b></font>
 <br>
-<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&#160;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The far-flung databases on global biodiversity get together to form one monster database. Soon the red-eyed tree frog will be eyeing those Swedish lingonberries. From the Environment News Service.</font>
+<font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The far-flung databases on global biodiversity get together to form one monster database. Soon the red-eyed tree frog will be eyeing those Swedish lingonberries. From the Environment News Service.</font>
 <br>
-<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/technology/0,1282,,00.html">in&#160;Technology</a></i></font>
+<font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/technology/0,1282,,00.html">in&nbsp;Technology</a></i></font>
 <br>
 <br>
 <!-- SQL above --><!------TRADES---------><br>
@@ -597,18 +597,18 @@
 <br>
 <p><font face="Verdana, Arial, Geneva, sans-serif" size="1">
 <a href="http://www.wired.com/news/feedback.html">Send us feedback</a>
-&#160;|&#160;
+&nbsp;|&nbsp;
 <a href="http://www.hotwired.com/jobs/">Work at Wired Digital</a>
-&#160;|&#160;
+&nbsp;|&nbsp;
 <a href="http://home.wired.com/advertising/">Advertise with us</a>
 <br>
 <a href="http://home.wired.com/">About Wired Digital</a>
-&#160;|&#160;
+&nbsp;|&nbsp;
 <a href="http://www.wired.com/home/digital/privacy/">Our Privacy Policy</a>
 </font></p>
 <p>
 <font face="Verdana, Arial, Geneva" size="1">
-<a href="http://www.wired.com/home/copyright.html">Copyright</a> &#169; 1994-99 Wired Digital Inc. All rights reserved.</font>
+<a href="http://www.wired.com/home/copyright.html">Copyright</a> &copy; 1994-99 Wired Digital Inc. All rights reserved.</font>
 <br>
 <!-- TRACKING --><img src="http://www.wired.com/special/modx/news.gif" height="1" width="1" alt="">
 <map NAME="navstrip.map">
diff --git a/result/HTML/wired.html.err b/result/HTML/wired.html.err
index b65f43d..357dc0a 100644
--- a/result/HTML/wired.html.err
+++ b/result/HTML/wired.html.err
@@ -181,75 +181,75 @@
 ./test/HTML/wired.html:97: error: htmlParseEntityRef: expecting ';'
 lue="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&OPs=MDRTP&M
                                                                                 ^
-./test/HTML/wired.html:165: error: Opening and ending tag mismatch: td and form
+./test/HTML/wired.html:159: error: Opening and ending tag mismatch: td and form
 </td> 
      ^
-./test/HTML/wired.html:170: error: Unexpected end tag : form
+./test/HTML/wired.html:164: error: Unexpected end tag : form
 	</tr>    </form>
                 ^
-./test/HTML/wired.html:244: error: Opening and ending tag mismatch: td and form
+./test/HTML/wired.html:238: error: Opening and ending tag mismatch: td and form
  </select></font></td></tr>
                       ^
-./test/HTML/wired.html:248: error: htmlParseEntityRef: expecting ';'
+./test/HTML/wired.html:242: error: htmlParseEntityRef: expecting ';'
 MG SRC="http://barnesandnoble.bfast.com/booklink/serve?sourceid=383471&is_searc
                                                                                 ^
-./test/HTML/wired.html:265: error: Unexpected end tag : form
+./test/HTML/wired.html:257: error: Unexpected end tag : form
         </tr>  </form>
                      ^
-./test/HTML/wired.html:346: error: Opening and ending tag mismatch: td and font
+./test/HTML/wired.html:338: error: Opening and ending tag mismatch: td and font
 </td>
     ^
-./test/HTML/wired.html:374: error: htmlParseEntityRef: no name
+./test/HTML/wired.html:366: error: htmlParseEntityRef: no name
 a, sans-serif"><b><a href="/news/commentarySection/0,1292,31926,00.html">Rants 
                                                                                 ^
-./test/HTML/wired.html:374: error: Opening and ending tag mismatch: td and font
+./test/HTML/wired.html:366: error: Opening and ending tag mismatch: td and font
 Readers on Apple's G4 ... AOL's passwords ... MS vs. Linux.</font><br><br>  </t
                                                                                 ^
-./test/HTML/wired.html:374: error: Opening and ending tag mismatch: td and font
+./test/HTML/wired.html:366: error: Opening and ending tag mismatch: td and font
 Readers on Apple's G4 ... AOL's passwords ... MS vs. Linux.</font><br><br>  </t
                                                                                 ^
-./test/HTML/wired.html:402: error: Opening and ending tag mismatch: a and font
+./test/HTML/wired.html:394: error: Opening and ending tag mismatch: a and font
 w.vignette.com/" style="text-decoration:none"><font color="#000000">Vignette</a
                                                                                 ^
-./test/HTML/wired.html:407: error: htmlParseEntityRef: expecting ';'
+./test/HTML/wired.html:398: error: htmlParseEntityRef: expecting ';'
 ervlet/appservlet?from=/wired/sprint/&template=/security/security.html&SITE=
                                               ^
-./test/HTML/wired.html:407: error: htmlParseEntityRef: expecting ';'
+./test/HTML/wired.html:398: error: htmlParseEntityRef: expecting ';'
 ervlet/appservlet?from=/wired/sprint/&template=/security/security.html&SITE=
                                                                            ^
-./test/HTML/wired.html:408: error: htmlParseEntityRef: expecting ';'
+./test/HTML/wired.html:398: error: htmlParseEntityRef: expecting ';'
 wired.com&BANNER=Sprint" style="text-decoration:none"><font color="#000000">Spr
                 ^
-./test/HTML/wired.html:408: error: Opening and ending tag mismatch: a and font
+./test/HTML/wired.html:398: error: Opening and ending tag mismatch: a and font
 com&BANNER=Sprint" style="text-decoration:none"><font color="#000000">Sprint</a
                                                                                 ^
-./test/HTML/wired.html:408: error: End tag : expected '>'
+./test/HTML/wired.html:398: error: End tag : expected '>'
 =Sprint" style="text-decoration:none"><font color="#000000">Sprint</a></i></fon
                                                                                 ^
-./test/HTML/wired.html:414: error: Opening and ending tag mismatch: td and font
+./test/HTML/wired.html:404: error: Opening and ending tag mismatch: td and font
 </td>
     ^
-./test/HTML/wired.html:414: error: Opening and ending tag mismatch: td and font
+./test/HTML/wired.html:404: error: Opening and ending tag mismatch: td and font
 </td>
     ^
-./test/HTML/wired.html:414: error: Opening and ending tag mismatch: td and font
+./test/HTML/wired.html:404: error: Opening and ending tag mismatch: td and font
 </td>
     ^
-./test/HTML/wired.html:414: error: Opening and ending tag mismatch: td and font
+./test/HTML/wired.html:404: error: Opening and ending tag mismatch: td and font
 </td>
     ^
-./test/HTML/wired.html:414: error: Opening and ending tag mismatch: td and font
+./test/HTML/wired.html:404: error: Opening and ending tag mismatch: td and font
 </td>
     ^
-./test/HTML/wired.html:414: error: Opening and ending tag mismatch: td and font
+./test/HTML/wired.html:404: error: Opening and ending tag mismatch: td and font
 </td>
     ^
-./test/HTML/wired.html:414: error: Opening and ending tag mismatch: td and font
+./test/HTML/wired.html:404: error: Opening and ending tag mismatch: td and font
 </td>
     ^
-./test/HTML/wired.html:414: error: Opening and ending tag mismatch: td and font
+./test/HTML/wired.html:404: error: Opening and ending tag mismatch: td and font
 </td>
     ^
-./test/HTML/wired.html:432: error: htmlParseEntityRef: expecting ';'
+./test/HTML/wired.html:422: error: htmlParseEntityRef: expecting ';'
 href="http://www.lycos.com/news/flash/hitlerbunker.html?v=wn1015&lpv=1">Lycos</
                                                                     ^
diff --git a/result/valid/REC-xml-19980210.xml b/result/valid/REC-xml-19980210.xml
index a27855d..2d4f035 100644
--- a/result/valid/REC-xml-19980210.xml
+++ b/result/valid/REC-xml-19980210.xml
@@ -2574,7 +2574,7 @@
 &#xA9; 1947 %pub;. &rights;" >]]></eg>
 then the replacement text for the entity &quot;<code>book</code>&quot; is:
 <eg>La Peste: Albert Camus, 
-© 1947 Éditions Gallimard. &amp;rights;</eg>
+© 1947 Éditions Gallimard. &amp;rights;</eg>
 The general-entity reference &quot;<code>&amp;rights;</code>&quot; would be expanded
 should the reference &quot;<code>&amp;book;</code>&quot; appear in the document's
 content or an attribute value.</p>
diff --git a/testHTML.c b/testHTML.c
index d5157f6..7cd3a5e 100644
--- a/testHTML.c
+++ b/testHTML.c
@@ -49,6 +49,7 @@
 static int repeat = 0;
 static int noout = 0;
 static int push = 0;
+static char *encoding = NULL;
 
 xmlSAXHandler emptySAXHandlerStruct = {
     NULL, /* internalSubset */
@@ -638,12 +639,18 @@
      */
     if (!noout) { 
 #ifdef LIBXML_DEBUG_ENABLED
-	if (!debug)
-	    htmlDocDump(stdout, doc);
-	else
+	if (!debug) {
+	    if (encoding)
+		htmlSaveFileEnc("-", doc, encoding);
+	    else
+		htmlDocDump(stdout, doc);
+	} else
 	    xmlDebugDumpDocument(stdout, doc);
 #else
-	htmlDocDump(stdout, doc);
+	if (encoding)
+	    htmlSaveFileEnc("-", doc, encoding);
+	else
+	    htmlDocDump(stdout, doc);
 #endif
     }	
 
@@ -674,8 +681,18 @@
 	else if ((!strcmp(argv[i], "-repeat")) ||
 	         (!strcmp(argv[i], "--repeat")))
 	    repeat++;
+	else if ((!strcmp(argv[i], "-encode")) ||
+	         (!strcmp(argv[i], "--encode"))) {
+	    i++;
+	    encoding = argv[i];
+        }
     }
     for (i = 1; i < argc ; i++) {
+	if ((!strcmp(argv[i], "-encode")) ||
+	         (!strcmp(argv[i], "--encode"))) {
+	    i++;
+	    continue;
+        }
 	if (argv[i][0] != '-') {
 	    if (repeat) {
 		for (count = 0;count < 100 * repeat;count++) {
@@ -705,6 +722,7 @@
 	printf("\t--repeat : parse the file 100 times, for timing\n");
 	printf("\t--noout : do not print the result\n");
 	printf("\t--push : use the push mode parser\n");
+	printf("\t--encode encoding : output in the given encoding\n");
     }
     xmlCleanupParser();
     xmlMemoryDump();
diff --git a/testSAX.c b/testSAX.c
index 8b43517..e5568fc 100644
--- a/testSAX.c
+++ b/testSAX.c
@@ -36,6 +36,7 @@
 #endif
 
 
+#include <libxml/xml-error.h>
 #include <libxml/parser.h>
 #include <libxml/parserInternals.h> /* only for xmlNewInputFromFile() */
 #include <libxml/tree.h>
diff --git a/tree.c b/tree.c
index b81b7a6..a686ff1 100644
--- a/tree.c
+++ b/tree.c
@@ -2984,7 +2984,7 @@
     switch (cur->type) {
         case XML_DOCUMENT_FRAG_NODE:
         case XML_ELEMENT_NODE: {
-	    xmlNodePtr last = NULL, new;
+	    xmlNodePtr last = NULL, newNode;
 
 	    if (cur->children != NULL) {
 		last = cur->last;
@@ -3006,11 +3006,11 @@
 		    last = cur->last;
 		}
 	    }
-	    new = xmlNewTextLen(content, len);
-	    if (new != NULL) {
-		xmlAddChild(cur, new);
-	        if ((last != NULL) && (last->next == new)) {
-		    xmlTextMerge(last, new);
+	    newNode = xmlNewTextLen(content, len);
+	    if (newNode != NULL) {
+		xmlAddChild(cur, newNode);
+	        if ((last != NULL) && (last->next == newNode)) {
+		    xmlTextMerge(last, newNode);
 		}
 	    }
 	    break;
@@ -3470,6 +3470,54 @@
 }
 
 /**
+ * xmlHasProp:
+ * @node:  the node
+ * @name:  the attribute name
+ *
+ * Search for an attribute named @name among the properties of @node.
+ * Unless xmlCheckDTD is zero, this function then also looks for a DTD
+ * attribute declaration for the element name of @node.
+ *
+ * Returns the attribute, or the attribute declaration cast to an
+ *         xmlAttrPtr, or NULL if neither was found.
+ */
+xmlAttrPtr
+xmlHasProp(xmlNodePtr node, const xmlChar *name) {
+    xmlAttrPtr prop;
+    xmlDocPtr doc;
+
+    if ((node == NULL) || (name == NULL)) return(NULL);
+    /*
+     * First walk the list of properties attached to the node
+     */
+    prop = node->properties;
+    while (prop != NULL) {
+        if (!xmlStrcmp(prop->name, name))  {
+	    return(prop);
+        }
+	prop = prop->next;
+    }
+    if (!xmlCheckDTD) return(NULL);
+
+    /*
+     * Look for a declaration in the internal subset, then in the
+     * external one (only consulted when an internal subset exists)
+     */
+    doc =  node->doc;
+    if (doc != NULL) {
+        xmlAttributePtr attrDecl;
+        if (doc->intSubset != NULL) {
+	    attrDecl = xmlGetDtdAttrDesc(doc->intSubset, node->name, name);
+	    if ((attrDecl == NULL) && (doc->extSubset != NULL))
+		attrDecl = xmlGetDtdAttrDesc(doc->extSubset, node->name, name);
+	    if (attrDecl != NULL)
+		return((xmlAttrPtr) attrDecl);
+	}
+    }
+    return(NULL);
+}
+
+/**
  * xmlGetProp:
  * @node:  the node
  * @name:  the attribute name
@@ -3652,7 +3700,9 @@
  * xmlIsBlankNode:
  * @node:  the node
  * 
- * Is this node a Text node ?
+ * Checks whether this node is an empty or whitespace only
+ * (and possibly ignorable) text-node.
+ *
  * Returns 1 yes, 0 no
  */
 int
@@ -3863,7 +3913,7 @@
 
     size = buf->use + len + 100;
 
-    newbuf = xmlRealloc(buf->content, size);
+    newbuf = (xmlChar *) xmlRealloc(buf->content, size);
     if (newbuf == NULL) return(-1);
     buf->content = newbuf;
     buf->size = size;
@@ -5099,7 +5149,7 @@
 	if (cur->encoding != NULL)
 	    encoding = (const char *) cur->encoding;
 	else if (cur->charset != XML_CHAR_ENCODING_UTF8)
-	    encoding = xmlGetCharEncodingName(cur->charset);
+	    encoding = xmlGetCharEncodingName((xmlCharEncoding) cur->charset);
     }
     if (encoding != NULL) {
         xmlOutputBufferWriteString(buf, " encoding=");
@@ -5224,91 +5274,6 @@
     else xmlCompressMode = mode;
 }
 
-#if 0
-/**
- * xmlDocDump:
- * @f:  the FILE*
- * @cur:  the document
- *
- * Dump an XML document to an open FILE.
- */
-void
-xmlDocDump(FILE *f, xmlDocPtr cur) {
-    xmlBufferPtr buf;
-
-    if (cur == NULL) {
-#ifdef DEBUG_TREE
-        fprintf(stderr, "xmlDocDump : document == NULL\n");
-#endif
-	return;
-    }
-    buf = xmlBufferCreate();
-    if (buf == NULL) return;
-    xmlDocContentDump(buf, cur);
-    xmlBufferDump(f, buf);
-    xmlBufferFree(buf);
-}
-
-/**
- * xmlSaveFile:
- * @filename:  the filename
- * @cur:  the document
- *
- * Dump an XML document to a file. Will use compression if
- * compiled in and enabled. If @filename is "-" the stdout file is
- * used.
- * returns: the number of file written or -1 in case of failure.
- */
-int
-xmlSaveFile(const char *filename, xmlDocPtr cur) {
-    xmlBufferPtr buf;
-#ifdef HAVE_ZLIB_H
-    gzFile zoutput = NULL;
-    char mode[15];
-#endif
-    FILE *output = NULL;
-    int ret;
-
-    /* 
-     * save the content to a temp buffer.
-     */
-    buf = xmlBufferCreate();
-    if (buf == NULL) return(0);
-    xmlDocContentDump(buf, cur);
-
-#ifdef HAVE_ZLIB_H
-    if (cur->compression < 0) cur->compression = xmlCompressMode;
-    if ((cur->compression > 0) && (cur->compression <= 9)) {
-        sprintf(mode, "w%d", cur->compression);
-	if (!strcmp(filename, "-")) 
-	    zoutput = gzdopen(1, mode);
-	else
-	    zoutput = gzopen(filename, mode);
-    }
-    if (zoutput == NULL) {
-#endif
-        output = fopen(filename, "w");
-	if (output == NULL) {
-	    xmlBufferFree(buf);
-	    return(-1);
-	}
-#ifdef HAVE_ZLIB_H
-    }
-
-    if (zoutput != NULL) {
-        ret = gzwrite(zoutput, buf->content, sizeof(xmlChar) * buf->use);
-	gzclose(zoutput);
-    } else {
-#endif
-        ret = xmlBufferDump(output, buf);
-	fclose(output);
-#ifdef HAVE_ZLIB_H
-    }
-#endif
-    xmlBufferFree(buf);
-    return(ret * sizeof(xmlChar));
-}
-#else
 /**
  * xmlDocDump:
  * @f:  the FILE*
@@ -5316,11 +5281,13 @@
  *
  * Dump an XML document to an open FILE.
  *
- * returns: the number of file written or -1 in case of failure.
+ * returns: the number of bytes written or -1 in case of failure.
  */
 int
 xmlDocDump(FILE *f, xmlDocPtr cur) {
     xmlOutputBufferPtr buf;
+    const char * encoding;
+    xmlCharEncodingHandlerPtr handler = NULL;
     int ret;
 
     if (cur == NULL) {
@@ -5329,7 +5296,26 @@
 #endif
 	return(-1);
     }
-    buf = xmlOutputBufferCreateFile(f, NULL);
+    encoding = (const char *) cur->encoding;
+
+    if (encoding != NULL) {
+	xmlCharEncoding enc;
+
+	enc = xmlParseCharEncoding(encoding);
+
+	if (cur->charset != XML_CHAR_ENCODING_UTF8) {
+	    fprintf(stderr, "xmlDocDump: document not in UTF8\n");
+	    return(-1);
+	}
+	if (enc != XML_CHAR_ENCODING_UTF8) {
+	    handler = xmlFindCharEncodingHandler(encoding);
+	    if (handler == NULL) {
+		xmlFree((char *) cur->encoding);
+		cur->encoding = NULL;
+	    }
+	}
+    }
+    buf = xmlOutputBufferCreateFile(f, handler);
     if (buf == NULL) return(-1);
     xmlDocContentDumpOutput(buf, cur, NULL);
 
@@ -5338,36 +5324,6 @@
 }
 
 /**
- * xmlSaveFile:
- * @filename:  the filename (or URL)
- * @cur:  the document
- *
- * Dump an XML document to a file. Will use compression if
- * compiled in and enabled. If @filename is "-" the stdout file is
- * used.
- * returns: the number of file written or -1 in case of failure.
- */
-int
-xmlSaveFile(const char *filename, xmlDocPtr cur) {
-    xmlOutputBufferPtr buf;
-    int ret;
-
-    /* 
-     * save the content to a temp buffer.
-     */
-#ifdef HAVE_ZLIB_H
-    if (cur->compression < 0) cur->compression = xmlCompressMode;
-#endif
-    buf = xmlOutputBufferCreateFilename(filename, NULL, cur->compression);
-    if (buf == NULL) return(0);
-
-    xmlDocContentDumpOutput(buf, cur, NULL);
-
-    ret = xmlOutputBufferClose(buf);
-    return(ret);
-}
-
-/**
  * xmlSaveFileTo:
  * @buf:  an output I/O buffer
  * @cur:  the document
@@ -5375,7 +5331,7 @@
  *
  * Dump an XML document to an I/O buffer.
  *
- * returns: the number of file written or -1 in case of failure.
+ * returns: the number of bytes written or -1 in case of failure.
  */
 int
 xmlSaveFileTo(xmlOutputBuffer *buf, xmlDocPtr cur, const char *encoding) {
@@ -5395,7 +5351,7 @@
  *
  * Dump an XML document, converting it to the given encoding
  *
- * returns: the number of file written or -1 in case of failure.
+ * returns: the number of bytes written or -1 in case of failure.
  */
 int
 xmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
@@ -5407,17 +5363,15 @@
 	xmlCharEncoding enc;
 
 	enc = xmlParseCharEncoding(encoding);
-	if (enc != cur->charset) {
-	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
-		/*
-		 * Not supported yet
-		 */
+	if (cur->charset != XML_CHAR_ENCODING_UTF8) {
+	    fprintf(stderr, "xmlSaveFileEnc: document not in UTF8\n");
+	    return(-1);
+	}
+	if (enc != XML_CHAR_ENCODING_UTF8) {
+	    handler = xmlFindCharEncodingHandler(encoding);
+	    if (handler == NULL) {
 		return(-1);
 	    }
-
-	    handler = xmlFindCharEncodingHandler(encoding);
-	    if (handler == NULL)
-		return(-1);
 	}
     }
 
@@ -5432,4 +5386,58 @@
     ret = xmlOutputBufferClose(buf);
     return(ret);
 }
+
+/**
+ * xmlSaveFile:
+ * @filename:  the filename (or URL)
+ * @cur:  the document
+ *
+ * Dump an XML document to a file, converting it to the encoding recorded
+ * in @cur->encoding when a handler for it exists. Will use compression
+ * if compiled in and enabled. If @filename is "-" the stdout file is used.
+ * returns: the number of bytes written or -1 in case of failure.
+ */
+int
+xmlSaveFile(const char *filename, xmlDocPtr cur) {
+    xmlOutputBufferPtr buf;
+    const char *encoding;
+    xmlCharEncodingHandlerPtr handler = NULL;
+    int ret;
+
+    if (cur == NULL)
+	return(-1);
+    encoding = (const char *) cur->encoding;
+
+    /*
+     * a negative compression level selects the library-wide default.
+     */
+#ifdef HAVE_ZLIB_H
+    if (cur->compression < 0) cur->compression = xmlCompressMode;
 #endif
+    if (encoding != NULL) {
+	xmlCharEncoding enc;
+
+	enc = xmlParseCharEncoding(encoding);
+
+	if (cur->charset != XML_CHAR_ENCODING_UTF8) {
+	    fprintf(stderr, "xmlSaveFile: document not in UTF8\n");
+	    return(-1);
+	}
+	if (enc != XML_CHAR_ENCODING_UTF8) {
+	    handler = xmlFindCharEncodingHandler(encoding);
+	    if (handler == NULL) { /* no converter: forget the encoding */
+		xmlFree((char *) cur->encoding);
+		cur->encoding = NULL;
+	    }
+	}
+    }
+
+    buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
+    if (buf == NULL) return(-1); /* failure is -1, as in xmlDocDump() */
+
+    xmlDocContentDumpOutput(buf, cur, NULL);
+
+    ret = xmlOutputBufferClose(buf);
+    return(ret);
+}
+
diff --git a/tree.h b/tree.h
index 6c68dc3..00f4ee6 100644
--- a/tree.h
+++ b/tree.h
@@ -549,6 +549,8 @@
 					 const xmlChar *value);
 xmlChar *	xmlGetProp		(xmlNodePtr node,
 					 const xmlChar *name);
+xmlAttrPtr	xmlHasProp		(xmlNodePtr node,
+					 const xmlChar *name);
 xmlChar *	xmlGetNsProp		(xmlNodePtr node,
 					 const xmlChar *name,
 					 const xmlChar *nameSpace);
diff --git a/uri.c b/uri.c
index ac7aa0b..1ed1410 100644
--- a/uri.c
+++ b/uri.c
@@ -207,7 +207,7 @@
 
 
     max = 80;
-    ret = xmlMalloc((max + 1) * sizeof(xmlChar));
+    ret = (xmlChar *) xmlMalloc((max + 1) * sizeof(xmlChar));
     if (ret == NULL) {
 	fprintf(stderr, "xmlSaveUri: out of memory\n");
 	return(NULL);
@@ -219,7 +219,7 @@
 	while (*p != 0) {
 	    if (len >= max) {
 		max *= 2;
-		ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
+		ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 		if (ret == NULL) {
 		    fprintf(stderr, "xmlSaveUri: out of memory\n");
 		    return(NULL);
@@ -229,7 +229,7 @@
 	}
 	if (len >= max) {
 	    max *= 2;
-	    ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
+	    ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 	    if (ret == NULL) {
 		fprintf(stderr, "xmlSaveUri: out of memory\n");
 		return(NULL);
@@ -242,7 +242,7 @@
 	while (*p != 0) {
 	    if (len + 3 >= max) {
 		max *= 2;
-		ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
+		ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 		if (ret == NULL) {
 		    fprintf(stderr, "xmlSaveUri: out of memory\n");
 		    return(NULL);
@@ -278,7 +278,7 @@
 	}
 	if (len >= max) {
 	    max *= 2;
-	    ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
+	    ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 	    if (ret == NULL) {
 		fprintf(stderr, "xmlSaveUri: out of memory\n");
 		return(NULL);
@@ -289,7 +289,7 @@
 	if (uri->server != NULL) {
 	    if (len + 3 >= max) {
 		max *= 2;
-		ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
+		ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 		if (ret == NULL) {
 		    fprintf(stderr, "xmlSaveUri: out of memory\n");
 		    return(NULL);
@@ -302,7 +302,7 @@
 		while (*p != 0) {
 		    if (len + 3 >= max) {
 			max *= 2;
-			ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
+			ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 			if (ret == NULL) {
 			    fprintf(stderr, "xmlSaveUri: out of memory\n");
 			    return(NULL);
@@ -338,7 +338,7 @@
 		}
 		if (len + 3 >= max) {
 		    max *= 2;
-		    ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
+		    ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 		    if (ret == NULL) {
 			fprintf(stderr, "xmlSaveUri: out of memory\n");
 			return(NULL);
@@ -350,7 +350,7 @@
 	    while (*p != 0) {
 		if (len >= max) {
 		    max *= 2;
-		    ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
+		    ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 		    if (ret == NULL) {
 			fprintf(stderr, "xmlSaveUri: out of memory\n");
 			return(NULL);
@@ -361,7 +361,7 @@
 	    if (uri->port > 0) {
 		if (len + 10 >= max) {
 		    max *= 2;
-		    ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
+		    ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 		    if (ret == NULL) {
 			fprintf(stderr, "xmlSaveUri: out of memory\n");
 			return(NULL);
@@ -372,7 +372,7 @@
 	} else if (uri->authority != NULL) {
 	    if (len + 3 >= max) {
 		max *= 2;
-		ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
+		ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 		if (ret == NULL) {
 		    fprintf(stderr, "xmlSaveUri: out of memory\n");
 		    return(NULL);
@@ -384,7 +384,7 @@
 	    while (*p != 0) {
 		if (len + 3 >= max) {
 		    max *= 2;
-		    ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
+		    ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 		    if (ret == NULL) {
 			fprintf(stderr, "xmlSaveUri: out of memory\n");
 			return(NULL);
@@ -424,7 +424,7 @@
 	    while (*p != 0) {
 		if (len + 3 >= max) {
 		    max *= 2;
-		    ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
+		    ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 		    if (ret == NULL) {
 			fprintf(stderr, "xmlSaveUri: out of memory\n");
 			return(NULL);
@@ -462,7 +462,7 @@
 	if (uri->query != NULL) {
 	    if (len + 3 >= max) {
 		max *= 2;
-		ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
+		ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 		if (ret == NULL) {
 		    fprintf(stderr, "xmlSaveUri: out of memory\n");
 		    return(NULL);
@@ -473,7 +473,7 @@
 	    while (*p != 0) {
 		if (len + 3 >= max) {
 		    max *= 2;
-		    ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
+		    ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 		    if (ret == NULL) {
 			fprintf(stderr, "xmlSaveUri: out of memory\n");
 			return(NULL);
@@ -508,7 +508,7 @@
 	if (uri->fragment != NULL) {
 	    if (len + 3 >= max) {
 		max *= 2;
-		ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
+		ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 		if (ret == NULL) {
 		    fprintf(stderr, "xmlSaveUri: out of memory\n");
 		    return(NULL);
@@ -519,7 +519,7 @@
 	    while (*p != 0) {
 		if (len + 3 >= max) {
 		    max *= 2;
-		    ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
+		    ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 		    if (ret == NULL) {
 			fprintf(stderr, "xmlSaveUri: out of memory\n");
 			return(NULL);
@@ -553,7 +553,7 @@
 	}
 	if (len >= max) {
 	    max *= 2;
-	    ret = xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
+	    ret = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 	    if (ret == NULL) {
 		fprintf(stderr, "xmlSaveUri: out of memory\n");
 		return(NULL);
diff --git a/valid.c b/valid.c
index 5235991..3fb7911 100644
--- a/valid.c
+++ b/valid.c
@@ -33,7 +33,7 @@
 scope int name##VPush(xmlValidCtxtPtr ctxt, type value) {		\
     if (ctxt->name##Nr >= ctxt->name##Max) {				\
 	ctxt->name##Max *= 2;						\
-        ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab,		\
+        ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab,		\
 	             ctxt->name##Max * sizeof(ctxt->name##Tab[0]));	\
         if (ctxt->name##Tab == NULL) {					\
 	    fprintf(stderr, "realloc failed !\n");			\
@@ -538,9 +538,11 @@
     /*
      * Create the Element table if needed.
      */
-    table = dtd->elements;
-    if (table == NULL) 
-        table = dtd->elements = xmlCreateElementTable();
+    table = (xmlElementTablePtr) dtd->elements;
+    if (table == NULL) {
+        table = xmlCreateElementTable();
+	dtd->elements = (void *) table;
+    }
     if (table == NULL) {
 	fprintf(stderr, "xmlAddElementDecl: Table creation failed!\n");
         return(NULL);
@@ -909,7 +911,7 @@
         fprintf(stderr, "xmlScanAttributeDecl: elem == NULL\n");
 	return(NULL);
     }
-    table = dtd->attributes;
+    table = (xmlAttributeTablePtr) dtd->attributes;
     if (table == NULL) 
         return(NULL);
 
@@ -1029,9 +1031,11 @@
     /*
      * Create the Attribute table if needed.
      */
-    table = dtd->attributes;
-    if (table == NULL) 
-        table = dtd->attributes = xmlCreateAttributeTable();
+    table = (xmlAttributeTablePtr) dtd->attributes;
+    if (table == NULL) {
+        table = xmlCreateAttributeTable();
+	dtd->attributes = (void *) table;
+    }
     if (table == NULL) {
 	fprintf(stderr, "xmlAddAttributeDecl: Table creation failed!\n");
         return(NULL);
@@ -1388,9 +1392,9 @@
     /*
      * Create the Notation table if needed.
      */
-    table = dtd->notations;
+    table = (xmlNotationTablePtr) dtd->notations;
     if (table == NULL) 
-        table = dtd->notations = xmlCreateNotationTable();
+        dtd->notations = table = xmlCreateNotationTable();
     if (table == NULL) {
 	fprintf(stderr, "xmlAddNotationDecl: Table creation failed!\n");
         return(NULL);
@@ -1657,9 +1661,9 @@
     /*
      * Create the ID table if needed.
      */
-    table = doc->ids;
+    table = (xmlIDTablePtr) doc->ids;
     if (table == NULL) 
-        table = doc->ids = xmlCreateIDTable();
+        doc->ids = table = xmlCreateIDTable();
     if (table == NULL) {
 	fprintf(stderr, "xmlAddID: Table creation failed!\n");
         return(NULL);
@@ -1804,7 +1808,7 @@
 
     if (doc == NULL) return(-1);
     if (attr == NULL) return(-1);
-    table = doc->ids;
+    table = (xmlIDTablePtr) doc->ids;
     if (table == NULL) 
         return(-1);
 
@@ -1848,7 +1852,7 @@
 	return(NULL);
     }
 
-    table = doc->ids;
+    table = (xmlIDTablePtr) doc->ids;
     if (table == NULL) 
         return(NULL);
 
@@ -1935,9 +1939,9 @@
     /*
      * Create the Ref table if needed.
      */
-    table = doc->refs;
+    table = (xmlRefTablePtr) doc->refs;
     if (table == NULL) 
-        table = doc->refs = xmlCreateRefTable();
+        doc->refs = table = xmlCreateRefTable();
     if (table == NULL) {
 	fprintf(stderr, "xmlAddRef: Table creation failed!\n");
         return(NULL);
@@ -2065,7 +2069,7 @@
 
     if (doc == NULL) return(-1);
     if (attr == NULL) return(-1);
-    table = doc->refs;
+    table = (xmlRefTablePtr) doc->refs;
     if (table == NULL) 
         return(-1);
 
@@ -2109,7 +2113,7 @@
 	return(NULL);
     }
 
-    table = doc->refs;
+    table = (xmlRefTablePtr) doc->refs;
     if (table == NULL) 
         return(NULL);
 
@@ -2150,7 +2154,7 @@
 
     if (dtd == NULL) return(NULL);
     if (dtd->elements == NULL) return(NULL);
-    table = dtd->elements;
+    table = (xmlElementTablePtr) dtd->elements;
 
     for (i = 0;i < table->nb_elements;i++) {
         cur = table->table[i];
@@ -2200,7 +2204,7 @@
 
     if (dtd == NULL) return(NULL);
     if (dtd->elements == NULL) return(NULL);
-    table = dtd->elements;
+    table = (xmlElementTablePtr) dtd->elements;
 
     for (i = 0;i < table->nb_elements;i++) {
         cur = table->table[i];
@@ -2234,7 +2238,7 @@
 
     if (dtd == NULL) return(NULL);
     if (dtd->attributes == NULL) return(NULL);
-    table = dtd->attributes;
+    table = (xmlAttributeTablePtr) dtd->attributes;
 
     for (i = 0;i < table->nb_attributes;i++) {
         cur = table->table[i];
@@ -2288,7 +2292,7 @@
 
     if (dtd == NULL) return(NULL);
     if (dtd->attributes == NULL) return(NULL);
-    table = dtd->attributes;
+    table = (xmlAttributeTablePtr) dtd->attributes;
 
     for (i = 0;i < table->nb_attributes;i++) {
         cur = table->table[i];
@@ -2320,7 +2324,7 @@
 
     if (dtd == NULL) return(NULL);
     if (dtd->notations == NULL) return(NULL);
-    table = dtd->notations;
+    table = (xmlNotationTablePtr) dtd->notations;
 
     for (i = 0;i < table->nb_notations;i++) {
         cur = table->table[i];
@@ -2890,7 +2894,7 @@
 	     * element in the external subset.
 	     */
 	    nbId = 0;
-	    table = doc->intSubset->attributes;
+	    table = (xmlAttributeTablePtr) doc->intSubset->attributes;
 	    if (table != NULL) {
 		for (i = 0;i < table->nb_attributes;i++) {
 		    if ((table->table[i]->atype == XML_ATTRIBUTE_ID) &&
@@ -3902,7 +3906,7 @@
     /*
      * Check all the IDREF/IDREFS attributes definition for validity
      */
-    table = doc->refs;
+    table = (xmlRefTablePtr) doc->refs;
     if (table != NULL) {
         for (i = 0; i < table->nb_refs; i++) {
 	    if (table->table[i]->attr->atype == XML_ATTRIBUTE_IDREF) {
@@ -4008,7 +4012,7 @@
 	return(0);
     dtd = doc->intSubset;
     if ((dtd != NULL) && (dtd->attributes != NULL)) {
-	table = dtd->attributes;
+	table = (xmlAttributeTablePtr) dtd->attributes;
 
 	for (i = 0;i < table->nb_attributes;i++) {
 	    cur = table->table[i];
@@ -4041,7 +4045,7 @@
     }
     dtd = doc->extSubset;
     if ((dtd != NULL) && (dtd->attributes != NULL)) {
-	table = dtd->attributes;
+	table = (xmlAttributeTablePtr) dtd->attributes;
 
 	for (i = 0;i < table->nb_attributes;i++) {
 	    cur = table->table[i];
diff --git a/xlink.h b/xlink.h
index 68a35fe..37a5415 100644
--- a/xlink.h
+++ b/xlink.h
@@ -16,7 +16,7 @@
 #include <libxml/tree.h>
 
 #ifdef __cplusplus
-#define extern "C" {
+extern "C" {
 #endif
 /**
  * Various defines for the various Link properties.
diff --git a/xmlIO.c b/xmlIO.c
index fdf961c..531e3aa 100644
--- a/xmlIO.c
+++ b/xmlIO.c
@@ -1207,7 +1207,7 @@
     if (len > buffree) 
         len = buffree;
 
-    buffer = xmlMalloc((len + 1) * sizeof(char));
+    buffer = (char *) xmlMalloc((len + 1) * sizeof(char));
     if (buffer == NULL) {
         fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
 	return(-1);
diff --git a/xmllint.c b/xmllint.c
index dcd7161..3540e7e 100644
--- a/xmllint.c
+++ b/xmllint.c
@@ -666,6 +666,7 @@
 	printf("\t--nowarning : do not emit warnings from parser/validator\n");
 	printf("\t--noblanks : drop (ignorable?) blanks spaces\n");
 	printf("\t--testIO : test user I/O support\n");
+	printf("\t--encode encoding : output in the given encoding\n");
     }
     xmlCleanupParser();
     xmlMemoryDump();
diff --git a/xmlmemory.c b/xmlmemory.c
index e03e51d..b82a6e0 100644
--- a/xmlmemory.c
+++ b/xmlmemory.c
@@ -325,7 +325,7 @@
 #ifdef MEM_LIST
     debugmem_list_add(p);
 #endif
-    s = HDR_2_CLIENT(p);
+    s = (char *) HDR_2_CLIENT(p);
     
     if (xmlMemStopAtBlock == block) xmlMallocBreakpoint();
 
@@ -382,7 +382,7 @@
 xmlMemContentShow(FILE *fp, MEMHDR *p)
 {
     int i,j,len = p->mh_size;
-    const char *buf = HDR_2_CLIENT(p);
+    const char *buf = (const char *) HDR_2_CLIENT(p);
 
     for (i = 0;i < len;i++) {
         if (buf[i] == 0) break;
diff --git a/xpath.c b/xpath.c
index 4523751..413c16c 100644
--- a/xpath.c
+++ b/xpath.c
@@ -183,7 +183,7 @@
 extern int name##Push(xmlXPathParserContextPtr ctxt, type value) {	\
     if (ctxt->name##Nr >= ctxt->name##Max) {				\
 	ctxt->name##Max *= 2;						\
-        ctxt->name##Tab = (void *) xmlRealloc(ctxt->name##Tab,		\
+        ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab,		\
 	             ctxt->name##Max * sizeof(ctxt->name##Tab[0]));	\
         if (ctxt->name##Tab == NULL) {					\
 	    fprintf(xmlXPathDebug, "realloc failed !\n");		\
@@ -849,10 +849,8 @@
     if (ctxt->namespaces != NULL)
         xmlFree(ctxt->namespaces);
 
- /***********   
     if (ctxt->nodelist != NULL) 
         xmlXPathFreeNodeSet(ctxt->nodelist);
-  ***********/  
 #ifdef DEBUG
     memset(ctxt, 0xB , (size_t) sizeof(xmlXPathContext));
 #endif
@@ -2548,7 +2546,7 @@
  */
 void
 xmlXPathConcatFunction(xmlXPathParserContextPtr ctxt, int nargs) {
-    xmlXPathObjectPtr cur, new;
+    xmlXPathObjectPtr cur, newobj;
     xmlChar *tmp;
 
     if (nargs < 2) {
@@ -2563,17 +2561,17 @@
     nargs--;
 
     while (nargs > 0) {
-	new = valuePop(ctxt);
-	if ((new == NULL) || (new->type != XPATH_STRING)) {
-	    xmlXPathFreeObject(new);
+	newobj = valuePop(ctxt);
+	if ((newobj == NULL) || (newobj->type != XPATH_STRING)) {
+	    xmlXPathFreeObject(newobj);
 	    xmlXPathFreeObject(cur);
 	    XP_ERROR(XPATH_INVALID_TYPE);
 	}
-	tmp = xmlStrcat(new->stringval, cur->stringval);
-	new->stringval = cur->stringval;
+	tmp = xmlStrcat(newobj->stringval, cur->stringval);
+	newobj->stringval = cur->stringval;
 	cur->stringval = tmp;
 
-	xmlXPathFreeObject(new);
+	xmlXPathFreeObject(newobj);
 	nargs--;
     }
     valuePush(ctxt, cur);