More cleanup of the HTML parser to force it not to bypass SAX. Daniel.
Ready for 2.1.1, it seems.
diff --git a/HTMLparser.c b/HTMLparser.c
index 472d2cf..375a038 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -618,7 +618,7 @@
  */
 void
 htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *new) {
-    if (!strcmp(new, "html"))
+    if (!xmlStrcmp(new, BAD_CAST"html"))
 	return;
     if (ctxt->nameNr <= 0) {
 #ifdef DEBUG
@@ -628,12 +628,15 @@
 	if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
 	    ctxt->sax->startElement(ctxt->userData, BAD_CAST"html", NULL);
     }
-    if ((!strcmp(new, "body")) || (!strcmp(new, "head")))
+    if ((!xmlStrcmp(new, BAD_CAST"body")) || (!xmlStrcmp(new, BAD_CAST"head")))
         return;
     if (ctxt->nameNr <= 1) {
-	if ((!strcmp(new, "script")) || (!strcmp(new, "style")) ||
-	    (!strcmp(new, "meta")) || (!strcmp(new, "link")) ||
-	    (!strcmp(new, "title")) || (!strcmp(new, "base"))) {
+	if ((!xmlStrcmp(new, BAD_CAST"script")) ||
+	    (!xmlStrcmp(new, BAD_CAST"style")) ||
+	    (!xmlStrcmp(new, BAD_CAST"meta")) ||
+	    (!xmlStrcmp(new, BAD_CAST"link")) ||
+	    (!xmlStrcmp(new, BAD_CAST"title")) ||
+	    (!xmlStrcmp(new, BAD_CAST"base"))) {
 	    /* 
 	     * dropped OBJECT ... i you put it first BODY will be
 	     * assumed !
@@ -2152,17 +2155,15 @@
 	    ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
 	ctxt->wellFormed = 0;
         /* We shouldn't try to resynchronize ... */
-    } else {
     }
     NEXT;
 
     /*
-     * Create the document accordingly to the DOCTYPE
+     * Create or update the document accordingly to the DOCTYPE
      */
-    if (ctxt->myDoc != NULL)
-        xmlFreeDoc(ctxt->myDoc);
-    
-    ctxt->myDoc = htmlNewDoc(URI, ExternalID);
+    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
+	(!ctxt->disableSAX))
+	ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
 
     /*
      * Cleanup, since we don't use all those identifiers
@@ -2846,13 +2847,6 @@
     SKIP_BLANKS;
 
     /*
-     * Create the document if not done already.
-     */
-    if (ctxt->myDoc == NULL) {
-        ctxt->myDoc = htmlNewDoc(NULL, NULL);
-    }
-
-    /*
      * Time to start parsing the tree itself
      */
     htmlParseContent(ctxt);
@@ -3171,6 +3165,10 @@
 		if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
 		    ctxt->sax->setDocumentLocator(ctxt->userData,
 						  &xmlDefaultSAXLocator);
+		if ((ctxt->sax) && (ctxt->sax->startDocument) &&
+	            (!ctxt->disableSAX))
+		    ctxt->sax->startDocument(ctxt->userData);
+
 		cur = in->cur[0];
 		next = in->cur[1];
 		if ((cur == '<') && (next == '!') &&
@@ -3190,7 +3188,6 @@
 		    fprintf(stderr, "HPP: entering PROLOG\n");
 #endif
                 } else {
-		    ctxt->myDoc = htmlNewDoc(NULL, NULL);
 		    ctxt->instate = XML_PARSER_MISC;
 		}
 #ifdef DEBUG_PUSH