fixed a nasty bug #119387: bad heuristic in the progressive HTML parser

* HTMLparser.c: fixed a nasty bug #119387, a bad heuristic in
  the progressive HTML parser front-end on large character data
  islands, leading to an erroneous end-of-data detection by the
  parser. Some cleanup too, to get closer to the XML progressive
  parser.
Daniel
diff --git a/HTMLparser.c b/HTMLparser.c
index 0aa0041..2168bbd 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -4950,19 +4950,15 @@
 			/* TODO: check generation of subtrees if noent !!! */
 			htmlParseReference(ctxt);
 		    } else {
-			/* TODO Avoid the extra copy, handle directly !!!!!! */
-			/*
-			 * Goal of the following test is:
-			 *  - minimize calls to the SAX 'character' callback
-			 *    when they are mergeable
+		        /*
+			 * check that the text sequence is complete
+			 * before handing out the data to the parser
+			 * to avoid problems with erroneous end of
+			 * data detection.
 			 */
-			if ((ctxt->inputNr == 1) &&
-			    (avail < HTML_PARSER_BIG_BUFFER_SIZE)) {
-			    if ((!terminate) &&
-				(htmlParseLookupSequence(
-					ctxt, '<', 0, 0, 0) < 0))
-				goto done;
-			}
+			if ((!terminate) &&
+			    (htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0))
+			    goto done;
 			ctxt->checkIndex = 0;
 #ifdef DEBUG_PUSH
 			xmlGenericError(xmlGenericErrorContext,
@@ -5160,12 +5156,27 @@
 	xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
 #endif
 
+#if 0
 	if ((terminate) || (ctxt->input->buf->buffer->use > 80))
 	    htmlParseTryOrFinish(ctxt, terminate);
+#endif
     } else if (ctxt->instate != XML_PARSER_EOF) {
-	xmlParserInputBufferPush(ctxt->input->buf, 0, "");
-        htmlParseTryOrFinish(ctxt, terminate);
+	if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
+	    xmlParserInputBufferPtr in = ctxt->input->buf;
+	    if ((in->encoder != NULL) && (in->buffer != NULL) &&
+		    (in->raw != NULL)) {
+		int nbchars;
+		    
+		nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
+		if (nbchars < 0) {
+		    xmlGenericError(xmlGenericErrorContext,
+				    "htmlParseChunk: encoder error\n");
+		    return(XML_ERR_INVALID_ENCODING);
+		}
+	    }
+	}
     }
+    htmlParseTryOrFinish(ctxt, terminate);
     if (terminate) {
 	if ((ctxt->instate != XML_PARSER_EOF) &&
 	    (ctxt->instate != XML_PARSER_EPILOG) &&