566012 autodetected encoding and encoding conflict

* encoding.c parser.c parserInternals.c: when we autodetect an encoding
  but it's actually not completely compatible with the one declared
  great care must be taken to not convert more than just the first line.
  Led to some refactoring, more private functions and a bit of cleanup.
diff --git a/parserInternals.c b/parserInternals.c
index a42e5ed..ff20435 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -945,6 +945,17 @@
  *									*
  ************************************************************************/
 
+/* defined in encoding.c, not public */
+int
+xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
+                       xmlBufferPtr in, int len);
+
+static int
+xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
+                       xmlCharEncodingHandlerPtr handler, int len);
+static int
+xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
+                          xmlCharEncodingHandlerPtr handler, int len);
 /**
  * xmlSwitchEncoding:
  * @ctxt:  the parser context
@@ -959,6 +970,7 @@
 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
 {
     xmlCharEncodingHandlerPtr handler;
+    int len = -1;
 
     if (ctxt == NULL) return(-1);
     switch (enc) {
@@ -1002,9 +1014,33 @@
             (ctxt->input->cur[2] == 0xBF)) {
             ctxt->input->cur += 3;
         }
-	break ;
-	default:
-	    break;
+        len = 90;
+	break;
+    case XML_CHAR_ENCODING_UCS2:
+        len = 90;
+	break;
+    case XML_CHAR_ENCODING_UCS4BE:
+    case XML_CHAR_ENCODING_UCS4LE:
+    case XML_CHAR_ENCODING_UCS4_2143:
+    case XML_CHAR_ENCODING_UCS4_3412:
+        len = 180;
+	break;
+    case XML_CHAR_ENCODING_EBCDIC:
+    case XML_CHAR_ENCODING_8859_1:
+    case XML_CHAR_ENCODING_8859_2:
+    case XML_CHAR_ENCODING_8859_3:
+    case XML_CHAR_ENCODING_8859_4:
+    case XML_CHAR_ENCODING_8859_5:
+    case XML_CHAR_ENCODING_8859_6:
+    case XML_CHAR_ENCODING_8859_7:
+    case XML_CHAR_ENCODING_8859_8:
+    case XML_CHAR_ENCODING_8859_9:
+    case XML_CHAR_ENCODING_ASCII:
+    case XML_CHAR_ENCODING_2022_JP:
+    case XML_CHAR_ENCODING_SHIFT_JIS:
+    case XML_CHAR_ENCODING_EUC_JP:
+        len = 45;
+	break;
     }
     handler = xmlGetCharEncodingHandler(enc);
     if (handler == NULL) {
@@ -1095,7 +1131,7 @@
     if (handler == NULL)
 	return(-1);
     ctxt->charset = XML_CHAR_ENCODING_UTF8;
-    return(xmlSwitchToEncoding(ctxt, handler));
+    return(xmlSwitchToEncodingInt(ctxt, handler, len));
 }
 
 /**
@@ -1103,15 +1139,16 @@
  * @ctxt:  the parser context
  * @input:  the input stream
  * @handler:  the encoding handler
+ * @len:  the number of bytes to convert for the first line or -1
  *
  * change the input functions when discovering the character encoding
  * of a given entity.
  *
  * Returns 0 in case of success, -1 otherwise
  */
-int
-xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
-                       xmlCharEncodingHandlerPtr handler)
+static int
+xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
+                          xmlCharEncodingHandlerPtr handler, int len)
 {
     int nbchars;
 
@@ -1208,9 +1245,10 @@
                  * parsed with the autodetected encoding
                  * into the parser reading buffer.
                  */
-                nbchars = xmlCharEncFirstLine(input->buf->encoder,
-                                              input->buf->buffer,
-                                              input->buf->raw);
+                nbchars = xmlCharEncFirstLineInt(input->buf->encoder,
+                                                 input->buf->buffer,
+                                                 input->buf->raw,
+                                                 len);
             }
             if (nbchars < 0) {
                 xmlErrInternal(ctxt,
@@ -1236,6 +1274,58 @@
 }
 
 /**
+ * xmlSwitchInputEncoding:
+ * @ctxt:  the parser context
+ * @input:  the input stream
+ * @handler:  the encoding handler
+ *
+ * change the input functions when discovering the character encoding
+ * of a given entity.
+ *
+ * Returns 0 in case of success, -1 otherwise
+ */
+int
+xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
+                          xmlCharEncodingHandlerPtr handler) {
+    return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
+}
+
+/**
+ * xmlSwitchToEncodingInt:
+ * @ctxt:  the parser context
+ * @handler:  the encoding handler
+ * @len: the lenght to convert or -1
+ *
+ * change the input functions when discovering the character encoding
+ * of a given entity, and convert only @len bytes of the output, this
+ * is needed on auto detect to allows any declared encoding later to
+ * convert the actual content after the xmlDecl
+ *
+ * Returns 0 in case of success, -1 otherwise
+ */
+static int
+xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
+                       xmlCharEncodingHandlerPtr handler, int len) {
+    int ret = 0;
+
+    if (handler != NULL) {
+        if (ctxt->input != NULL) {
+	    ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
+	} else {
+	    xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
+	                   NULL);
+	    return(-1);
+	}
+	/*
+	 * The parsing is now done in UTF8 natively
+	 */
+	ctxt->charset = XML_CHAR_ENCODING_UTF8;
+    } else
+	return(-1);
+    return(ret);
+}
+
+/**
  * xmlSwitchToEncoding:
  * @ctxt:  the parser context
  * @handler:  the encoding handler
@@ -1248,23 +1338,7 @@
 int
 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 
 {
-    int ret = 0;
-
-    if (handler != NULL) {
-        if (ctxt->input != NULL) {
-	    ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
-	} else {
-	    xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
-	                   NULL);
-	    return(-1);
-	}
-	/*
-	 * The parsing is now done in UTF8 natively
-	 */
-	ctxt->charset = XML_CHAR_ENCODING_UTF8;
-    } else 
-	return(-1);
-    return(ret);
+    return (xmlSwitchToEncodingInt(ctxt, handler, -1));
 }
 
 /************************************************************************