This patch changes the behaviour of the UTF-16 codec family. Only the
UTF-16 codec will now interpret and remove a *leading* BOM (byte order
mark, U+FEFF). Subsequent BOM characters are no longer interpreted or
removed. The UTF-16-LE and UTF-16-BE codecs pass all BOM characters
through to the output.

These changes should bring the UTF-16 codec more in line with what
the Unicode FAQ recommends with respect to BOMs.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 475215c..d55e2a7 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1001,31 +1001,39 @@
     if (byteorder)
 	bo = *byteorder;
 
+    /* Check for BOM marks (U+FEFF) in the input and adjust current
+       byte order setting accordingly. In native mode, the leading BOM
+       mark is skipped, in all other modes, it is copied to the output
+       stream as-is (giving a ZWNBSP character). */
+    if (bo == 0) {
+#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+	if (*q == 0xFEFF) {
+	    q++;
+	    bo = -1;
+	} else if (*q == 0xFFFE) {
+	    q++;
+	    bo = 1;
+	}
+#else    
+	if (*q == 0xFEFF) {
+	    q++;
+	    bo = 1;
+	} else if (*q == 0xFFFE) {
+	    q++;
+	    bo = -1;
+	}
+#endif
+    }
+    
     while (q < e) {
 	register Py_UNICODE ch = *q++;
 
-	/* Check for BOM marks (U+FEFF) in the input and adjust
-	   current byte order setting accordingly. Swap input
-	   bytes if needed. (This assumes sizeof(Py_UNICODE) == 2
-	   !) */
+	/* Swap input bytes if needed. (This assumes
+	   sizeof(Py_UNICODE) == 2 !) */
 #ifdef BYTEORDER_IS_LITTLE_ENDIAN
-	if (ch == 0xFEFF) {
-	    bo = -1;
-	    continue;
-	} else if (ch == 0xFFFE) {
-	    bo = 1;
-	    continue;
-	}
 	if (bo == 1)
 	    ch = (ch >> 8) | (ch << 8);
 #else    
-	if (ch == 0xFEFF) {
-	    bo = 1;
-	    continue;
-	} else if (ch == 0xFFFE) {
-	    bo = -1;
-	    continue;
-	}
 	if (bo == -1)
 	    ch = (ch >> 8) | (ch << 8);
 #endif