Issue #5915: Implement PEP 383, Non-decodable Bytes in
System Character Interfaces.
diff --git a/Python/codecs.c b/Python/codecs.c
index 633a24c..7e3ff8a 100644
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -829,6 +829,82 @@
     }
 }
 
+/* "utf8b" error handler (PEP 383).  Encode errors: every code unit in
+   [start, end) must be in 0xDC80..0xDCFF (else exc is re-raised) and maps
+   to the byte ch - 0xDC00.  Decode errors: up to 4 leading non-ASCII bytes
+   map to 0xDC00 + byte; none re-raises exc.  Returns a tuple, or NULL. */
+static PyObject *
+PyCodec_UTF8bErrors(PyObject *exc)
+{
+    PyObject *object, *res, *restuple;
+    Py_ssize_t start, end;
+    /* PyObject_IsInstance() is -1 on error; don't treat that as a match. */
+    int match = PyObject_IsInstance(exc, PyExc_UnicodeEncodeError);
+    if (match < 0)
+	return NULL;
+    if (match) {
+	Py_UNICODE *p, *startp;
+	char *outp;
+	if (PyUnicodeEncodeError_GetStart(exc, &start) ||
+	    PyUnicodeEncodeError_GetEnd(exc, &end) ||
+	    !(object = PyUnicodeEncodeError_GetObject(exc)))
+	    return NULL;
+	startp = PyUnicode_AS_UNICODE(object);
+	res = PyBytes_FromStringAndSize(NULL, end-start);
+	if (!res) {
+	    Py_DECREF(object);
+	    return NULL;
+	}
+	outp = PyBytes_AsString(res);
+	for (p = startp+start; p < startp+end; p++) {
+	    Py_UNICODE ch = *p;
+	    if (ch < 0xdc80 || ch > 0xdcff) {
+		/* Not a UTF-8b surrogate, fail with original exception */
+		PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+		Py_DECREF(res);
+		Py_DECREF(object);
+		return NULL;
+	    }
+	    *outp++ = ch - 0xdc00;
+	}
+	restuple = Py_BuildValue("(On)", res, end);
+	Py_DECREF(res);
+	Py_DECREF(object);
+	return restuple;
+    }
+    match = PyObject_IsInstance(exc, PyExc_UnicodeDecodeError);
+    if (match < 0)
+	return NULL;
+    if (match) {
+	unsigned char *p;
+	Py_UNICODE ch[4]; /* decode up to 4 bad bytes. */
+	int consumed = 0;
+	if (PyUnicodeDecodeError_GetStart(exc, &start) ||
+	    PyUnicodeDecodeError_GetEnd(exc, &end) ||
+	    !(object = PyUnicodeDecodeError_GetObject(exc)))
+	    return NULL;
+	if (!(p = (unsigned char*)PyBytes_AsString(object))) {
+	    Py_DECREF(object);
+	    return NULL;
+	}
+	for (; consumed < 4 && consumed < end-start; consumed++) {
+	    /* Refuse to escape ASCII bytes. */
+	    if (p[start+consumed] < 128)
+		break;
+	    ch[consumed] = 0xdc00 + p[start+consumed];
+	}
+	Py_DECREF(object);
+	if (!consumed) {
+	    /* codec complained about ASCII byte. */
+	    PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+	    return NULL;
+	}
+	return Py_BuildValue("(u#n)", ch, consumed, start+consumed);
+    }
+    wrong_exception_type(exc);
+    return NULL;
+}
+
 	
 static PyObject *strict_errors(PyObject *self, PyObject *exc)
 {
@@ -864,6 +940,11 @@
     return PyCodec_SurrogateErrors(exc);
 }
 
+static PyObject *utf8b_errors(PyObject *self, PyObject *exc)
+{ /* METH_O entry point registered as "utf8b"; self is unused. */
+    return PyCodec_UTF8bErrors(exc);
+}
+
 static int _PyCodecRegistry_Init(void)
 {
     static struct {
@@ -918,6 +999,14 @@
 		surrogates_errors,
 		METH_O
 	    }
+	},
+	{
+	    "utf8b",
+	    {
+		"utf8b",
+		utf8b_errors,
+		METH_O
+	    }
 	}
     };
 
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
index f93403b..c75f55f 100644
--- a/Python/pythonrun.c
+++ b/Python/pythonrun.c
@@ -262,6 +262,22 @@
 
 	_PyImportHooks_Init();
 
+#if defined(HAVE_LANGINFO_H) && defined(CODESET)
+	/* On Unix, set the file system encoding according to the
+	   user's preference, if the CODESET names a well-known
+	   Python codec, and Py_FileSystemDefaultEncoding isn't
+	   initialized by other means. Also set the encoding of
+	   stdin and stdout if these are terminals.  */
+
+	codeset = get_codeset();
+	if (codeset) {
+		if (!Py_FileSystemDefaultEncoding)
+			Py_FileSystemDefaultEncoding = codeset;
+		else
+			free(codeset);
+	}
+#endif
+
 	if (install_sigs)
 		initsigs(); /* Signal handling stuff, including initintr() */
 		
@@ -285,22 +301,6 @@
 #ifdef WITH_THREAD
 	_PyGILState_Init(interp, tstate);
 #endif /* WITH_THREAD */
-
-#if defined(HAVE_LANGINFO_H) && defined(CODESET)
-	/* On Unix, set the file system encoding according to the
-	   user's preference, if the CODESET names a well-known
-	   Python codec, and Py_FileSystemDefaultEncoding isn't
-	   initialized by other means. Also set the encoding of
-	   stdin and stdout if these are terminals.  */
-
-	codeset = get_codeset();
-	if (codeset) {
-		if (!Py_FileSystemDefaultEncoding)
-			Py_FileSystemDefaultEncoding = codeset;
-		else
-			free(codeset);
-	}
-#endif
 }
 
 void