Issue #3672: Reject surrogate code points in the utf-8 codec when encoding
and decoding; add the "surrogates" error handler to pass them through.
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 1730dbe..6706507 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -541,6 +541,17 @@
         self.check_state_handling_decode(self.encoding,
                                          u, u.encode(self.encoding))
 
+    def test_lone_surrogates(self):
+        self.assertRaises(UnicodeEncodeError, "\ud800".encode, "utf-8")
+        self.assertRaises(UnicodeDecodeError, b"\xed\xa0\x80".decode, "utf-8")
+
+    def test_surrogates_handler(self):
+        self.assertEquals("abc\ud800def".encode("utf-8", "surrogates"),
+                          b"abc\xed\xa0\x80def")
+        self.assertEquals(b"abc\xed\xa0\x80def".decode("utf-8", "surrogates"),
+                          "abc\ud800def")
+        self.assertTrue(codecs.lookup_error("surrogates"))
+
 class UTF7Test(ReadTest):
     encoding = "utf-7"
 
@@ -1023,12 +1034,12 @@
                 # Skipped
                 continue
             # The Unicode strings are given in UTF-8
-            orig = str(orig, "utf-8")
+            orig = str(orig, "utf-8", "surrogates")
             if prepped is None:
                 # Input contains prohibited characters
                 self.assertRaises(UnicodeError, nameprep, orig)
             else:
-                prepped = str(prepped, "utf-8")
+                prepped = str(prepped, "utf-8", "surrogates")
                 try:
                     self.assertEquals(nameprep(orig), prepped)
                 except Exception as e: