Issue #8383: pickle and pickletools now use the surrogatepass error handler
when encoding and decoding unicode as UTF-8, to support lone surrogates and
stay compatible with Python 2.x and 3.0.
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 8a2abcc..c4fc2c4 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -499,7 +499,7 @@
 
     def save_str(self, obj, pack=struct.pack):
         if self.bin:
-            encoded = obj.encode('utf-8')
+            encoded = obj.encode('utf-8', 'surrogatepass')
             n = len(encoded)
             self.write(BINUNICODE + pack("<i", n) + encoded)
         else:
@@ -966,7 +966,7 @@
 
     def load_binunicode(self):
         len = mloads(b'i' + self.read(4))
-        self.append(str(self.read(len), 'utf-8'))
+        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
     dispatch[BINUNICODE[0]] = load_binunicode
 
     def load_short_binstring(self):
diff --git a/Lib/pickletools.py b/Lib/pickletools.py
index ca11aa3..6ab75c7 100644
--- a/Lib/pickletools.py
+++ b/Lib/pickletools.py
@@ -469,7 +469,7 @@
         raise ValueError("unicodestring4 byte count < 0: %d" % n)
     data = f.read(n)
     if len(data) == n:
-        return str(data, 'utf-8')
+        return str(data, 'utf-8', 'surrogatepass')
     raise ValueError("expected %d bytes in a unicodestring4, but only %d "
                      "remain" % (n, len(data)))
 
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 79407a6..dd0ed15 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -515,7 +515,9 @@
 
     def test_unicode(self):
         endcases = ['', '<\\u>', '<\\\u1234>', '<\n>',
-                    '<\\>', '<\\\U00012345>']
+                    '<\\>', '<\\\U00012345>',
+                    # surrogates
+                    '<\udc80>']
         for proto in protocols:
             for u in endcases:
                 p = self.dumps(u, proto)