Issue #8383: pickle and pickletools now use the 'surrogatepass' error handler
when encoding and decoding Unicode strings as UTF-8, in order to support lone
surrogates and stay compatible with Python 2.x and 3.0.
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 8a2abcc..c4fc2c4 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -499,7 +499,7 @@
def save_str(self, obj, pack=struct.pack):
if self.bin:
- encoded = obj.encode('utf-8')
+ encoded = obj.encode('utf-8', 'surrogatepass')
n = len(encoded)
self.write(BINUNICODE + pack("<i", n) + encoded)
else:
@@ -966,7 +966,7 @@
def load_binunicode(self):
len = mloads(b'i' + self.read(4))
- self.append(str(self.read(len), 'utf-8'))
+ self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
dispatch[BINUNICODE[0]] = load_binunicode
def load_short_binstring(self):