Bytes (which are the input for decoding) are mutable now. If a decoding
error callback changes the bytes object in the exception, the decoder might
access memory that is no longer valid. Change unicode_decode_call_errorhandler()
so that it fetches the addresses of the bytes array (start and end) from the
exception object and passes them back to the caller.
diff --git a/Lib/test/test_codeccallbacks.py b/Lib/test/test_codeccallbacks.py
index 77834c4..f76ec65 100644
--- a/Lib/test/test_codeccallbacks.py
+++ b/Lib/test/test_codeccallbacks.py
@@ -806,6 +806,39 @@
text = 'abc<def>ghi'*n
text.translate(charmap)
+    def test_mutatingdecodehandler(self):
+        """Decoders must cope with error handlers that alter exc.object."""
+        baddata = [
+            ("ascii", b"\xff"),
+            ("utf-7", b"++"),
+            ("utf-8", b"\xff"),
+            ("utf-16", b"\xff"),
+            ("unicode-escape", b"\\u123g"),
+            ("raw-unicode-escape", b"\\u123g"),
+            ("unicode-internal", b"\xff"),
+        ]
+
+        # Replacing exc.object with a non-bytes value must raise TypeError.
+        def replacing(exc):
+            if not isinstance(exc, UnicodeDecodeError):
+                raise TypeError("don't know how to handle %r" % exc)
+            exc.object = 42
+            return ("\u4242", 0)
+        codecs.register_error("test.replacing", replacing)
+        for (encoding, data) in baddata:
+            self.assertRaises(TypeError, data.decode, encoding, "test.replacing")
+
+        def mutating(exc):
+            if not isinstance(exc, UnicodeDecodeError):
+                raise TypeError("don't know how to handle %r" % exc)
+            exc.object[:] = b""  # empty the input in place; resume at 0
+            return ("\u4242", 0)
+        codecs.register_error("test.mutating", mutating)
+        # If the decoder doesn't pick up the modified input the following
+        # will lead to an endless loop
+        for (encoding, data) in baddata:
+            self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
+
def test_main():
test.test_support.run_unittest(CodecCallbackTest)