Issue #11461: Fix the incremental UTF-16 decoder. Original patch by
Amaury Forgeot d'Arc. Add tests for partial decoding of non-BMP
characters (U+10000).
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 0f7c23e..4c58b2d 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -313,7 +313,7 @@
def test_partial(self):
self.check_partial(
- "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff\U00010000",
[
"", # first byte of BOM read
"", # second byte of BOM read
@@ -335,6 +335,10 @@
"\x00\xff\u0100",
"\x00\xff\u0100",
"\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff\U00010000",
]
)
@@ -369,7 +373,7 @@
def test_partial(self):
self.check_partial(
- "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff\U00010000",
[
"",
"",
@@ -387,6 +391,10 @@
"\x00\xff\u0100",
"\x00\xff\u0100",
"\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff\U00010000",
]
)
@@ -409,7 +417,7 @@
def test_partial(self):
self.check_partial(
- "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff\U00010000",
[
"",
"",
@@ -427,6 +435,10 @@
"\x00\xff\u0100",
"\x00\xff\u0100",
"\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff\U00010000",
]
)
@@ -477,7 +489,7 @@
def test_partial(self):
self.check_partial(
- "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff\U00010000",
[
"", # first byte of BOM read
"", # second byte of BOM read => byteorder known
@@ -489,6 +501,10 @@
"\x00\xff\u0100",
"\x00\xff\u0100",
"\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff\U00010000",
]
)
@@ -526,7 +542,7 @@
def test_partial(self):
self.check_partial(
- "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff\U00010000",
[
"",
"\x00",
@@ -536,6 +552,10 @@
"\x00\xff\u0100",
"\x00\xff\u0100",
"\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff\U00010000",
]
)
@@ -565,7 +585,7 @@
def test_partial(self):
self.check_partial(
- "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff\U00010000",
[
"",
"\x00",
@@ -575,6 +595,10 @@
"\x00\xff\u0100",
"\x00\xff\u0100",
"\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff",
+ "\x00\xff\u0100\uffff\U00010000",
]
)
@@ -604,7 +628,7 @@
def test_partial(self):
self.check_partial(
- "\x00\xff\u07ff\u0800\uffff",
+ "\x00\xff\u07ff\u0800\uffff\U00010000",
[
"\x00",
"\x00",
@@ -617,6 +641,10 @@
"\x00\xff\u07ff\u0800",
"\x00\xff\u07ff\u0800",
"\x00\xff\u07ff\u0800\uffff",
+ "\x00\xff\u07ff\u0800\uffff",
+ "\x00\xff\u07ff\u0800\uffff",
+ "\x00\xff\u07ff\u0800\uffff",
+ "\x00\xff\u07ff\u0800\uffff\U00010000",
]
)
@@ -694,7 +722,7 @@
def test_partial(self):
self.check_partial(
- "\ufeff\x00\xff\u07ff\u0800\uffff",
+ "\ufeff\x00\xff\u07ff\u0800\uffff\U00010000",
[
"",
"",
@@ -713,6 +741,10 @@
"\ufeff\x00\xff\u07ff\u0800",
"\ufeff\x00\xff\u07ff\u0800",
"\ufeff\x00\xff\u07ff\u0800\uffff",
+ "\ufeff\x00\xff\u07ff\u0800\uffff",
+ "\ufeff\x00\xff\u07ff\u0800\uffff",
+ "\ufeff\x00\xff\u07ff\u0800\uffff",
+ "\ufeff\x00\xff\u07ff\u0800\uffff\U00010000",
]
)