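Issue #11461: Fix the incremental UTF-16 decoder. When the input ends in
the middle of a surrogate pair and the caller asked for the number of
consumed bytes, the decoder now stops before the lead surrogate and
leaves it unconsumed instead of reporting "unexpected end of data".
Original patch by Amaury Forgeot d'Arc. Added tests for partial decoding
of non-BMP characters.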
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7cd0399..7f86bfd 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3573,8 +3573,11 @@
/* UTF-16 code pair: */
if (e - q < 2) {
+ q -= 2;
+ if (consumed)
+ break;
errmsg = "unexpected end of data";
- startinpos = (((const char *)q) - 2) - starts;
+ startinpos = ((const char *)q) - starts;
endinpos = ((const char *)e) - starts;
goto utf16Error;
}