Issue #13333: The UTF-7 decoder now accepts lone surrogates and emits them
unchanged instead of reporting an error (the encoder already accepts them).
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 6b245aa..cdad738 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3884,21 +3884,18 @@
if (unicode_putchar(&unicode, &outpos, ch2) < 0)
goto onError;
surrogate = 0;
+ continue;
}
else {
+ if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
+ goto onError;
surrogate = 0;
- errmsg = "second surrogate missing";
- goto utf7Error;
}
}
- else if (outCh >= 0xD800 && outCh <= 0xDBFF) {
+ if (outCh >= 0xD800 && outCh <= 0xDBFF) {
/* first surrogate */
surrogate = outCh;
}
- else if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
- errmsg = "unexpected second surrogate";
- goto utf7Error;
- }
else {
if (unicode_putchar(&unicode, &outpos, outCh) < 0)
goto onError;
@@ -3909,8 +3906,9 @@
inShift = 0;
s++;
if (surrogate) {
- errmsg = "second surrogate missing at end of shift sequence";
- goto utf7Error;
+ if (unicode_putchar(&unicode, &outpos, surrogate) < 0)
+ goto onError;
+ surrogate = 0;
}
if (base64bits > 0) { /* left-over bits */
if (base64bits >= 6) {