Issue #24848: Fixed bugs in UTF-7 decoding of malformed data:
1. Non-ASCII bytes were accepted after a shift sequence.
2. A low surrogate could be emitted in case of an error in the high surrogate.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 08723ac..6c46263 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1716,29 +1716,29 @@
}
else { /* now leaving a base-64 section */
inShift = 0;
- s++;
- if (surrogate) {
- *p++ = surrogate;
- surrogate = 0;
- }
if (base64bits > 0) { /* left-over bits */
if (base64bits >= 6) {
/* We've seen at least one base-64 character */
+ s++;
errmsg = "partial character in shift sequence";
goto utf7Error;
}
else {
/* Some bits remain; they should be zero */
if (base64buffer != 0) {
+ s++;
errmsg = "non-zero padding bits in shift sequence";
goto utf7Error;
}
}
}
- if (ch != '-') {
+ if (surrogate && DECODE_DIRECT(ch))
+ *p++ = surrogate;
+ surrogate = 0;
+ if (ch == '-') {
/* '-' is absorbed; other terminating
characters are preserved */
- *p++ = ch;
+ s++;
}
}
}
@@ -1751,6 +1751,7 @@
}
else { /* begin base64-encoded section */
inShift = 1;
+ surrogate = 0;
shiftOutStart = p;
base64bits = 0;
base64buffer = 0;
@@ -1782,6 +1783,7 @@
if (inShift && !consumed) { /* in shift sequence, no more to follow */
/* if we're in an inconsistent state, that's an error */
+ inShift = 0;
if (surrogate ||
(base64bits >= 6) ||
(base64bits > 0 && base64buffer != 0)) {