Issue #24848: Fixed bugs in UTF-7 decoding of malformed data: 1. Non-ASCII bytes were accepted after a shift sequence. 2. A low surrogate could be emitted in case of an error in the high surrogate.

commit: e12f63218603f3e15592df7fba5a484f9ff5c004 [log] [tgz]
author: Serhiy Storchaka <storchaka@gmail.com> Fri Oct 02 13:14:53 2015 +0300
committer: Serhiy Storchaka <storchaka@gmail.com> Fri Oct 02 13:14:53 2015 +0300
tree: 2b07611f51da28e88360f9afc0e34b3290c6f2f5
parent: a87633e59635a659aa7eb733094129c47187f8a3 [diff] [blame]
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 08723ac..6c46263 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c

@@ -1716,29 +1716,29 @@
             }
             else { /* now leaving a base-64 section */
                 inShift = 0;
-                s++;
-                if (surrogate) {
-                    *p++ = surrogate;
-                    surrogate = 0;
-                }
                 if (base64bits > 0) { /* left-over bits */
                     if (base64bits >= 6) {
                         /* We've seen at least one base-64 character */
+                        s++;
                         errmsg = "partial character in shift sequence";
                         goto utf7Error;
                     }
                     else {
                         /* Some bits remain; they should be zero */
                         if (base64buffer != 0) {
+                            s++;
                             errmsg = "non-zero padding bits in shift sequence";
                             goto utf7Error;
                         }
                     }
                 }
-                if (ch != '-') {
+                if (surrogate && DECODE_DIRECT(ch))
+                    *p++ = surrogate;
+                surrogate = 0;
+                if (ch == '-') {
                     /* '-' is absorbed; other terminating
                        characters are preserved */
-                    *p++ = ch;
+                    s++;
                 }
             }
         }
@@ -1751,6 +1751,7 @@
             }
             else { /* begin base64-encoded section */
                 inShift = 1;
+                surrogate = 0;
                 shiftOutStart = p;
                 base64bits = 0;
                 base64buffer = 0;
@@ -1782,6 +1783,7 @@
 
     if (inShift && !consumed) { /* in shift sequence, no more to follow */
         /* if we're in an inconsistent state, that's an error */
+        inShift = 0;
         if (surrogate ||
                 (base64bits >= 6) ||
                 (base64bits > 0 && base64buffer != 0)) {
commit	e12f63218603f3e15592df7fba5a484f9ff5c004	[log] [tgz]
author	Serhiy Storchaka <storchaka@gmail.com>	Fri Oct 02 13:14:53 2015 +0300
committer	Serhiy Storchaka <storchaka@gmail.com>	Fri Oct 02 13:14:53 2015 +0300
tree	2b07611f51da28e88360f9afc0e34b3290c6f2f5
parent	a87633e59635a659aa7eb733094129c47187f8a3 [diff] [blame]