The incremental decoder for UTF-7 must preserve its state between calls.
Solves issue 1460.
This might not be a backport candidate: a new API function
(PyUnicode_DecodeUTF7Stateful) was added, and some code may rely on
details of the existing encodings/utf_7.py module.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7043d5f..18b861b 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -944,6 +944,14 @@
Py_ssize_t size,
const char *errors)
{
+ return PyUnicode_DecodeUTF7Stateful(s, size, errors, NULL);
+}
+
+PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
+ Py_ssize_t size,
+ const char *errors,
+ Py_ssize_t *consumed)
+{
const char *starts = s;
Py_ssize_t startinpos;
Py_ssize_t endinpos;
@@ -962,8 +970,11 @@
unicode = _PyUnicode_New(size);
if (!unicode)
return NULL;
- if (size == 0)
+ if (size == 0) {
+ if (consumed)
+ *consumed = 0;
return (PyObject *)unicode;
+ }
p = unicode->str;
e = s + size;
@@ -1049,7 +1060,7 @@
goto onError;
}
- if (inShift) {
+ if (inShift && !consumed) {
outpos = p-PyUnicode_AS_UNICODE(unicode);
endinpos = size;
if (unicode_decode_call_errorhandler(
@@ -1061,6 +1072,12 @@
if (s < e)
goto restart;
}
+ if (consumed) {
+ if(inShift)
+ *consumed = startinpos;
+ else
+ *consumed = s-starts;
+ }
if (_PyUnicode_Resize(&unicode, p - PyUnicode_AS_UNICODE(unicode)) < 0)
goto onError;