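Issue #27473: Fixed possible integer overflow in str, unicode and bytearray
concatenations and repetitions. Based on patch by Xiang Zhang.

The overflow checks are now performed before the addition or multiplication,
by comparing the operands against PY_SSIZE_T_MAX, instead of inspecting the
result afterwards (signed overflow is undefined behavior in C, so the old
after-the-fact checks could not be relied upon).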
diff --git a/Misc/NEWS b/Misc/NEWS
index 023f46a..1623242 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
Core and Builtins
-----------------
+- Issue #27473: Fixed possible integer overflow in str, unicode and bytearray
+ concatenations and repetitions. Based on patch by Xiang Zhang.
+
- Issue #23908: os functions, open() and the io.FileIO constructor now reject
unicode paths with embedded null character on Windows instead of silently
truncating them.
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index a90bdeb..bf8a74e 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -273,7 +273,6 @@
PyObject *
PyByteArray_Concat(PyObject *a, PyObject *b)
{
- Py_ssize_t size;
Py_buffer va, vb;
PyByteArrayObject *result = NULL;
@@ -286,13 +285,13 @@
goto done;
}
- size = va.len + vb.len;
- if (size < 0) {
- PyErr_NoMemory();
- goto done;
+ if (va.len > PY_SSIZE_T_MAX - vb.len) {
+ PyErr_NoMemory();
+ goto done;
}
- result = (PyByteArrayObject *) PyByteArray_FromStringAndSize(NULL, size);
+ result = (PyByteArrayObject *) \
+ PyByteArray_FromStringAndSize(NULL, va.len + vb.len);
if (result != NULL) {
memcpy(result->ob_bytes, va.buf, va.len);
memcpy(result->ob_bytes + va.len, vb.buf, vb.len);
@@ -328,11 +327,11 @@
}
mysize = Py_SIZE(self);
- size = mysize + vo.len;
- if (size < 0) {
+ if (mysize > PY_SSIZE_T_MAX - vo.len) {
PyBuffer_Release(&vo);
return PyErr_NoMemory();
}
+ size = mysize + vo.len;
if (size < self->ob_alloc) {
Py_SIZE(self) = size;
self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
@@ -357,9 +356,9 @@
if (count < 0)
count = 0;
mysize = Py_SIZE(self);
- size = mysize * count;
- if (count != 0 && size / count != mysize)
+ if (count != 0 && mysize > PY_SSIZE_T_MAX / count)
return PyErr_NoMemory();
+ size = mysize * count;
result = (PyByteArrayObject *)PyByteArray_FromStringAndSize(NULL, size);
if (result != NULL && size != 0) {
if (mysize == 1)
@@ -382,9 +381,9 @@
if (count < 0)
count = 0;
mysize = Py_SIZE(self);
- size = mysize * count;
- if (count != 0 && size / count != mysize)
+ if (count != 0 && mysize > PY_SSIZE_T_MAX / count)
return PyErr_NoMemory();
+ size = mysize * count;
if (size < self->ob_alloc) {
Py_SIZE(self) = size;
self->ob_bytes[Py_SIZE(self)] = '\0'; /* Trailing null byte */
diff --git a/Objects/stringobject.c b/Objects/stringobject.c
index 1a04b78..342b2db 100644
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -1040,7 +1040,6 @@
Py_INCREF(a);
return (PyObject *)a;
}
- size = Py_SIZE(a) + Py_SIZE(b);
/* Check that string sizes are not negative, to prevent an
overflow in cases where we are passed incorrectly-created
strings with negative lengths (due to a bug in other code).
@@ -1051,6 +1050,7 @@
"strings are too large to concat");
return NULL;
}
+ size = Py_SIZE(a) + Py_SIZE(b);
/* Inline PyObject_NewVar */
if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
@@ -1081,15 +1081,15 @@
size_t nbytes;
if (n < 0)
n = 0;
- /* watch out for overflows: the size can overflow int,
+ /* watch out for overflows: the size can overflow Py_ssize_t,
* and the # of bytes needed can overflow size_t
*/
- size = Py_SIZE(a) * n;
- if (n && size / n != Py_SIZE(a)) {
+ if (n && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
PyErr_SetString(PyExc_OverflowError,
"repeated string is too long");
return NULL;
}
+ size = Py_SIZE(a) * n;
if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Py_INCREF(a);
return (PyObject *)a;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index ca6628e..151ce3c 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -6378,6 +6378,12 @@
return (PyObject *)v;
}
+ if (u->length > PY_SSIZE_T_MAX - v->length) {
+ PyErr_SetString(PyExc_OverflowError,
+ "strings are too large to concat");
+ goto onError;
+ }
+
/* Concat the two Unicode strings */
w = _PyUnicode_New(u->length + v->length);
if (w == NULL)
@@ -7223,17 +7229,17 @@
return (PyObject*) str;
}
- /* ensure # of chars needed doesn't overflow int and # of bytes
+ /* ensure # of chars needed doesn't overflow Py_ssize_t and # of bytes
* needed doesn't overflow size_t
*/
- nchars = len * str->length;
- if (len && nchars / len != str->length) {
+ if (len && str->length > PY_SSIZE_T_MAX / len) {
PyErr_SetString(PyExc_OverflowError,
"repeated string is too long");
return NULL;
}
- nbytes = (nchars + 1) * sizeof(Py_UNICODE);
- if (nbytes / sizeof(Py_UNICODE) != (size_t)(nchars + 1)) {
+ nchars = len * str->length;
+ nbytes = ((size_t)nchars + 1u) * sizeof(Py_UNICODE);
+ if (nbytes / sizeof(Py_UNICODE) != ((size_t)nchars + 1u)) {
PyErr_SetString(PyExc_OverflowError,
"repeated string is too long");
return NULL;