mmap: do all internal arithmetic with Py_ssize_t while being very careful about overflow
diff --git a/Modules/mmapmodule.c b/Modules/mmapmodule.c
index 1ebccdf..297bb07 100644
--- a/Modules/mmapmodule.c
+++ b/Modules/mmapmodule.c
@@ -91,8 +91,8 @@
typedef struct {
PyObject_HEAD
char * data;
- size_t size;
- size_t pos; /* relative to offset */
+ Py_ssize_t size;
+ Py_ssize_t pos; /* relative to offset */
#ifdef MS_WINDOWS
PY_LONG_LONG offset;
#else
@@ -204,33 +204,32 @@
PyObject *unused)
{
CHECK_VALID(NULL);
- if (self->pos < self->size) {
- char value = self->data[self->pos];
- self->pos += 1;
- return Py_BuildValue("c", value);
- } else {
+ if (self->pos >= self->size) {
PyErr_SetString(PyExc_ValueError, "read byte out of range");
return NULL;
}
+ return PyString_FromStringAndSize(&self->data[self->pos++], 1);
}
static PyObject *
mmap_read_line_method(mmap_object *self,
PyObject *unused)
{
- char *start = self->data+self->pos;
- char *eof = self->data+self->size;
- char *eol;
+ Py_ssize_t remaining;
+ char *start, *eol;
PyObject *result;
CHECK_VALID(NULL);
- eol = memchr(start, '\n', self->size - self->pos);
+ remaining = (self->pos < self->size) ? self->size - self->pos : 0;
+ if (!remaining)
+ return PyString_FromString("");
+ start = self->data + self->pos;
+ eol = memchr(start, '\n', remaining);
if (!eol)
- eol = eof;
+ eol = self->data + self->size;
else
- ++eol; /* we're interested in the position after the
- newline. */
+ ++eol; /* advance past newline */
result = PyString_FromStringAndSize(start, (eol - start));
self->pos += (eol - start);
return result;
@@ -240,28 +239,18 @@
mmap_read_method(mmap_object *self,
PyObject *args)
{
- Py_ssize_t num_bytes, n;
+ Py_ssize_t num_bytes, remaining;
PyObject *result;
CHECK_VALID(NULL);
if (!PyArg_ParseTuple(args, "n:read", &num_bytes))
- return(NULL);
+ return NULL;
/* silently 'adjust' out-of-range requests */
- assert(self->size >= self->pos);
- n = self->size - self->pos;
- /* The difference can overflow, only if self->size is greater than
- * PY_SSIZE_T_MAX. But then the operation cannot possibly succeed,
- * because the mapped area and the returned string each need more
- * than half of the addressable memory. So we clip the size, and let
- * the code below raise MemoryError.
- */
- if (n < 0)
- n = PY_SSIZE_T_MAX;
- if (num_bytes < 0 || num_bytes > n) {
- num_bytes = n;
- }
- result = Py_BuildValue("s#", self->data+self->pos, num_bytes);
+ remaining = (self->pos < self->size) ? self->size - self->pos : 0;
+ if (num_bytes < 0 || num_bytes > remaining)
+ num_bytes = remaining;
+ result = PyString_FromStringAndSize(&self->data[self->pos], num_bytes);
self->pos += num_bytes;
return result;
}
@@ -288,14 +277,14 @@
start += self->size;
if (start < 0)
start = 0;
- else if ((size_t)start > self->size)
+ else if (start > self->size)
start = self->size;
if (end < 0)
end += self->size;
if (end < 0)
end = 0;
- else if ((size_t)end > self->size)
+ else if (end > self->size)
end = self->size;
start_p = self->data + start;
@@ -362,12 +351,12 @@
if (!is_writeable(self))
return NULL;
- if ((self->pos + length) > self->size) {
+ if (self->pos > self->size || self->size - self->pos < length) {
PyErr_SetString(PyExc_ValueError, "data out of range");
return NULL;
}
- memcpy(self->data+self->pos, data, length);
- self->pos = self->pos+length;
+ memcpy(&self->data[self->pos], data, length);
+ self->pos += length;
Py_INCREF(Py_None);
return Py_None;
}
@@ -386,8 +375,7 @@
return NULL;
if (self->pos < self->size) {
- *(self->data+self->pos) = value;
- self->pos += 1;
+ self->data[self->pos++] = value;
Py_INCREF(Py_None);
return Py_None;
}
@@ -458,8 +446,14 @@
if (!PyArg_ParseTuple(args, "n:resize", &new_size) ||
!is_resizeable(self)) {
return NULL;
+ }
+ if (new_size < 0 || PY_SSIZE_T_MAX - new_size < self->offset) {
+ PyErr_SetString(PyExc_ValueError, "new size out of range");
+ return NULL;
+ }
+
+ {
#ifdef MS_WINDOWS
- } else {
DWORD dwErrCode = 0;
DWORD off_hi, off_lo, newSizeLow, newSizeHigh;
/* First, unmap the file view */
@@ -509,15 +503,13 @@
#ifdef UNIX
#ifndef HAVE_MREMAP
- } else {
PyErr_SetString(PyExc_SystemError,
"mmap: resizing not available--no mremap()");
return NULL;
#else
- } else {
void *newmap;
- if (ftruncate(self->fd, self->offset + new_size) == -1) {
+ if (self->fd != -1 && ftruncate(self->fd, self->offset + new_size) == -1) {
PyErr_SetFromErrno(mmap_module_error);
return NULL;
}
@@ -525,11 +517,11 @@
#ifdef MREMAP_MAYMOVE
newmap = mremap(self->data, self->size, new_size, MREMAP_MAYMOVE);
#else
- #if defined(__NetBSD__)
- newmap = mremap(self->data, self->size, self->data, new_size, 0);
- #else
- newmap = mremap(self->data, self->size, new_size, 0);
- #endif /* __NetBSD__ */
+#if defined(__NetBSD__)
+ newmap = mremap(self->data, self->size, self->data, new_size, 0);
+#else
+ newmap = mremap(self->data, self->size, new_size, 0);
+#endif /* __NetBSD__ */
#endif
if (newmap == (void *)-1)
{
@@ -560,7 +552,7 @@
CHECK_VALID(NULL);
if (!PyArg_ParseTuple(args, "|nn:flush", &offset, &size))
return NULL;
- if ((size_t)(offset + size) > self->size) {
+ if (size < 0 || offset < 0 || self->size - offset < size) {
PyErr_SetString(PyExc_ValueError, "flush values out of range");
return NULL;
}
@@ -601,12 +593,12 @@
where = dist;
break;
case 1: /* relative to current position */
- if ((Py_ssize_t)self->pos + dist < 0)
+ if (PY_SSIZE_T_MAX - self->pos < dist)
goto onoutofrange;
where = self->pos + dist;
break;
case 2: /* relative to end */
- if ((Py_ssize_t)self->size + dist < 0)
+ if (PY_SSIZE_T_MAX - self->size < dist)
goto onoutofrange;
where = self->size + dist;
break;
@@ -629,23 +621,27 @@
static PyObject *
mmap_move_method(mmap_object *self, PyObject *args)
{
- unsigned long dest, src, cnt;
+ Py_ssize_t dest, src, cnt;
CHECK_VALID(NULL);
- if (!PyArg_ParseTuple(args, "kkk:move", &dest, &src, &cnt) ||
+ if (!PyArg_ParseTuple(args, "nnn:move", &dest, &src, &cnt) ||
!is_writeable(self)) {
return NULL;
} else {
/* bounds check the values */
- if (cnt < 0 || (cnt + dest) < cnt || (cnt + src) < cnt ||
- src < 0 || src > self->size || (src + cnt) > self->size ||
- dest < 0 || dest > self->size || (dest + cnt) > self->size) {
- PyErr_SetString(PyExc_ValueError,
- "source, destination, or count out of range");
- return NULL;
- }
- memmove(self->data+dest, self->data+src, cnt);
+ if (dest < 0 || src < 0 || cnt < 0)
+ goto bounds;
+ if (self->size - dest < cnt || self->size - src < cnt)
+ goto bounds;
+
+ memmove(&self->data[dest], &self->data[src], cnt);
+
Py_INCREF(Py_None);
return Py_None;
+
+ bounds:
+ PyErr_SetString(PyExc_ValueError,
+ "source, destination, or count out of range");
+ return NULL;
}
}
@@ -745,7 +741,7 @@
mmap_item(mmap_object *self, Py_ssize_t i)
{
CHECK_VALID(NULL);
- if (i < 0 || (size_t)i >= self->size) {
+ if (i < 0 || i >= self->size) {
PyErr_SetString(PyExc_IndexError, "mmap index out of range");
return NULL;
}
@@ -758,13 +754,13 @@
CHECK_VALID(NULL);
if (ilow < 0)
ilow = 0;
- else if ((size_t)ilow > self->size)
+ else if (ilow > self->size)
ilow = self->size;
if (ihigh < 0)
ihigh = 0;
if (ihigh < ilow)
ihigh = ilow;
- else if ((size_t)ihigh > self->size)
+ else if (ihigh > self->size)
ihigh = self->size;
return PyString_FromStringAndSize(self->data + ilow, ihigh-ilow);
@@ -780,7 +776,7 @@
return NULL;
if (i < 0)
i += self->size;
- if (i < 0 || (size_t)i >= self->size) {
+ if (i < 0 || i >= self->size) {
PyErr_SetString(PyExc_IndexError,
"mmap index out of range");
return NULL;
@@ -850,13 +846,13 @@
CHECK_VALID(-1);
if (ilow < 0)
ilow = 0;
- else if ((size_t)ilow > self->size)
+ else if (ilow > self->size)
ilow = self->size;
if (ihigh < 0)
ihigh = 0;
if (ihigh < ilow)
ihigh = ilow;
- else if ((size_t)ihigh > self->size)
+ else if (ihigh > self->size)
ihigh = self->size;
if (v == NULL) {
@@ -887,7 +883,7 @@
const char *buf;
CHECK_VALID(-1);
- if (i < 0 || (size_t)i >= self->size) {
+ if (i < 0 || i >= self->size) {
PyErr_SetString(PyExc_IndexError, "mmap index out of range");
return -1;
}
@@ -921,7 +917,7 @@
return -1;
if (i < 0)
i += self->size;
- if (i < 0 || (size_t)i >= self->size) {
+ if (i < 0 || i >= self->size) {
PyErr_SetString(PyExc_IndexError,
"mmap index out of range");
return -1;
@@ -1092,32 +1088,6 @@
};
-/* extract the map size from the given PyObject
-
- Returns -1 on error, with an appropriate Python exception raised. On
- success, the map size is returned. */
-static Py_ssize_t
-_GetMapSize(PyObject *o, const char* param)
-{
- if (o == NULL)
- return 0;
- if (PyIndex_Check(o)) {
- Py_ssize_t i = PyNumber_AsSsize_t(o, PyExc_OverflowError);
- if (i==-1 && PyErr_Occurred())
- return -1;
- if (i < 0) {
- PyErr_Format(PyExc_OverflowError,
- "memory mapped %s must be positive",
- param);
- return -1;
- }
- return i;
- }
-
- PyErr_SetString(PyExc_TypeError, "map size must be an integral value");
- return -1;
-}
-
#ifdef UNIX
#ifdef HAVE_LARGEFILE_SUPPORT
#define _Py_PARSE_OFF_T "L"
@@ -1132,7 +1102,6 @@
struct stat st;
#endif
mmap_object *m_obj;
- PyObject *map_size_obj = NULL;
Py_ssize_t map_size;
off_t offset = 0;
int fd, flags = MAP_SHARED, prot = PROT_WRITE | PROT_READ;
@@ -1142,13 +1111,15 @@
"flags", "prot",
"access", "offset", NULL};
- if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|iii" _Py_PARSE_OFF_T, keywords,
- &fd, &map_size_obj, &flags, &prot,
+ if (!PyArg_ParseTupleAndKeywords(args, kwdict, "in|iii" _Py_PARSE_OFF_T, keywords,
+ &fd, &map_size, &flags, &prot,
&access, &offset))
return NULL;
- map_size = _GetMapSize(map_size_obj, "size");
- if (map_size < 0)
+ if (map_size < 0) {
+ PyErr_SetString(PyExc_OverflowError,
+ "memory mapped length must be positive");
return NULL;
+ }
if (offset < 0) {
PyErr_SetString(PyExc_OverflowError,
"memory mapped offset must be positive");
@@ -1220,7 +1191,7 @@
return NULL;
}
map_size = (Py_ssize_t) (st.st_size - offset);
- } else if (offset + (size_t)map_size > st.st_size) {
+ } else if (offset > st.st_size || st.st_size - offset < map_size) {
PyErr_SetString(PyExc_ValueError,
"mmap length is greater than file size");
return NULL;
@@ -1230,8 +1201,8 @@
m_obj = (mmap_object *)type->tp_alloc(type, 0);
if (m_obj == NULL) {return NULL;}
m_obj->data = NULL;
- m_obj->size = (size_t) map_size;
- m_obj->pos = (size_t) 0;
+ m_obj->size = map_size;
+ m_obj->pos = 0;
m_obj->offset = offset;
if (fd == -1) {
m_obj->fd = -1;
@@ -1290,7 +1261,6 @@
new_mmap_object(PyTypeObject *type, PyObject *args, PyObject *kwdict)
{
mmap_object *m_obj;
- PyObject *map_size_obj = NULL;
Py_ssize_t map_size;
PY_LONG_LONG offset = 0, size;
DWORD off_hi; /* upper 32 bits of offset */
@@ -1307,8 +1277,8 @@
"tagname",
"access", "offset", NULL };
- if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iO|ziL", keywords,
- &fileno, &map_size_obj,
+ if (!PyArg_ParseTupleAndKeywords(args, kwdict, "in|ziL", keywords,
+ &fileno, &map_size,
&tagname, &access, &offset)) {
return NULL;
}
@@ -1331,9 +1301,11 @@
"mmap invalid access parameter.");
}
- map_size = _GetMapSize(map_size_obj, "size");
- if (map_size < 0)
+ if (map_size < 0) {
+ PyErr_SetString(PyExc_OverflowError,
+ "memory mapped length must be positive");
return NULL;
+ }
if (offset < 0) {
PyErr_SetString(PyExc_OverflowError,
"memory mapped offset must be positive");