use Py_ssize_t for file offset and length computations in iteration (closes #22526)
diff --git a/Lib/test/test_file2k.py b/Lib/test/test_file2k.py
index fae1db6..14e5931 100644
--- a/Lib/test/test_file2k.py
+++ b/Lib/test/test_file2k.py
@@ -436,6 +436,18 @@
finally:
f.close()
+ @test_support.precisionbigmemtest(2**31, 1)
+ def test_very_long_line(self, maxsize):
+ # Issue #22526
+ with open(TESTFN, "wb") as fp:
+ fp.write("\0"*2**31)
+ with open(TESTFN, "rb") as fp:
+ for l in fp:
+ pass
+ self.assertEqual(len(l), 2**31)
+ self.assertEqual(l.count("\0"), 2**31)
+ l = None
+
class FileSubclassTests(unittest.TestCase):
def testExit(self):
diff --git a/Misc/NEWS b/Misc/NEWS
index 888abf5..07e8855 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@
Core and Builtins
-----------------
+- Issue #22526: Fix iterating through files with lines of 2^31 bytes or more.
+
- Issue #22519: Fix overflow checking in PyString_Repr.
- Issue #22518: Fix integer overflow issues in latin-1 encoding.
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
index 5594058..55e074b 100644
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -2236,7 +2236,7 @@
(unless at EOF) and no more than bufsize. Returns negative value on
error, will set MemoryError if bufsize bytes cannot be allocated. */
static int
-readahead(PyFileObject *f, int bufsize)
+readahead(PyFileObject *f, Py_ssize_t bufsize)
{
Py_ssize_t chunksize;
@@ -2274,7 +2274,7 @@
logarithmic buffer growth to about 50 even when reading a 1gb line. */
static PyStringObject *
-readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
+readahead_get_line_skip(PyFileObject *f, Py_ssize_t skip, Py_ssize_t bufsize)
{
PyStringObject* s;
char *bufptr;
@@ -2294,10 +2294,10 @@
bufptr++; /* Count the '\n' */
len = bufptr - f->f_bufptr;
s = (PyStringObject *)
- PyString_FromStringAndSize(NULL, skip+len);
+ PyString_FromStringAndSize(NULL, skip + len);
if (s == NULL)
return NULL;
- memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
+ memcpy(PyString_AS_STRING(s) + skip, f->f_bufptr, len);
f->f_bufptr = bufptr;
if (bufptr == f->f_bufend)
drop_readahead(f);
@@ -2305,14 +2305,13 @@
bufptr = f->f_bufptr;
buf = f->f_buf;
f->f_buf = NULL; /* Force new readahead buffer */
- assert(skip+len < INT_MAX);
- s = readahead_get_line_skip(
- f, (int)(skip+len), bufsize + (bufsize>>2) );
+ assert(len <= PY_SSIZE_T_MAX - skip);
+ s = readahead_get_line_skip(f, skip + len, bufsize + (bufsize>>2));
if (s == NULL) {
PyMem_Free(buf);
return NULL;
}
- memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
+ memcpy(PyString_AS_STRING(s) + skip, bufptr, len);
PyMem_Free(buf);
}
return s;