Issue #10233: Close file objects in a timely manner in the tarfile module
and its test suite.
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index e33b982..d49e82f 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -1800,20 +1800,23 @@
except (ImportError, AttributeError):
raise CompressionError("gzip module is not available")
- if fileobj is None:
- fileobj = bltn_open(name, mode + "b")
- extfileobj = False
- else:
- extfileobj = True
-
+ extfileobj = fileobj is not None
try:
- t = cls.taropen(name, mode,
- gzip.GzipFile(name, mode, compresslevel, fileobj),
- **kwargs)
+ fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
+ t = cls.taropen(name, mode, fileobj, **kwargs)
except IOError:
- if not extfileobj:
+ # fileobj is still None when GzipFile() itself failed (e.g. the
+ # file does not exist); only close what we actually opened.
+ if not extfileobj and fileobj is not None:
fileobj.close()
+ if fileobj is None:
+ # Nothing was opened: propagate the original IOError unchanged.
+ raise
raise ReadError("not a gzip file")
+ except:
+ if not extfileobj and fileobj is not None:
+ fileobj.close()
+ raise
t._extfileobj = extfileobj
return t
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index c73b7b2..302ee85 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -52,25 +52,32 @@
def test_fileobj_regular_file(self):
tarinfo = self.tar.getmember("ustar/regtype")
fobj = self.tar.extractfile(tarinfo)
- data = fobj.read()
- self.assertTrue((len(data), md5sum(data)) == (tarinfo.size, md5_regtype),
- "regular file extraction failed")
+ try:
+ data = fobj.read()
+ self.assertTrue((len(data), md5sum(data)) == (tarinfo.size, md5_regtype),
+ "regular file extraction failed")
+ finally:
+ fobj.close()
def test_fileobj_readlines(self):
self.tar.extract("ustar/regtype", TEMPDIR)
tarinfo = self.tar.getmember("ustar/regtype")
with open(os.path.join(TEMPDIR, "ustar/regtype"), "r") as fobj1:
lines1 = fobj1.readlines()
- fobj2 = io.TextIOWrapper(self.tar.extractfile(tarinfo))
- lines2 = fobj2.readlines()
- self.assertTrue(lines1 == lines2,
- "fileobj.readlines() failed")
- self.assertTrue(len(lines2) == 114,
- "fileobj.readlines() failed")
- self.assertTrue(lines2[83] ==
- "I will gladly admit that Python is not the fastest running scripting language.\n",
- "fileobj.readlines() failed")
+ fobj = self.tar.extractfile(tarinfo)
+ try:
+ fobj2 = io.TextIOWrapper(fobj)
+ lines2 = fobj2.readlines()
+ self.assertTrue(lines1 == lines2,
+ "fileobj.readlines() failed")
+ self.assertTrue(len(lines2) == 114,
+ "fileobj.readlines() failed")
+ self.assertTrue(lines2[83] ==
+ "I will gladly admit that Python is not the fastest running scripting language.\n",
+ "fileobj.readlines() failed")
+ finally:
+ fobj.close()
def test_fileobj_iter(self):
self.tar.extract("ustar/regtype", TEMPDIR)
@@ -78,9 +85,12 @@
with open(os.path.join(TEMPDIR, "ustar/regtype"), "rU") as fobj1:
lines1 = fobj1.readlines()
fobj2 = self.tar.extractfile(tarinfo)
- lines2 = list(io.TextIOWrapper(fobj2))
- self.assertTrue(lines1 == lines2,
- "fileobj.__iter__() failed")
+ try:
+ lines2 = list(io.TextIOWrapper(fobj2))
+ self.assertTrue(lines1 == lines2,
+ "fileobj.__iter__() failed")
+ finally:
+ fobj2.close()
def test_fileobj_seek(self):
self.tar.extract("ustar/regtype", TEMPDIR)
@@ -138,7 +148,11 @@
def _test_fileobj_link(self, lnktype, regtype):
a = self.tar.extractfile(lnktype)
b = self.tar.extractfile(regtype)
- self.assertEqual(a.name, b.name)
+ try:
+ self.assertEqual(a.name, b.name)
+ finally:
+ a.close()
+ b.close()
def test_fileobj_link1(self):
self._test_fileobj_link("ustar/lnktype", "ustar/regtype")
@@ -225,8 +239,8 @@
data = fobj.read()
fobj = io.BytesIO(data)
fobj.name = ""
- tar = tarfile.open(fileobj=fobj, mode=self.mode)
- self.assertEqual(tar.name, None)
+ with tarfile.open(fileobj=fobj, mode=self.mode) as tar:
+ self.assertEqual(tar.name, None)
def test_fileobj_with_offset(self):
# Skip the first member and store values from the second member
@@ -237,7 +251,9 @@
t = tar.next()
name = t.name
offset = t.offset
- data = tar.extractfile(t).read()
+ f = tar.extractfile(t)
+ data = f.read()
+ f.close()
finally:
tar.close()
@@ -319,7 +335,8 @@
if e.errno == errno.ENOENT:
self.fail("hardlink not extracted properly")
- data = open(os.path.join(TEMPDIR, "ustar/lnktype"), "rb").read()
+ with open(os.path.join(TEMPDIR, "ustar/lnktype"), "rb") as f:
+ data = f.read()
self.assertEqual(md5sum(data), md5_regtype)
try:
@@ -328,7 +345,8 @@
if e.errno == errno.ENOENT:
self.fail("symlink not extracted properly")
- data = open(os.path.join(TEMPDIR, "ustar/symtype"), "rb").read()
+ with open(os.path.join(TEMPDIR, "ustar/symtype"), "rb") as f:
+ data = f.read()
self.assertEqual(md5sum(data), md5_regtype)
finally:
tar.close()
@@ -604,10 +622,10 @@
# the preceding extended header.
longname = self.subdir + "/" + "123/" * 125 + "longname"
offset = self.tar.getmember(longname).offset
- fobj = open(tarname, "rb")
- fobj.seek(offset)
- tarinfo = tarfile.TarInfo.frombuf(fobj.read(512), "iso8859-1", "strict")
- self.assertEqual(tarinfo.type, self.longnametype)
+ with open(tarname, "rb") as fobj:
+ fobj.seek(offset)
+ tarinfo = tarfile.TarInfo.frombuf(fobj.read(512), "iso8859-1", "strict")
+ self.assertEqual(tarinfo.type, self.longnametype)
class GNUReadTest(LongnameTest):
@@ -1353,8 +1371,11 @@
t = src.getmember("ustar/regtype")
t.name = "foo"
f = src.extractfile(t)
- with tarfile.open(self.tarname, mode) as tar:
- tar.addfile(t, f)
+ try:
+ with tarfile.open(self.tarname, mode) as tar:
+ tar.addfile(t, f)
+ finally:
+ f.close()
def _test(self, names=["bar"], fileobj=None):
with tarfile.open(self.tarname, fileobj=fileobj) as tar:
diff --git a/Misc/NEWS b/Misc/NEWS
index dd18844..964d9f7 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -54,6 +54,9 @@
Library
-------
+- Issue #10233: Close file objects in a timely manner in the tarfile module
+ and its test suite.
+
- Issue #10093: ResourceWarnings are now issued when files and sockets are
deallocated without explicit closing. These warnings are silenced by
default, except in pydebug mode.