Issue #8741: Fixed the TarFile.makelink() method. On platforms that do
not support filesystem links, it works around the limitation by
extracting symbolic and hard link entries as regular files.
This work-around stopped working reliably after a change in r74571. I
also added a few tests for this functionality.
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index a54b3b8..a563ffb 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -2127,8 +2127,7 @@
raise StreamError("cannot extract (sym)link as file object")
else:
# A (sym)link's file object is its target's file object.
- return self.extractfile(self._getmember(tarinfo.linkname,
- tarinfo))
+ return self.extractfile(self._find_link_target(tarinfo))
else:
# If there's no data associated with the member (directory, chrdev,
# blkdev, etc.), return None instead of a file object.
@@ -2237,27 +2236,21 @@
(platform limitation), we try to make a copy of the referenced file
instead of a link.
"""
- try:
+ if hasattr(os, "symlink") and hasattr(os, "link"):
+ # For systems that support symbolic and hard links.
if tarinfo.issym():
os.symlink(tarinfo.linkname, targetpath)
else:
# See extract().
- os.link(tarinfo._link_target, targetpath)
- except AttributeError:
- if tarinfo.issym():
- linkpath = os.path.dirname(tarinfo.name) + "/" + \
- tarinfo.linkname
- else:
- linkpath = tarinfo.linkname
-
+ if os.path.exists(tarinfo._link_target):
+ os.link(tarinfo._link_target, targetpath)
+ else:
+ self._extract_member(self._find_link_target(tarinfo), targetpath)
+ else:
try:
- self._extract_member(self.getmember(linkpath), targetpath)
- except (EnvironmentError, KeyError), e:
- linkpath = linkpath.replace("/", os.sep)
- try:
- shutil.copy2(linkpath, targetpath)
- except EnvironmentError, e:
- raise IOError("link could not be created")
+ self._extract_member(self._find_link_target(tarinfo), targetpath)
+ except KeyError:
+ raise ExtractError("unable to resolve link inside archive")
def chown(self, tarinfo, targetpath):
"""Set owner of targetpath according to tarinfo.
@@ -2356,21 +2349,28 @@
#--------------------------------------------------------------------------
# Little helper methods:
- def _getmember(self, name, tarinfo=None):
+ def _getmember(self, name, tarinfo=None, normalize=False):
"""Find an archive member by name from bottom to top.
If tarinfo is given, it is used as the starting point.
"""
# Ensure that all members have been loaded.
members = self.getmembers()
- if tarinfo is None:
- end = len(members)
- else:
- end = members.index(tarinfo)
+ # Limit the member search list up to tarinfo.
+ if tarinfo is not None:
+ members = members[:members.index(tarinfo)]
- for i in xrange(end - 1, -1, -1):
- if name == members[i].name:
- return members[i]
+ if normalize:
+ name = os.path.normpath(name)
+
+ for member in reversed(members):
+ if normalize:
+ member_name = os.path.normpath(member.name)
+ else:
+ member_name = member.name
+
+ if name == member_name:
+ return member
def _load(self):
"""Read through the entire archive file and look for readable
@@ -2391,6 +2391,25 @@
if mode is not None and self.mode not in mode:
raise IOError("bad operation for mode %r" % self.mode)
+ def _find_link_target(self, tarinfo):
+ """Find the target member of a symlink or hardlink member in the
+ archive.
+ """
+ if tarinfo.issym():
+ # Always search the entire archive.
+ linkname = os.path.dirname(tarinfo.name) + "/" + tarinfo.linkname
+ limit = None
+ else:
+ # Search the archive before the link, because a hard link is
+ # just a reference to an already archived file.
+ linkname = tarinfo.linkname
+ limit = tarinfo
+
+ member = self._getmember(linkname, tarinfo=limit, normalize=True)
+ if member is None:
+ raise KeyError("linkname %r not found" % linkname)
+ return member
+
def __iter__(self):
"""Provide an iterator object.
"""
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 0d58cda..cda5262 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -134,6 +134,26 @@
"read() after readline() failed")
fobj.close()
+ # Test if symbolic and hard links are resolved by extractfile(). The
+ # test link members each point to a regular member whose data is
+ # supposed to be exported.
+ def _test_fileobj_link(self, lnktype, regtype):
+ a = self.tar.extractfile(lnktype)
+ b = self.tar.extractfile(regtype)
+ self.assertEqual(a.name, b.name)
+
+ def test_fileobj_link1(self):
+ self._test_fileobj_link("ustar/lnktype", "ustar/regtype")
+
+ def test_fileobj_link2(self):
+ self._test_fileobj_link("./ustar/linktest2/lnktype", "ustar/linktest1/regtype")
+
+ def test_fileobj_symlink1(self):
+ self._test_fileobj_link("ustar/symtype", "ustar/regtype")
+
+ def test_fileobj_symlink2(self):
+ self._test_fileobj_link("./ustar/linktest2/symtype", "ustar/linktest1/regtype")
+
class CommonReadTest(ReadTest):
@@ -1376,6 +1396,29 @@
fobj.close()
+class LinkEmulationTest(ReadTest):
+
+ # Regression test for issue #8741. On platforms that do not support
+ # symbolic or hard links, tarfile tries to extract these types of members
+ # as the regular files they point to.
+ def _test_link_extraction(self, name):
+ self.tar.extract(name, TEMPDIR)
+ data = open(os.path.join(TEMPDIR, name), "rb").read()
+ self.assertEqual(md5sum(data), md5_regtype)
+
+ def test_hardlink_extraction1(self):
+ self._test_link_extraction("ustar/lnktype")
+
+ def test_hardlink_extraction2(self):
+ self._test_link_extraction("./ustar/linktest2/lnktype")
+
+ def test_symlink_extraction1(self):
+ self._test_link_extraction("ustar/symtype")
+
+ def test_symlink_extraction2(self):
+ self._test_link_extraction("./ustar/linktest2/symtype")
+
+
class GzipMiscReadTest(MiscReadTest):
tarname = gzipname
mode = "r:gz"
@@ -1460,6 +1503,8 @@
if hasattr(os, "link"):
tests.append(HardlinkTest)
+ else:
+ tests.append(LinkEmulationTest)
fobj = open(tarname, "rb")
data = fobj.read()
diff --git a/Lib/test/testtar.tar b/Lib/test/testtar.tar
index b5bb46b..bac0e26 100644
--- a/Lib/test/testtar.tar
+++ b/Lib/test/testtar.tar
Binary files differ