Fix SF bug # 1330039, patch # 1331635 from Lars Gustaebel (tarfile maintainer)
Problem: if two files are assigned the same inode
number by the filesystem, the second one will be added
as a hardlink to the first, which means that its
content will be lost.
The patched code checks whether the file's st_nlink is
greater than 1. So hardlinks will be created only for
files that actually have several links pointing to them,
which is what GNU tar does.
Will backport.
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 2f21971..c86248c 100644
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -1150,7 +1150,8 @@
stmd = statres.st_mode
if stat.S_ISREG(stmd):
inode = (statres.st_ino, statres.st_dev)
- if inode in self.inodes and not self.dereference:
+ if not self.dereference and \
+ statres.st_nlink > 1 and inode in self.inodes:
# Is it a hardlink to an already
# archived file?
type = LNKTYPE
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 66409cd..b202ea5 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -372,6 +372,53 @@
if e.errno == errno.ENOENT:
self.fail("hardlink not extracted properly")
+class CreateHardlinkTest(BaseTest):
+ """Test the creation of LNKTYPE (hardlink) members in an archive.
+ In this respect tarfile.py mimics the behaviour of GNU tar: If
+ a file has a st_nlink > 1, it will be added as a REGTYPE member
+ only the first time.
+ """
+
+ def setUp(self):
+ self.tar = tarfile.open(tmpname(), "w")
+
+ self.foo = os.path.join(dirname(), "foo")
+ self.bar = os.path.join(dirname(), "bar")
+
+ if os.path.exists(self.foo):
+ os.remove(self.foo)
+ if os.path.exists(self.bar):
+ os.remove(self.bar)
+
+ file(self.foo, "w").write("foo")
+ self.tar.add(self.foo)
+
+ def test_add_twice(self):
+ # If st_nlink == 1 then the same file will be added as
+ # REGTYPE every time.
+ tarinfo = self.tar.gettarinfo(self.foo)
+ self.assertEqual(tarinfo.type, tarfile.REGTYPE,
+ "add file as regular failed")
+
+ def test_add_hardlink(self):
+ # If st_nlink > 1 then the same file will be added as
+ # LNKTYPE.
+ os.link(self.foo, self.bar)
+ tarinfo = self.tar.gettarinfo(self.foo)
+ self.assertEqual(tarinfo.type, tarfile.LNKTYPE,
+ "add file as hardlink failed")
+
+ tarinfo = self.tar.gettarinfo(self.bar)
+ self.assertEqual(tarinfo.type, tarfile.LNKTYPE,
+ "add file as hardlink failed")
+
+ def test_dereference_hardlink(self):
+ self.tar.dereference = True
+ os.link(self.foo, self.bar)
+ tarinfo = self.tar.gettarinfo(self.bar)
+ self.assertEqual(tarinfo.type, tarfile.REGTYPE,
+ "dereferencing hardlink failed")
+
# Gzip TestCases
class ReadTestGzip(ReadTest):
@@ -387,7 +434,6 @@
class ReadStreamAsteriskTestGzip(ReadStreamAsteriskTest):
comp = "gz"
-
# Filemode test cases
class FileModeTest(unittest.TestCase):
@@ -440,6 +486,7 @@
if hasattr(os, "link"):
tests.append(ExtractHardlinkTest)
+ tests.append(CreateHardlinkTest)
if gzip:
tests.extend([