blob: 4ef20db097163629848281566caf8f3349cc194f [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001import sys
2import os
Lars Gustäbelb506dc32007-08-07 18:36:16 +00003import io
Christian Heimesc64a1a62019-09-25 16:30:20 +02004from hashlib import sha256
Eric V. Smith7a803892015-04-15 10:27:58 -04005from contextlib import contextmanager
Serhiy Storchakaa89d22a2016-10-30 20:52:29 +02006from random import Random
Serhiy Storchakac45cd162017-03-08 10:32:44 +02007import pathlib
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00008
9import unittest
Eric V. Smith7a803892015-04-15 10:27:58 -040010import unittest.mock
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000011import tarfile
12
Berker Peksagce643912015-05-06 06:33:17 +030013from test import support
Hai Shia7f5d932020-08-04 00:41:24 +080014from test.support import os_helper
Hai Shi66abe982020-04-29 09:11:29 +080015from test.support import script_helper
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000016
17# Check for our compression modules.
18try:
19 import gzip
Brett Cannon260fbe82013-07-04 18:16:15 -040020except ImportError:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000021 gzip = None
22try:
23 import bz2
Brett Cannon260fbe82013-07-04 18:16:15 -040024except ImportError:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000025 bz2 = None
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +010026try:
27 import lzma
Brett Cannon260fbe82013-07-04 18:16:15 -040028except ImportError:
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +010029 lzma = None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000030
def sha256sum(data):
    """Return the SHA-256 digest of *data* as a hexadecimal string."""
    digest = sha256(data)
    return digest.hexdigest()
Guido van Rossumd8faa362007-04-27 19:54:29 +000033
# Scratch locations derived from os_helper.TESTFN.
TEMPDIR = f"{os.path.abspath(os_helper.TESTFN)}-tardir"
tarextdir = f"{TEMPDIR}-extract-test"

# Reference archive, located through support.findfile().
tarname = support.findfile("testtar.tar")

# Paths inside TEMPDIR for the compressed variants and scratch archives.
gzipname, bz2name, xzname, tmpname, dotlessname = (
    os.path.join(TEMPDIR, leaf)
    for leaf in ("testtar.tar.gz", "testtar.tar.bz2", "testtar.tar.xz",
                 "tmp.tar", "testtar")
)

# Expected SHA-256 hex digest of the data of the "ustar/regtype" member.
sha256_regtype = "e09e4bc8b3c9d9177e77256353b36c159f5f040531bbd4b024a8f9b9196c71ce"
# NOTE(review): presumably the digest for the sparse test members; it is not
# referenced in this part of the file -- confirm against its users.
sha256_sparse = "4f05a776071146756345ceee937b33fc5644f5a96b9780d1c7d6a32cdf164d7b"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000049
50
class TarTest:
    """Mixin holding the settings for the uncompressed test archive.

    Classes using it provide ``prefix``; ``mode`` is ``prefix`` followed by
    ``suffix``.  The GzipTest/Bz2Test/LzmaTest mixins redefine the same four
    attributes for their compression format.
    """

    # Compression part of the mode string (none for a plain tar).
    suffix = ''
    # Archive path handed to tarfile.open() by the tests.
    tarname = tarname
    # Format-specific TarFile opener.
    taropen = tarfile.TarFile.taropen
    # Callable used to open the raw archive file.
    open = io.FileIO

    @property
    def mode(self):
        """Mode string for tarfile.open(): prefix joined with suffix."""
        return "".join((self.prefix, self.suffix))
60
@support.requires_gzip()
class GzipTest:
    """Mixin holding the settings for the gzip-compressed test archive."""

    suffix = 'gz'
    tarname = gzipname
    taropen = tarfile.TarFile.gzopen
    # None when the gzip module could not be imported.
    open = getattr(gzip, "GzipFile", None)
Serhiy Storchaka8b562922013-06-17 15:38:50 +030067
@support.requires_bz2()
class Bz2Test:
    """Mixin holding the settings for the bzip2-compressed test archive."""

    suffix = 'bz2'
    tarname = bz2name
    taropen = tarfile.TarFile.bz2open
    # None when the bz2 module could not be imported.
    open = getattr(bz2, "BZ2File", None)
Serhiy Storchaka8b562922013-06-17 15:38:50 +030074
@support.requires_lzma()
class LzmaTest:
    """Mixin holding the settings for the xz-compressed test archive."""

    suffix = 'xz'
    tarname = xzname
    taropen = tarfile.TarFile.xzopen
    # None when the lzma module could not be imported.
    open = getattr(lzma, "LZMAFile", None)
Serhiy Storchaka8b562922013-06-17 15:38:50 +030081
82
class ReadTest(TarTest):
    """Fixture that opens the archive for reading around every test.

    setUp() stores the open archive in ``self.tar``; tearDown() closes it.
    """

    prefix = "r:"

    def setUp(self):
        archive = tarfile.open(self.tarname, mode=self.mode,
                               encoding="iso8859-1")
        self.tar = archive

    def tearDown(self):
        archive = self.tar
        archive.close()
93
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000094
class UstarReadTest(ReadTest, unittest.TestCase):
    """Checks on the file objects extractfile() returns for ustar members."""

    def test_fileobj_regular_file(self):
        # The bytes read must match the member's recorded size and the
        # expected digest.
        member = self.tar.getmember("ustar/regtype")
        with self.tar.extractfile(member) as stream:
            payload = stream.read()
            self.assertEqual(len(payload), member.size,
                             "regular file extraction failed")
            self.assertEqual(sha256sum(payload), sha256_regtype,
                             "regular file extraction failed")

    def test_fileobj_readlines(self):
        # readlines() on the member must agree with readlines() on the
        # copy of the member extracted to disk.
        self.tar.extract("ustar/regtype", TEMPDIR)
        member = self.tar.getmember("ustar/regtype")
        extracted_path = os.path.join(TEMPDIR, "ustar/regtype")
        with open(extracted_path, "r") as disk_file:
            lines1 = disk_file.readlines()

        with self.tar.extractfile(member) as stream:
            text_stream = io.TextIOWrapper(stream)
            lines2 = text_stream.readlines()
            failure = "fileobj.readlines() failed"
            self.assertEqual(lines1, lines2, failure)
            self.assertEqual(len(lines2), 114, failure)
            expected_line = (
                "I will gladly admit that Python is not the fastest "
                "running scripting language.\n"
            )
            self.assertEqual(lines2[83], expected_line, failure)

    def test_fileobj_iter(self):
        # Iterating over the member must yield the same lines as the copy
        # extracted to disk.
        self.tar.extract("ustar/regtype", TEMPDIR)
        member = self.tar.getmember("ustar/regtype")
        extracted_path = os.path.join(TEMPDIR, "ustar/regtype")
        with open(extracted_path, "r") as disk_file:
            lines1 = disk_file.readlines()
        with self.tar.extractfile(member) as stream:
            lines2 = [line for line in io.TextIOWrapper(stream)]
            self.assertEqual(lines1, lines2,
                             "fileobj.__iter__() failed")

    def test_fileobj_seek(self):
        self.tar.extract("ustar/regtype", TEMPDIR)
        extracted_path = os.path.join(TEMPDIR, "ustar/regtype")
        with open(extracted_path, "rb") as disk_file:
            data = disk_file.read()

        member = self.tar.getmember("ustar/regtype")
        with self.tar.extractfile(member) as stream:
            # Read everything first so the following seek(0) starts from
            # the end of the member.
            stream.read()
            stream.seek(0)
            self.assertEqual(0, stream.tell(),
                             "seek() to file's start failed")
            stream.seek(2048, io.SEEK_SET)
            self.assertEqual(2048, stream.tell(),
                             "seek() to absolute position failed")
            stream.seek(-1024, io.SEEK_CUR)
            self.assertEqual(1024, stream.tell(),
                             "seek() to negative relative position failed")
            stream.seek(1024, io.SEEK_CUR)
            self.assertEqual(2048, stream.tell(),
                             "seek() to positive relative position failed")
            chunk = stream.read(10)
            self.assertEqual(chunk, data[2048:2058],
                             "read() after seek failed")
            stream.seek(0, io.SEEK_END)
            self.assertEqual(member.size, stream.tell(),
                             "seek() to file's end failed")
            self.assertEqual(stream.read(), b"",
                             "read() at file's end did not return empty string")
            stream.seek(-member.size, io.SEEK_END)
            self.assertEqual(0, stream.tell(),
                             "relative seek() to file's end failed")
            stream.seek(512)
            first_pass = stream.readlines()
            stream.seek(512)
            second_pass = stream.readlines()
            self.assertEqual(first_pass, second_pass,
                             "readlines() after seek failed")
            stream.seek(0)
            line_length = len(stream.readline())
            self.assertEqual(line_length, stream.tell(),
                             "tell() after readline() failed")
            stream.seek(512)
            expected_pos = len(stream.readline()) + 512
            self.assertEqual(expected_pos, stream.tell(),
                             "tell() after seek() and readline() failed")
            stream.seek(0)
            first_line = stream.readline()
            self.assertEqual(stream.read(), data[len(first_line):],
                             "read() after readline() failed")

    def test_fileobj_text(self):
        with self.tar.extractfile("ustar/regtype") as stream:
            text_stream = io.TextIOWrapper(stream)
            data = text_stream.read().encode("iso8859-1")
            self.assertEqual(sha256sum(data), sha256_regtype)
            try:
                text_stream.seek(100)
            except AttributeError:
                # Issue #13815: seek() complained about a missing
                # flush() method.
                self.fail("seeking failed in text mode")

    # Test if symbolic and hard links are resolved by extractfile().  The
    # test link members each point to a regular member whose data is
    # supposed to be exported.
    def _test_fileobj_link(self, lnktype, regtype):
        with self.tar.extractfile(lnktype) as link_stream:
            with self.tar.extractfile(regtype) as regular_stream:
                self.assertEqual(link_stream.name, regular_stream.name)

    def test_fileobj_link1(self):
        link, target = "ustar/lnktype", "ustar/regtype"
        self._test_fileobj_link(link, target)

    def test_fileobj_link2(self):
        link = "./ustar/linktest2/lnktype"
        target = "ustar/linktest1/regtype"
        self._test_fileobj_link(link, target)

    def test_fileobj_symlink1(self):
        link, target = "ustar/symtype", "ustar/regtype"
        self._test_fileobj_link(link, target)

    def test_fileobj_symlink2(self):
        link = "./ustar/linktest2/symtype"
        target = "ustar/linktest1/regtype"
        self._test_fileobj_link(link, target)

    def test_issue14160(self):
        link, target = "symtype2", "ustar/regtype"
        self._test_fileobj_link(link, target)
218
class GzipUstarReadTest(GzipTest, UstarReadTest):
    """Run the UstarReadTest checks with the GzipTest archive settings."""
221
class Bz2UstarReadTest(Bz2Test, UstarReadTest):
    """Run the UstarReadTest checks with the Bz2Test archive settings."""
224
class LzmaUstarReadTest(LzmaTest, UstarReadTest):
    """Run the UstarReadTest checks with the LzmaTest archive settings."""
227
Guido van Rossumd8faa362007-04-27 19:54:29 +0000228
class ListTest(ReadTest, unittest.TestCase):
    """Checks on the text TarFile.list() writes to sys.stdout."""

    # Override setUp to use default encoding (UTF-8)
    def setUp(self):
        archive = tarfile.open(self.tarname, mode=self.mode)
        self.tar = archive

    def _captured_list(self, **list_kwargs):
        """Call self.tar.list(**list_kwargs) and return the bytes it printed.

        sys.stdout is temporarily replaced by an ASCII text wrapper around
        an in-memory buffer.
        """
        stream = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n')
        with support.swap_attr(sys, 'stdout', stream):
            self.tar.list(**list_kwargs)
        return stream.detach().getvalue()

    def test_list(self):
        out = self._captured_list(verbose=False)
        expected_names = (
            b'ustar/conttype',
            b'ustar/regtype',
            b'ustar/lnktype',
            b'ustar' + (b'/12345' * 40) + b'67/longname',
            b'./ustar/linktest2/symtype',
            b'./ustar/linktest2/lnktype',
            # Make sure it puts trailing slash for directory
            b'ustar/dirtype/',
            b'ustar/dirtype-with-size/',
        )
        for name in expected_names:
            self.assertIn(name, out)

        # Make sure it is able to print unencodable characters
        def to_ascii(raw):
            text = raw.decode(self.tar.encoding, 'surrogateescape')
            return text.encode('ascii', 'backslashreplace')

        unencodable_names = (
            b'ustar/umlauts-\xc4\xd6\xdc\xe4\xf6\xfc\xdf',
            b'misc/regtype-hpux-signed-chksum-'
            b'\xc4\xd6\xdc\xe4\xf6\xfc\xdf',
            b'misc/regtype-old-v7-signed-chksum-'
            b'\xc4\xd6\xdc\xe4\xf6\xfc\xdf',
            b'pax/bad-pax-\xe4\xf6\xfc',
            b'pax/hdrcharset-\xe4\xf6\xfc',
        )
        for name in unencodable_names:
            self.assertIn(to_ascii(name), out)

        # Make sure it prints files separated by one newline without any
        # 'ls -l'-like accessories if verbose flag is not being used
        # ...
        # ustar/conttype
        # ustar/regtype
        # ...
        plain_listing = (br'ustar/conttype ?\r?\n'
                         br'ustar/regtype ?\r?\n')
        self.assertRegex(out, plain_listing)
        # Make sure it does not print the source of link without verbose flag
        for marker in (b'link to', b'->'):
            self.assertNotIn(marker, out)

    def test_list_verbose(self):
        out = self._captured_list(verbose=True)
        # Make sure it prints files separated by one newline with 'ls -l'-like
        # accessories if verbose flag is being used
        # ...
        # ?rw-r--r-- tarfile/tarfile     7011 2003-01-06 07:19:43 ustar/conttype
        # ?rw-r--r-- tarfile/tarfile     7011 2003-01-06 07:19:43 ustar/regtype
        # ...
        verbose_entry = (br'\?rw-r--r-- tarfile/tarfile\s+7011 '
                         br'\d{4}-\d\d-\d\d\s+\d\d:\d\d:\d\d '
                         br'ustar/\w+type ?\r?\n')
        self.assertRegex(out, verbose_entry * 2)
        # Make sure it prints the source of link with verbose flag
        self.assertIn(b'ustar/symtype -> regtype', out)
        self.assertIn(b'./ustar/linktest2/symtype -> ../linktest1/regtype', out)
        self.assertIn(b'./ustar/linktest2/lnktype link to '
                      b'./ustar/linktest1/regtype', out)
        deep_path = b'/123' * 125
        for fmt in (b'gnu', b'pax'):
            self.assertIn(fmt + deep_path + b'/longlink link to ' + fmt +
                          deep_path + b'/longname', out)

    def test_list_members(self):
        def only_reg(archive):
            # Yield just the members whose name contains 'reg'.
            for member in archive.getmembers():
                if 'reg' in member.name:
                    yield member

        out = self._captured_list(verbose=False, members=only_reg(self.tar))
        self.assertIn(b'ustar/regtype', out)
        self.assertNotIn(b'ustar/conttype', out)
306 self.assertNotIn(b'ustar/conttype', out)
307
Serhiy Storchaka3b4f1592014-02-05 20:53:36 +0200308
class GzipListTest(GzipTest, ListTest):
    """Run the ListTest checks with the GzipTest archive settings."""
311
312
class Bz2ListTest(Bz2Test, ListTest):
    """Run the ListTest checks with the Bz2Test archive settings."""
315
316
class LzmaListTest(LzmaTest, ListTest):
    """Run the ListTest checks with the LzmaTest archive settings."""
319
320
class CommonReadTest(ReadTest):
    """Read-side checks shared by the classes that derive from this one.

    The tests use ``self.tarname``, ``self.mode`` and ``self.open`` from
    the TarTest-style mixins and the scratch path ``tmpname``.
    """

    def test_is_tarfile_erroneous(self):
        # Create an empty file: it is not a tar archive.
        with open(tmpname, "wb"):
            pass

        # is_tarfile works on filenames
        self.assertFalse(tarfile.is_tarfile(tmpname))

        # is_tarfile works on path-like objects
        self.assertFalse(tarfile.is_tarfile(pathlib.Path(tmpname)))

        # is_tarfile works on file objects
        with open(tmpname, "rb") as fobj:
            self.assertFalse(tarfile.is_tarfile(fobj))

        # is_tarfile works on file-like objects
        self.assertFalse(tarfile.is_tarfile(io.BytesIO(b"invalid")))

    def test_is_tarfile_valid(self):
        # is_tarfile works on filenames
        self.assertTrue(tarfile.is_tarfile(self.tarname))

        # is_tarfile works on path-like objects
        self.assertTrue(tarfile.is_tarfile(pathlib.Path(self.tarname)))

        # is_tarfile works on file objects
        with open(self.tarname, "rb") as fobj:
            self.assertTrue(tarfile.is_tarfile(fobj))

        # is_tarfile works on file-like objects
        with open(self.tarname, "rb") as fobj:
            self.assertTrue(tarfile.is_tarfile(io.BytesIO(fobj.read())))

    def test_empty_tarfile(self):
        # Test for issue6123: Allow opening empty archives.
        # This test checks if tarfile.open() is able to open an empty tar
        # archive successfully. Note that an empty tar archive is not the
        # same as an empty file!
        with tarfile.open(tmpname, self.mode.replace("r", "w")):
            pass
        # The archive is managed by a with-statement so that a ReadError
        # raised by tarfile.open() itself reaches self.fail() instead of
        # being masked by closing a name that was never bound.
        try:
            with tarfile.open(tmpname, self.mode) as tar:
                tar.getnames()
                self.assertListEqual(tar.getmembers(), [])
        except tarfile.ReadError:
            self.fail("tarfile.open() failed on empty archive")

    def test_non_existent_tarfile(self):
        # Test for issue11513: prevent non-existent gzipped tarfiles raising
        # multiple exceptions.
        with self.assertRaisesRegex(FileNotFoundError, "xxx"):
            tarfile.open("xxx", self.mode)

    def test_null_tarfile(self):
        # Test for issue6123: Allow opening empty archives.
        # This test guarantees that tarfile.open() does not treat an empty
        # file as an empty tar archive.
        with open(tmpname, "wb"):
            pass
        self.assertRaises(tarfile.ReadError, tarfile.open, tmpname, self.mode)
        self.assertRaises(tarfile.ReadError, tarfile.open, tmpname)

    def test_ignore_zeros(self):
        # Test TarFile's ignore_zeros option.
        # generate 512 pseudorandom bytes
        data = Random(0).randbytes(512)
        for char in (b'\0', b'a'):
            # Test if EOFHeaderError ('\0') and InvalidHeaderError ('a')
            # are ignored correctly.
            with self.open(tmpname, "w") as fobj:
                # Two 512-byte blocks of filler precede the real member.
                fobj.write(char * 1024)
                tarinfo = tarfile.TarInfo("foo")
                tarinfo.size = len(data)
                fobj.write(tarinfo.tobuf())
                fobj.write(data)

            with tarfile.open(tmpname, mode="r", ignore_zeros=True) as tar:
                self.assertListEqual(tar.getnames(), ["foo"],
                    "ignore_zeros=True should have skipped the %r-blocks" %
                    char)

    def test_premature_end_of_archive(self):
        for size in (512, 600, 1024, 1200):
            # Write an archive with one 1024-byte member ...
            with tarfile.open(tmpname, "w:") as tar:
                t = tarfile.TarInfo("foo")
                t.size = 1024
                tar.addfile(t, io.BytesIO(b"a" * 1024))

            # ... and cut the file off at `size` bytes.
            with open(tmpname, "r+b") as fobj:
                fobj.truncate(size)

            with tarfile.open(tmpname) as tar:
                with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"):
                    for t in tar:
                        pass

            with tarfile.open(tmpname) as tar:
                t = tar.next()

                with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"):
                    tar.extract(t, TEMPDIR)

                with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"):
                    tar.extractfile(t).read()

    def test_length_zero_header(self):
        # bpo-39017 (CVE-2019-20907): reading a zero-length header should fail
        # with an exception
        with self.assertRaisesRegex(tarfile.ReadError, "file could not be opened successfully"):
            with tarfile.open(support.findfile('recursion.tar')) as tar:
                pass
439
Serhiy Storchaka8b562922013-06-17 15:38:50 +0300440class MiscReadTestBase(CommonReadTest):
Serhiy Storchaka2c6a3ae2014-07-16 23:58:58 +0300441 def requires_name_attribute(self):
442 pass
443
Thomas Woutersed03b412007-08-28 21:37:11 +0000444 def test_no_name_argument(self):
Serhiy Storchaka2c6a3ae2014-07-16 23:58:58 +0300445 self.requires_name_attribute()
Antoine Pitrou95f55602010-09-23 18:36:46 +0000446 with open(self.tarname, "rb") as fobj:
Serhiy Storchaka2c6a3ae2014-07-16 23:58:58 +0300447 self.assertIsInstance(fobj.name, str)
448 with tarfile.open(fileobj=fobj, mode=self.mode) as tar:
449 self.assertIsInstance(tar.name, str)
450 self.assertEqual(tar.name, os.path.abspath(fobj.name))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000451
Thomas Woutersed03b412007-08-28 21:37:11 +0000452 def test_no_name_attribute(self):
Antoine Pitrou95f55602010-09-23 18:36:46 +0000453 with open(self.tarname, "rb") as fobj:
454 data = fobj.read()
Thomas Woutersed03b412007-08-28 21:37:11 +0000455 fobj = io.BytesIO(data)
456 self.assertRaises(AttributeError, getattr, fobj, "name")
457 tar = tarfile.open(fileobj=fobj, mode=self.mode)
Serhiy Storchaka2c6a3ae2014-07-16 23:58:58 +0300458 self.assertIsNone(tar.name)
Thomas Woutersed03b412007-08-28 21:37:11 +0000459
460 def test_empty_name_attribute(self):
Antoine Pitrou95f55602010-09-23 18:36:46 +0000461 with open(self.tarname, "rb") as fobj:
462 data = fobj.read()
Thomas Woutersed03b412007-08-28 21:37:11 +0000463 fobj = io.BytesIO(data)
464 fobj.name = ""
Antoine Pitroue1eca4e2010-10-29 23:49:49 +0000465 with tarfile.open(fileobj=fobj, mode=self.mode) as tar:
Serhiy Storchaka2c6a3ae2014-07-16 23:58:58 +0300466 self.assertIsNone(tar.name)
467
468 def test_int_name_attribute(self):
469 # Issue 21044: tarfile.open() should handle fileobj with an integer
470 # 'name' attribute.
471 fd = os.open(self.tarname, os.O_RDONLY)
472 with open(fd, 'rb') as fobj:
473 self.assertIsInstance(fobj.name, int)
474 with tarfile.open(fileobj=fobj, mode=self.mode) as tar:
475 self.assertIsNone(tar.name)
476
477 def test_bytes_name_attribute(self):
478 self.requires_name_attribute()
479 tarname = os.fsencode(self.tarname)
480 with open(tarname, 'rb') as fobj:
481 self.assertIsInstance(fobj.name, bytes)
482 with tarfile.open(fileobj=fobj, mode=self.mode) as tar:
483 self.assertIsInstance(tar.name, bytes)
484 self.assertEqual(tar.name, os.path.abspath(fobj.name))
Thomas Woutersed03b412007-08-28 21:37:11 +0000485
Serhiy Storchakac45cd162017-03-08 10:32:44 +0200486 def test_pathlike_name(self):
487 tarname = pathlib.Path(self.tarname)
488 with tarfile.open(tarname, mode=self.mode) as tar:
489 self.assertIsInstance(tar.name, str)
490 self.assertEqual(tar.name, os.path.abspath(os.fspath(tarname)))
491 with self.taropen(tarname) as tar:
492 self.assertIsInstance(tar.name, str)
493 self.assertEqual(tar.name, os.path.abspath(os.fspath(tarname)))
494 with tarfile.TarFile.open(tarname, mode=self.mode) as tar:
495 self.assertIsInstance(tar.name, str)
496 self.assertEqual(tar.name, os.path.abspath(os.fspath(tarname)))
497 if self.suffix == '':
498 with tarfile.TarFile(tarname, mode='r') as tar:
499 self.assertIsInstance(tar.name, str)
500 self.assertEqual(tar.name, os.path.abspath(os.fspath(tarname)))
501
Serhiy Storchaka53ad0cd2014-01-18 15:35:37 +0200502 def test_illegal_mode_arg(self):
503 with open(tmpname, 'wb'):
504 pass
505 with self.assertRaisesRegex(ValueError, 'mode must be '):
506 tar = self.taropen(tmpname, 'q')
507 with self.assertRaisesRegex(ValueError, 'mode must be '):
508 tar = self.taropen(tmpname, 'rw')
509 with self.assertRaisesRegex(ValueError, 'mode must be '):
510 tar = self.taropen(tmpname, '')
511
Christian Heimesd8654cf2007-12-02 15:22:16 +0000512 def test_fileobj_with_offset(self):
513 # Skip the first member and store values from the second member
514 # of the testtar.
515 tar = tarfile.open(self.tarname, mode=self.mode)
Antoine Pitrou95f55602010-09-23 18:36:46 +0000516 try:
517 tar.next()
518 t = tar.next()
519 name = t.name
520 offset = t.offset
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200521 with tar.extractfile(t) as f:
522 data = f.read()
Antoine Pitrou95f55602010-09-23 18:36:46 +0000523 finally:
524 tar.close()
Christian Heimesd8654cf2007-12-02 15:22:16 +0000525
526 # Open the testtar and seek to the offset of the second member.
Serhiy Storchaka8b562922013-06-17 15:38:50 +0300527 with self.open(self.tarname) as fobj:
Antoine Pitrou95f55602010-09-23 18:36:46 +0000528 fobj.seek(offset)
Christian Heimesd8654cf2007-12-02 15:22:16 +0000529
Antoine Pitrou95f55602010-09-23 18:36:46 +0000530 # Test if the tarfile starts with the second member.
Serhiy Storchaka9e4861f2019-03-05 10:05:57 +0200531 with tar.open(self.tarname, mode="r:", fileobj=fobj) as tar:
532 t = tar.next()
533 self.assertEqual(t.name, name)
534 # Read to the end of fileobj and test if seeking back to the
535 # beginning works.
536 tar.getmembers()
537 self.assertEqual(tar.extractfile(t).read(), data,
538 "seek back did not work")
Christian Heimesd8654cf2007-12-02 15:22:16 +0000539
Guido van Rossumd8faa362007-04-27 19:54:29 +0000540 def test_fail_comp(self):
541 # For Gzip and Bz2 Tests: fail with a ReadError on an uncompressed file.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000542 self.assertRaises(tarfile.ReadError, tarfile.open, tarname, self.mode)
Antoine Pitrou95f55602010-09-23 18:36:46 +0000543 with open(tarname, "rb") as fobj:
544 self.assertRaises(tarfile.ReadError, tarfile.open,
545 fileobj=fobj, mode=self.mode)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000546
547 def test_v7_dirtype(self):
548 # Test old style dirtype member (bug #1336623):
549 # Old V7 tars create directory members using an AREGTYPE
550 # header with a "/" appended to the filename field.
551 tarinfo = self.tar.getmember("misc/dirtype-old-v7")
Serhiy Storchaka8b562922013-06-17 15:38:50 +0300552 self.assertEqual(tarinfo.type, tarfile.DIRTYPE,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000553 "v7 dirtype failed")
554
Christian Heimes126d29a2008-02-11 22:57:17 +0000555 def test_xstar_type(self):
556 # The xstar format stores extra atime and ctime fields inside the
557 # space reserved for the prefix field. The prefix field must be
558 # ignored in this case, otherwise it will mess up the name.
559 try:
560 self.tar.getmember("misc/regtype-xstar")
561 except KeyError:
562 self.fail("failed to find misc/regtype-xstar (mangled prefix?)")
563
Guido van Rossumd8faa362007-04-27 19:54:29 +0000564 def test_check_members(self):
565 for tarinfo in self.tar:
Serhiy Storchaka8b562922013-06-17 15:38:50 +0300566 self.assertEqual(int(tarinfo.mtime), 0o7606136617,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000567 "wrong mtime for %s" % tarinfo.name)
568 if not tarinfo.name.startswith("ustar/"):
569 continue
Serhiy Storchaka8b562922013-06-17 15:38:50 +0300570 self.assertEqual(tarinfo.uname, "tarfile",
Guido van Rossumd8faa362007-04-27 19:54:29 +0000571 "wrong uname for %s" % tarinfo.name)
572
573 def test_find_members(self):
Serhiy Storchaka8b562922013-06-17 15:38:50 +0300574 self.assertEqual(self.tar.getmembers()[-1].name, "misc/eof",
Guido van Rossumd8faa362007-04-27 19:54:29 +0000575 "could not find all members")
576
Brian Curtin74e45612010-07-09 15:58:59 +0000577 @unittest.skipUnless(hasattr(os, "link"),
578 "Missing hardlink implementation")
Hai Shia7f5d932020-08-04 00:41:24 +0800579 @os_helper.skip_unless_symlink
Guido van Rossumd8faa362007-04-27 19:54:29 +0000580 def test_extract_hardlink(self):
581 # Test hardlink extraction (e.g. bug #857297).
Serhiy Storchaka88339c42012-12-30 20:16:30 +0200582 with tarfile.open(tarname, errorlevel=1, encoding="iso8859-1") as tar:
Antoine Pitrou95f55602010-09-23 18:36:46 +0000583 tar.extract("ustar/regtype", TEMPDIR)
Hai Shia7f5d932020-08-04 00:41:24 +0800584 self.addCleanup(os_helper.unlink, os.path.join(TEMPDIR, "ustar/regtype"))
Neal Norwitzf3396542005-10-28 05:52:22 +0000585
Serhiy Storchaka88339c42012-12-30 20:16:30 +0200586 tar.extract("ustar/lnktype", TEMPDIR)
Hai Shia7f5d932020-08-04 00:41:24 +0800587 self.addCleanup(os_helper.unlink, os.path.join(TEMPDIR, "ustar/lnktype"))
Antoine Pitroue1eca4e2010-10-29 23:49:49 +0000588 with open(os.path.join(TEMPDIR, "ustar/lnktype"), "rb") as f:
589 data = f.read()
Christian Heimesc64a1a62019-09-25 16:30:20 +0200590 self.assertEqual(sha256sum(data), sha256_regtype)
Neal Norwitzf3396542005-10-28 05:52:22 +0000591
Serhiy Storchaka88339c42012-12-30 20:16:30 +0200592 tar.extract("ustar/symtype", TEMPDIR)
Hai Shia7f5d932020-08-04 00:41:24 +0800593 self.addCleanup(os_helper.unlink, os.path.join(TEMPDIR, "ustar/symtype"))
Antoine Pitroue1eca4e2010-10-29 23:49:49 +0000594 with open(os.path.join(TEMPDIR, "ustar/symtype"), "rb") as f:
595 data = f.read()
Christian Heimesc64a1a62019-09-25 16:30:20 +0200596 self.assertEqual(sha256sum(data), sha256_regtype)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000597
Christian Heimesfaf2f632008-01-06 16:59:19 +0000598 def test_extractall(self):
599 # Test if extractall() correctly restores directory permissions
600 # and times (see issue1735).
Christian Heimesfaf2f632008-01-06 16:59:19 +0000601 tar = tarfile.open(tarname, encoding="iso8859-1")
Martin v. Löwisbe647e22010-11-01 22:08:46 +0000602 DIR = os.path.join(TEMPDIR, "extractall")
603 os.mkdir(DIR)
Antoine Pitrou95f55602010-09-23 18:36:46 +0000604 try:
605 directories = [t for t in tar if t.isdir()]
Martin v. Löwisbe647e22010-11-01 22:08:46 +0000606 tar.extractall(DIR, directories)
Antoine Pitrou95f55602010-09-23 18:36:46 +0000607 for tarinfo in directories:
Martin v. Löwisbe647e22010-11-01 22:08:46 +0000608 path = os.path.join(DIR, tarinfo.name)
Antoine Pitrou95f55602010-09-23 18:36:46 +0000609 if sys.platform != "win32":
610 # Win32 has no support for fine grained permissions.
Serhiy Storchaka8b562922013-06-17 15:38:50 +0300611 self.assertEqual(tarinfo.mode & 0o777,
612 os.stat(path).st_mode & 0o777)
Victor Stinner26bfb5a2010-10-29 10:59:08 +0000613 def format_mtime(mtime):
614 if isinstance(mtime, float):
615 return "{} ({})".format(mtime, mtime.hex())
616 else:
617 return "{!r} (int)".format(mtime)
Victor Stinner14d8fe72010-10-29 11:02:06 +0000618 file_mtime = os.path.getmtime(path)
Victor Stinner26bfb5a2010-10-29 10:59:08 +0000619 errmsg = "tar mtime {0} != file time {1} of path {2!a}".format(
620 format_mtime(tarinfo.mtime),
621 format_mtime(file_mtime),
622 path)
623 self.assertEqual(tarinfo.mtime, file_mtime, errmsg)
Antoine Pitrou95f55602010-09-23 18:36:46 +0000624 finally:
625 tar.close()
Hai Shia7f5d932020-08-04 00:41:24 +0800626 os_helper.rmtree(DIR)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000627
Martin v. Löwis16f344d2010-11-01 21:39:13 +0000628 def test_extract_directory(self):
629 dirtype = "ustar/dirtype"
Martin v. Löwisbe647e22010-11-01 22:08:46 +0000630 DIR = os.path.join(TEMPDIR, "extractdir")
631 os.mkdir(DIR)
632 try:
633 with tarfile.open(tarname, encoding="iso8859-1") as tar:
634 tarinfo = tar.getmember(dirtype)
635 tar.extract(tarinfo, path=DIR)
636 extracted = os.path.join(DIR, dirtype)
637 self.assertEqual(os.path.getmtime(extracted), tarinfo.mtime)
638 if sys.platform != "win32":
639 self.assertEqual(os.stat(extracted).st_mode & 0o777, 0o755)
640 finally:
Hai Shia7f5d932020-08-04 00:41:24 +0800641 os_helper.rmtree(DIR)
Martin v. Löwis16f344d2010-11-01 21:39:13 +0000642
def test_extractall_pathlike_name(self):
    # extractall() is handed a pathlib.Path destination; every extracted
    # directory must end up with the mtime stored in its header.
    dest = pathlib.Path(TEMPDIR) / "extractall"
    with os_helper.temp_dir(dest):
        with tarfile.open(tarname, encoding="iso8859-1") as archive:
            dir_members = [member for member in archive if member.isdir()]
            archive.extractall(dest, dir_members)
            for member in dir_members:
                extracted = dest / member.name
                self.assertEqual(os.path.getmtime(extracted), member.mtime)
652
def test_extract_pathlike_name(self):
    # extract() is handed a pathlib.Path destination; the extracted
    # directory must end up with the mtime stored in its header.
    member_name = "ustar/dirtype"
    dest = pathlib.Path(TEMPDIR) / "extractall"
    with os_helper.temp_dir(dest):
        with tarfile.open(tarname, encoding="iso8859-1") as archive:
            member = archive.getmember(member_name)
            archive.extract(member, path=dest)
            self.assertEqual(os.path.getmtime(dest / member_name),
                             member.mtime)
662
def test_init_close_fobj(self):
    # Issue #7341: Close the internal file object in the TarFile
    # constructor in case of an error. For the test we rely on
    # the fact that opening an empty file raises a ReadError.
    empty_path = os.path.join(TEMPDIR, "empty")
    with open(empty_path, "wb") as stream:
        stream.write(b"")

    try:
        # Allocate the object first and run __init__ by hand, so that the
        # half-initialised instance is still reachable after the failure.
        archive = object.__new__(tarfile.TarFile)
        raised = False
        try:
            archive.__init__(empty_path)
        except tarfile.ReadError:
            raised = True
        if not raised:
            self.fail("ReadError not raised")
        self.assertTrue(archive.fileobj.closed)
    finally:
        os_helper.unlink(empty_path)
Lars Gustäbelb7f09232009-11-23 15:48:33 +0000681
def test_parallel_iteration(self):
    # Issue #16601: Restarting iteration over tarfile continued
    # from where it left off.
    with tarfile.open(self.tarname) as archive:
        # Two independent iterators over the same TarFile must yield the
        # same members in lockstep.
        first_pass = iter(archive)
        second_pass = iter(archive)
        for left, right in zip(first_pass, second_pass):
            self.assertEqual(left.offset, right.offset)
            self.assertEqual(left.get_info(), right.get_info())
689
class MiscReadTest(MiscReadTestBase, unittest.TestCase):
    # Concrete test case for the MiscReadTestBase tests.
    # NOTE(review): binding test_fail_comp to None presumably switches off
    # an inherited test of that name; its definition is outside this chunk.
    test_fail_comp = None
Guido van Rossumd8faa362007-04-27 19:54:29 +0000692
class GzipMiscReadTest(GzipTest, MiscReadTestBase, unittest.TestCase):
    # Combines the GzipTest mixin with the MiscReadTestBase tests; adds
    # nothing of its own.
    pass
Guido van Rossumd8faa362007-04-27 19:54:29 +0000695
class Bz2MiscReadTest(Bz2Test, MiscReadTestBase, unittest.TestCase):
    # Combines the Bz2Test mixin with the MiscReadTestBase tests.

    def requires_name_attribute(self):
        # Skips the calling test; the reason is given in the skip message.
        self.skipTest("BZ2File have no name attribute")
699
class LzmaMiscReadTest(LzmaTest, MiscReadTestBase, unittest.TestCase):
    # Combines the LzmaTest mixin with the MiscReadTestBase tests.

    def requires_name_attribute(self):
        # Skips the calling test; the reason is given in the skip message.
        self.skipTest("LZMAFile have no name attribute")
703
704
class StreamReadTest(CommonReadTest, unittest.TestCase):
    # Read tests that use the "r|" (stream) mode prefix.

    prefix = "r|"

    def test_read_through(self):
        # Issue #11224: A poorly designed _FileInFile.read() method
        # caused seeking errors with stream tar files.
        for member in self.tar:
            if member.isreg():
                with self.tar.extractfile(member) as stream:
                    # Drain the member in 512-byte reads until EOF.
                    chunk = True
                    while chunk:
                        try:
                            chunk = stream.read(512)
                        except tarfile.StreamError:
                            self.fail("simple read-through using "
                                      "TarFile.extractfile() failed")

    def test_fileobj_regular_file(self):
        # get "regtype" (can't use getmember)
        member = self.tar.next()
        with self.tar.extractfile(member) as stream:
            payload = stream.read()
        self.assertEqual(len(payload), member.size,
                         "regular file extraction failed")
        self.assertEqual(sha256sum(payload), sha256_regtype,
                         "regular file extraction failed")

    def test_provoke_stream_error(self):
        # NOTE(review): getmembers() has walked the archive by the time the
        # first member is read, which is presumably what makes the read
        # raise StreamError.
        members = self.tar.getmembers()
        with self.tar.extractfile(members[0]) as first:  # read the first member
            with self.assertRaises(tarfile.StreamError):
                first.read()

    def test_compare_members(self):
        # Walk the same archive once through a regular TarFile and once
        # through the stream under test, comparing the members pairwise.
        plain = tarfile.open(tarname, encoding="iso8859-1")
        try:
            stream = self.tar

            while True:
                plain_member = plain.next()
                stream_member = stream.next()
                if plain_member is None:
                    break
                self.assertIsNotNone(stream_member, "stream.next() failed.")

                if stream_member.islnk() or stream_member.issym():
                    # extractfile() on a link member of the stream must fail.
                    with self.assertRaises(tarfile.StreamError):
                        stream.extractfile(stream_member)
                    continue

                plain_data = plain.extractfile(plain_member)
                stream_data = stream.extractfile(stream_member)
                if plain_data is None:
                    continue
                self.assertIsNotNone(stream_data,
                                     "stream.extractfile() failed")
                self.assertEqual(plain_data.read(), stream_data.read(),
                                 "stream extraction failed")
        finally:
            plain.close()
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000765
class GzipStreamReadTest(GzipTest, StreamReadTest):
    # StreamReadTest combined with the GzipTest mixin; adds nothing of
    # its own.
    pass
Thomas Wouters89f507f2006-12-13 04:49:30 +0000768
class Bz2StreamReadTest(Bz2Test, StreamReadTest):
    # StreamReadTest combined with the Bz2Test mixin; adds nothing of
    # its own.
    pass
Thomas Wouterscf297e42007-02-23 15:07:44 +0000771
class LzmaStreamReadTest(LzmaTest, StreamReadTest):
    # StreamReadTest combined with the LzmaTest mixin; adds nothing of
    # its own.
    pass
774
775
class DetectReadTest(TarTest, unittest.TestCase):
    # Checks which read modes are able to open self.tarname, both when
    # tarfile opens the file itself and when it is handed a file object.

    def _testfunc_file(self, name, mode):
        # Open `name` by path with `mode`; a ReadError fails the test with
        # a message naming the file, the mode and the error.
        try:
            tar = tarfile.open(name, mode)
        except tarfile.ReadError as e:
            self.fail("tarfile.open(%r, %r) raised ReadError: %s"
                      % (name, mode, e))
        else:
            tar.close()

    def _testfunc_fileobj(self, name, mode):
        # Same as _testfunc_file(), but tarfile reads from an already open
        # binary file object. The TarFile is closed while that file object
        # is still open.
        with open(name, "rb") as f:
            try:
                tar = tarfile.open(name, mode, fileobj=f)
            except tarfile.ReadError as e:
                self.fail("tarfile.open(%r, %r, fileobj=...) raised "
                          "ReadError: %s" % (name, mode, e))
            else:
                tar.close()

    def _test_modes(self, testfunc):
        # testfunc: callable taking (name, mode); one of the two helpers
        # above.
        if self.suffix:
            # Combinations of file and explicit mode that must be rejected.
            rejected = (
                (tarname, "r:" + self.suffix),
                (tarname, "r|" + self.suffix),
                (self.tarname, "r:"),
                (self.tarname, "r|"),
            )
            for name, mode in rejected:
                with self.assertRaises(tarfile.ReadError):
                    tarfile.open(name, mode=mode)
        # Modes that must be able to open self.tarname.
        accepted = (
            "r",
            "r:" + self.suffix,
            "r:*",
            "r|" + self.suffix,
            "r|*",
        )
        for mode in accepted:
            testfunc(self.tarname, mode)

    def test_detect_file(self):
        self._test_modes(self._testfunc_file)

    def test_detect_fileobj(self):
        self._test_modes(self._testfunc_fileobj)
815
class GzipDetectReadTest(GzipTest, DetectReadTest):
    # DetectReadTest combined with the GzipTest mixin; adds nothing of
    # its own.
    pass
818
class Bz2DetectReadTest(Bz2Test, DetectReadTest):
    # DetectReadTest combined with the Bz2Test mixin, plus one
    # bz2-specific detection test.

    def test_detect_stream_bz2(self):
        # Originally, tarfile's stream detection looked for the string
        # "BZh91" at the start of the file. This is incorrect because
        # the '9' represents the blocksize (900,000 bytes). If the file was
        # compressed using another blocksize autodetection fails.
        with open(tarname, "rb") as source:
            raw = source.read()

        # Compress with blocksize 100,000 bytes, the file starts with "BZh11".
        with bz2.BZ2File(tmpname, "wb", compresslevel=1) as compressed:
            compressed.write(raw)

        self._testfunc_file(tmpname, "r|*")
833
class LzmaDetectReadTest(LzmaTest, DetectReadTest):
    # DetectReadTest combined with the LzmaTest mixin; adds nothing of
    # its own.
    pass
Guido van Rossumd8faa362007-04-27 19:54:29 +0000836
Serhiy Storchaka8b562922013-06-17 15:38:50 +0300837
class MemberReadTest(ReadTest, unittest.TestCase):
    # Looks up individual members of self.tar by name and compares their
    # header fields (and, where a digest is given, their data) with fixed
    # expected values.

    def _test_member(self, tarinfo, chksum=None, **kwargs):
        # tarinfo: the member to check.
        # chksum:  expected sha256 hex digest of the member's data, or None
        #          to skip the content check.
        # kwargs:  expected values of further TarInfo attributes, by name.
        if chksum is not None:
            with self.tar.extractfile(tarinfo) as payload:
                digest = sha256sum(payload.read())
            self.assertEqual(digest, chksum,
                             "wrong sha256sum for %s" % tarinfo.name)

        # These three values are checked on every member.
        expected = dict(kwargs, mtime=0o7606136617, uid=1000, gid=100)
        if "old-v7" not in tarinfo.name:
            # V7 tar can't handle alphabetic owners.
            expected.update(uname="tarfile", gname="tarfile")
        for field, value in expected.items():
            self.assertEqual(
                getattr(tarinfo, field), value,
                "wrong value in %s field of %s" % (field, tarinfo.name))

    def test_find_regtype(self):
        self._test_member(self.tar.getmember("ustar/regtype"),
                          size=7011, chksum=sha256_regtype)

    def test_find_conttype(self):
        self._test_member(self.tar.getmember("ustar/conttype"),
                          size=7011, chksum=sha256_regtype)

    def test_find_dirtype(self):
        self._test_member(self.tar.getmember("ustar/dirtype"), size=0)

    def test_find_dirtype_with_size(self):
        self._test_member(self.tar.getmember("ustar/dirtype-with-size"),
                          size=255)

    def test_find_lnktype(self):
        self._test_member(self.tar.getmember("ustar/lnktype"),
                          size=0, linkname="ustar/regtype")

    def test_find_symtype(self):
        self._test_member(self.tar.getmember("ustar/symtype"),
                          size=0, linkname="regtype")

    def test_find_blktype(self):
        self._test_member(self.tar.getmember("ustar/blktype"),
                          size=0, devmajor=3, devminor=0)

    def test_find_chrtype(self):
        self._test_member(self.tar.getmember("ustar/chrtype"),
                          size=0, devmajor=1, devminor=3)

    def test_find_fifotype(self):
        self._test_member(self.tar.getmember("ustar/fifotype"), size=0)

    def test_find_sparse(self):
        self._test_member(self.tar.getmember("ustar/sparse"),
                          size=86016, chksum=sha256_sparse)

    def test_find_gnusparse(self):
        self._test_member(self.tar.getmember("gnu/sparse"),
                          size=86016, chksum=sha256_sparse)

    def test_find_gnusparse_00(self):
        self._test_member(self.tar.getmember("gnu/sparse-0.0"),
                          size=86016, chksum=sha256_sparse)

    def test_find_gnusparse_01(self):
        self._test_member(self.tar.getmember("gnu/sparse-0.1"),
                          size=86016, chksum=sha256_sparse)

    def test_find_gnusparse_10(self):
        self._test_member(self.tar.getmember("gnu/sparse-1.0"),
                          size=86016, chksum=sha256_sparse)

    def test_find_umlauts(self):
        member = self.tar.getmember("ustar/umlauts-"
                                    "\xc4\xd6\xdc\xe4\xf6\xfc\xdf")
        self._test_member(member, size=7011, chksum=sha256_regtype)

    def test_find_ustar_longname(self):
        wanted = "ustar/" + "12345/" * 39 + "1234567/longname"
        self.assertIn(wanted, self.tar.getnames())

    def test_find_regtype_oldv7(self):
        self._test_member(self.tar.getmember("misc/regtype-old-v7"),
                          size=7011, chksum=sha256_regtype)

    def test_find_pax_umlauts(self):
        # Replace self.tar with one opened with an explicit iso8859-1
        # encoding before looking the member up.
        self.tar.close()
        self.tar = tarfile.open(self.tarname, mode=self.mode,
                                encoding="iso8859-1")
        member = self.tar.getmember("pax/umlauts-"
                                    "\xc4\xd6\xdc\xe4\xf6\xfc\xdf")
        self._test_member(member, size=7011, chksum=sha256_regtype)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000933
934
class LongnameTest:
    # Mixin with tests for members whose name or link target is very long.
    # The methods read self.subdir, self.tar and self.longnametype and use
    # the unittest assertion methods of the class they are mixed into.

    def _long_member_name(self, leaf):
        # "<subdir>/" followed by 125 "123/" components and then `leaf`.
        return "/".join([self.subdir] + ["123"] * 125 + [leaf])

    def test_read_longname(self):
        # Test reading of longname (bug #1471427).
        try:
            member = self.tar.getmember(self._long_member_name("longname"))
        except KeyError:
            self.fail("longname not found")
        self.assertNotEqual(member.type, tarfile.DIRTYPE,
                            "read longname as dirtype")

    def test_read_longlink(self):
        # The long-named link must point at the long-named file.
        target = self._long_member_name("longname")
        try:
            member = self.tar.getmember(self._long_member_name("longlink"))
        except KeyError:
            self.fail("longlink not found")
        self.assertEqual(member.linkname, target, "linkname wrong")

    def test_truncated_longname(self):
        # Copy only the first three 512-byte blocks at the member's offset
        # into a new in-memory archive; opening that must raise ReadError.
        start = self.tar.getmember(self._long_member_name("longname")).offset
        raw = self.tar.fileobj
        raw.seek(start)
        truncated = io.BytesIO(raw.read(3 * 512))
        with self.assertRaises(tarfile.ReadError):
            tarfile.open(name="foo.tar", fileobj=truncated)

    def test_header_offset(self):
        # Test if the start offset of the TarInfo object includes
        # the preceding extended header.
        start = self.tar.getmember(self._long_member_name("longname")).offset
        with open(tarname, "rb") as stream:
            stream.seek(start)
            header = tarfile.TarInfo.frombuf(stream.read(512),
                                             "iso8859-1", "strict")
        self.assertEqual(header.type, self.longnametype)
Guido van Rossume7ba4952007-06-06 23:52:48 +0000975
Guido van Rossumd8faa362007-04-27 19:54:29 +0000976
class GNUReadTest(LongnameTest, ReadTest, unittest.TestCase):
    # Read tests for the "gnu" subdirectory of the test archive.

    subdir = "gnu"
    longnametype = tarfile.GNUTYPE_LONGNAME

    # Since 3.2 tarfile is supposed to accurately restore sparse members and
    # produce files with holes. This is what we actually want to test here.
    # Unfortunately, not all platforms/filesystems support sparse files, and
    # even on platforms that do it is non-trivial to make reliable assertions
    # about holes in files. Therefore, we first do one basic test which works
    # on all platforms, and after that a test that will work only on
    # platforms/filesystems that prove to support sparse files.
    def _test_sparse_file(self, name):
        self.tar.extract(name, TEMPDIR)
        extracted = os.path.join(TEMPDIR, name)
        with open(extracted, "rb") as stream:
            digest = sha256sum(stream.read())
        self.assertEqual(digest, sha256_sparse,
                         "wrong sha256sum for %s" % name)

        if self._fs_supports_holes():
            # Fewer allocated bytes than the logical size means the file
            # was written with holes.
            info = os.stat(extracted)
            self.assertLess(info.st_blocks * 512, info.st_size)

    def test_sparse_file_old(self):
        self._test_sparse_file("gnu/sparse")

    def test_sparse_file_00(self):
        self._test_sparse_file("gnu/sparse-0.0")

    def test_sparse_file_01(self):
        self._test_sparse_file("gnu/sparse-0.1")

    def test_sparse_file_10(self):
        self._test_sparse_file("gnu/sparse-1.0")

    @staticmethod
    def _fs_supports_holes():
        # Return True if the platform knows the st_blocks stat attribute and
        # uses st_blocks units of 512 bytes, and if the filesystem is able to
        # store holes of 4 KiB in files.
        #
        # The function returns False if page size is larger than 4 KiB.
        # For example, ppc64 uses pages of 64 KiB.
        if not sys.platform.startswith("linux"):
            return False

        # Linux evidently has 512 byte st_blocks units.
        probe = os.path.join(TEMPDIR, "sparse-test")
        with open(probe, "wb") as stream:
            # Seek to "punch a hole" of 4 KiB
            stream.seek(4096)
            stream.write(b'x' * 4096)
            stream.truncate()
        info = os.stat(probe)
        os_helper.unlink(probe)
        return info.st_blocks * 512 < info.st_size
Guido van Rossumd8faa362007-04-27 19:54:29 +00001034
1035
class PaxReadTest(LongnameTest, ReadTest, unittest.TestCase):
    # Read tests for the "pax" subdirectory of the test archive.

    subdir = "pax"
    longnametype = tarfile.XHDTYPE

    def test_pax_global_headers(self):
        # Each row is (member name, expected uname, expected gname); all
        # three members must report the same VENDOR.umlauts pax header.
        umlauts = "\xc4\xd6\xdc\xe4\xf6\xfc\xdf"
        expected_owners = (
            ("pax/regtype1", "foo", "bar"),
            ("pax/regtype2", "", "bar"),
            ("pax/regtype3", "tarfile", "tarfile"),
        )
        archive = tarfile.open(tarname, encoding="iso8859-1")
        try:
            for name, uname, gname in expected_owners:
                member = archive.getmember(name)
                self.assertEqual(member.uname, uname)
                self.assertEqual(member.gname, gname)
                self.assertEqual(member.pax_headers.get("VENDOR.umlauts"),
                                 umlauts)
        finally:
            archive.close()

    def test_pax_number_fields(self):
        # All following number fields are read from the pax header.
        archive = tarfile.open(tarname, encoding="iso8859-1")
        try:
            member = archive.getmember("pax/regtype4")
            headers = member.pax_headers
            self.assertEqual(member.size, 7011)
            self.assertEqual(member.uid, 123)
            self.assertEqual(member.gid, 123)
            self.assertEqual(member.mtime, 1041808783.0)
            self.assertEqual(type(member.mtime), float)
            self.assertEqual(float(headers["atime"]), 1041808783.0)
            self.assertEqual(float(headers["ctime"]), 1041808783.0)
        finally:
            archive.close()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001078
1079
class WriteTestBase(TarTest):
    # Put all write tests in here that are supposed to be tested
    # in all possible mode combinations.

    def test_fileobj_no_close(self):
        sink = io.BytesIO()
        with tarfile.open(fileobj=sink, mode=self.mode) as archive:
            archive.addfile(tarfile.TarInfo("foo"))
        self.assertFalse(sink.closed, "external fileobjs must never closed")
        # Issue #20238: Incomplete gzip output with mode="w:gz"
        written = sink.getvalue()
        # Drop the last reference to the TarFile and force a collection:
        # neither may close the caller's file object or add output.
        del archive
        support.gc_collect()
        self.assertFalse(sink.closed)
        self.assertEqual(written, sink.getvalue())

    def test_eof_marker(self):
        # Make sure an end of archive marker is written (two zero blocks).
        # tarfile insists on aligning archives to a 20 * 512 byte recordsize.
        # So, we create an archive that has exactly 10240 bytes without the
        # marker, and has 20480 bytes once the marker is written.
        with tarfile.open(tmpname, self.mode) as archive:
            member = tarfile.TarInfo("foo")
            member.size = tarfile.RECORDSIZE - tarfile.BLOCKSIZE
            archive.addfile(member, io.BytesIO(b"a" * member.size))

        # NOTE(review): self.open is presumably supplied by the TarTest
        # mixins so that compressed archives are measured uncompressed;
        # it is defined outside this chunk.
        with self.open(tmpname, "rb") as stream:
            total = len(stream.read())
            self.assertEqual(total, tarfile.RECORDSIZE * 2)
1107 self.assertEqual(len(fobj.read()), tarfile.RECORDSIZE * 2)
1108
Georg Brandlf08a9dd2008-06-10 16:57:31 +00001109
Serhiy Storchaka8b562922013-06-17 15:38:50 +03001110class WriteTest(WriteTestBase, unittest.TestCase):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001111
Serhiy Storchaka8b562922013-06-17 15:38:50 +03001112 prefix = "w:"
Guido van Rossumd8faa362007-04-27 19:54:29 +00001113
def test_100_char_name(self):
    # The name field in a tar header stores strings of at most 100 chars.
    # If a string is shorter than 100 chars it has to be padded with '\0',
    # which implies that a string of exactly 100 chars is stored without
    # a trailing '\0'.
    exact_name = "0123456789" * 10

    # Write an archive holding a single member with that name ...
    writer = tarfile.open(tmpname, self.mode)
    try:
        writer.addfile(tarfile.TarInfo(exact_name))
    finally:
        writer.close()

    # ... and read the name back.
    reader = tarfile.open(tmpname)
    try:
        stored = reader.getnames()[0]
        self.assertEqual(stored, exact_name,
                         "failed to store 100 char filename")
    finally:
        reader.close()
Thomas Wouters89f507f2006-12-13 04:49:30 +00001133
def test_tar_size(self):
    # Test for bug #1013882.
    archive = tarfile.open(tmpname, self.mode)
    try:
        sample = os.path.join(TEMPDIR, "file")
        with open(sample, "wb") as stream:
            stream.write(b"aaa")
        archive.add(sample)
    finally:
        archive.close()
    # The archive on disk must not be empty after one file was added.
    archive_size = os.path.getsize(tmpname)
    self.assertGreater(archive_size, 0, "tarfile is empty")
Thomas Wouters89f507f2006-12-13 04:49:30 +00001146
Guido van Rossumd8faa362007-04-27 19:54:29 +00001147 # The test_*_size tests test for bug #1167128.
def test_file_size(self):
    # gettarinfo() must report the current size of a regular file:
    # 0 while it is empty, 3 after three bytes were written.
    archive = tarfile.open(tmpname, self.mode)
    try:
        sample = os.path.join(TEMPDIR, "file")

        # Create (or truncate to) an empty file.
        open(sample, "wb").close()
        self.assertEqual(archive.gettarinfo(sample).size, 0)

        with open(sample, "wb") as stream:
            stream.write(b"aaa")
        self.assertEqual(archive.gettarinfo(sample).size, 3)
    finally:
        archive.close()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001163
def test_directory_size(self):
    # gettarinfo() must report size 0 for a directory.
    dirpath = os.path.join(TEMPDIR, "directory")
    os.mkdir(dirpath)
    try:
        archive = tarfile.open(tmpname, self.mode)
        try:
            self.assertEqual(archive.gettarinfo(dirpath).size, 0)
        finally:
            archive.close()
    finally:
        os_helper.rmdir(dirpath)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001176
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001177 # mock the following:
1178 # os.listdir: so we know that files are in the wrong order
def test_ordered_recursion(self):
    # add() must store directory entries in sorted order even when
    # os.listdir() (mocked below) reports them in a different order.
    path = os.path.join(TEMPDIR, "directory")
    first = os.path.join(path, "1")
    second = os.path.join(path, "2")
    os.mkdir(path)
    try:
        # The fixture files are created inside the try block so that the
        # directory is removed again even if creating them fails.
        os_helper.create_empty_file(first)
        os_helper.create_empty_file(second)
        tar = tarfile.open(tmpname, self.mode)
        try:
            with unittest.mock.patch('os.listdir') as mock_listdir:
                mock_listdir.return_value = ["2", "1"]
                tar.add(path)
            paths = [os.path.basename(m.name) for m in tar.getmembers()]
            self.assertEqual(paths, ["directory", "1", "2"])
        finally:
            tar.close()
    finally:
        os_helper.unlink(first)
        os_helper.unlink(second)
        os_helper.rmdir(path)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001200
def test_gettarinfo_pathlike_name(self):
    # gettarinfo() must give the same result for a pathlib.Path as for
    # the equivalent string path, and store the name as a str.
    with tarfile.open(tmpname, self.mode) as archive:
        sample = pathlib.Path(TEMPDIR) / "file"
        with open(sample, "wb") as stream:
            stream.write(b"aaa")
        from_path = archive.gettarinfo(sample)
        from_str = archive.gettarinfo(os.fspath(sample))
        self.assertIsInstance(from_path.name, str)
        self.assertEqual(from_path.name, from_str.name)
        self.assertEqual(from_path.size, 3)
1211
@unittest.skipUnless(hasattr(os, "link"),
                     "Missing hardlink implementation")
def test_link_size(self):
    # gettarinfo() must report size 0 for a hard link whose target has
    # already been seen by the same TarFile.
    link = os.path.join(TEMPDIR, "link")
    target = os.path.join(TEMPDIR, "link_target")
    # A single try/finally covers the whole fixture, so the target file
    # is removed even when the test is skipped because os.link() is not
    # permitted.
    try:
        with open(target, "wb") as fobj:
            fobj.write(b"aaa")
        try:
            os.link(target, link)
        except PermissionError as e:
            self.skipTest('os.link(): %s' % e)
        tar = tarfile.open(tmpname, self.mode)
        try:
            # Record the link target in the inodes list.
            tar.gettarinfo(target)
            tarinfo = tar.gettarinfo(link)
            self.assertEqual(tarinfo.size, 0)
        finally:
            tar.close()
    finally:
        os_helper.unlink(target)
        os_helper.unlink(link)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001235
@os_helper.skip_unless_symlink
def test_symlink_size(self):
    # gettarinfo() must report size 0 for a symbolic link.
    symlink_path = os.path.join(TEMPDIR, "symlink")
    os.symlink("link_target", symlink_path)
    try:
        archive = tarfile.open(tmpname, self.mode)
        try:
            self.assertEqual(archive.gettarinfo(symlink_path).size, 0)
        finally:
            archive.close()
    finally:
        os_helper.unlink(symlink_path)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001249
def test_add_self(self):
    # Test for #1257255.
    archive_path = os.path.abspath(tmpname)
    archive = tarfile.open(tmpname, self.mode)
    try:
        self.assertEqual(archive.name, archive_path,
                         "archive name must be absolute")

        # Adding the archive to itself must be a no-op ...
        archive.add(archive_path)
        self.assertEqual(archive.getnames(), [],
                         "added the archive to itself")

        # ... also when the add happens with TEMPDIR as the current
        # working directory.
        with os_helper.change_cwd(TEMPDIR):
            archive.add(archive_path)
        self.assertEqual(archive.getnames(), [],
                         "added the archive to itself")
    finally:
        archive.close()
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001267
def test_filter(self):
    # add(filter=...) may drop members (by returning None) and may
    # modify the TarInfo of the members it keeps.
    workdir = os.path.join(TEMPDIR, "filter")
    os.mkdir(workdir)
    try:
        for basename in ("foo", "bar", "baz"):
            os_helper.create_empty_file(os.path.join(workdir, basename))

        def tag_unless_bar(info):
            # Keep everything except "bar", stamping uid/uname on the
            # members that are kept.
            if os.path.basename(info.name) != "bar":
                info.uid = 123
                info.uname = "foo"
                return info
            return None

        writer = tarfile.open(tmpname, self.mode, encoding="iso8859-1")
        try:
            writer.add(workdir, arcname="empty_dir", filter=tag_unless_bar)
        finally:
            writer.close()

        # Verify that filter is a keyword-only argument
        with self.assertRaises(TypeError):
            writer.add(workdir, "empty_dir", True, None, tag_unless_bar)

        reader = tarfile.open(tmpname, "r")
        try:
            for member in reader:
                self.assertEqual(member.uid, 123)
                self.assertEqual(member.uname, "foo")
            # The directory itself plus "foo" and "baz"; "bar" was dropped.
            self.assertEqual(len(reader.getmembers()), 3)
        finally:
            reader.close()
    finally:
        os_helper.rmtree(workdir)
Lars Gustäbel049d2aa2009-09-12 10:44:00 +00001303
# Guarantee that stored pathnames are not modified. Don't
# remove ./ or ../ or double slashes. Still make absolute
# pathnames relative.
# For details see bug #6054.
def _test_pathname(self, path, cmp_path=None, dir=False):
    # Create a tarfile with an empty member named path
    # and compare the stored name with the original.
    #
    # path     -- archive name (arcname) given to the member.
    # cmp_path -- expected stored name; when omitted (or empty) the
    #             expectation is path with os.sep replaced by "/".
    # dir      -- add an empty directory instead of an empty file.
    foo = os.path.join(TEMPDIR, "foo")
    if not dir:
        os_helper.create_empty_file(foo)
    else:
        os.mkdir(foo)

    # The scratch entry is removed in a finally clause: callers invoke
    # this helper many times in a row with the same "foo" path, so an
    # entry left behind by one failing call would make the next
    # creation step fail as well.
    try:
        tar = tarfile.open(tmpname, self.mode)
        try:
            tar.add(foo, arcname=path)
        finally:
            tar.close()

        tar = tarfile.open(tmpname, "r")
        try:
            t = tar.next()
        finally:
            tar.close()
    finally:
        if not dir:
            os_helper.unlink(foo)
        else:
            os_helper.rmdir(foo)

    self.assertEqual(t.name, cmp_path or path.replace(os.sep, "/"))
1335
Senthil Kumaranbe5dbeb2011-04-30 06:09:51 +08001336
@os_helper.skip_unless_symlink
def test_extractall_symlinks(self):
    # Test if extractall works properly when tarfile contains symlinks
    workdir = os.path.join(TEMPDIR, "testsymlinks")
    archive_path = os.path.join(TEMPDIR, "testsymlinks.tar")
    os.mkdir(workdir)
    try:
        regular = os.path.join(workdir, 'source')
        symlink = os.path.join(workdir, 'symlink')
        with open(regular, 'w') as stream:
            stream.write('something\n')
        os.symlink(regular, symlink)

        # Archive the regular file first, then the link pointing at it.
        with tarfile.open(archive_path, 'w') as archive:
            for entry in (regular, symlink):
                archive.add(entry)

        # Let's extract it to the location which contains the symlink
        with tarfile.open(archive_path) as archive:
            # this should not raise OSError: [Errno 17] File exists
            try:
                archive.extractall(path=workdir)
            except OSError:
                self.fail("extractall failed with symlinked files")
    finally:
        os_helper.unlink(archive_path)
        os_helper.rmtree(workdir)
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001362
def test_pathnames(self):
    # Each tuple is joined with os.path.join() and handed to
    # _test_pathname(), which checks the name as stored in the archive.
    component_sets = (
        ("foo",),
        ("foo", ".", "bar"),
        ("foo", "..", "bar"),
        (".", "foo"),
        (".", "foo", "."),
        (".", "foo", ".", "bar"),
        (".", "foo", "..", "bar"),
        # NOTE(review): identical to the previous entry; kept so the
        # sequence of checks stays exactly as it was.
        (".", "foo", "..", "bar"),
        ("..", "foo"),
        ("..", "foo", ".."),
        ("..", "foo", ".", "bar"),
        ("..", "foo", "..", "bar"),
    )
    for components in component_sets:
        self._test_pathname(os.path.join(*components))

    # Doubled separators: checked inside a file name, and as the
    # trailing part of a directory name that is expected to be stored
    # as plain "foo".
    doubled = os.sep + os.sep
    self._test_pathname("foo" + doubled + "bar")
    self._test_pathname("foo" + doubled, "foo", dir=True)
1379
def test_abs_pathnames(self):
    # Absolute member names are expected to be stored as the relative
    # name "foo".
    if sys.platform != "win32":
        for absolute in ("/foo", "///foo"):
            self._test_pathname(absolute, "foo")
    else:
        self._test_pathname("C:\\foo", "foo")
1386
def test_cwd(self):
    # Test adding the current working directory.
    with os_helper.change_cwd(TEMPDIR):
        writer = tarfile.open(tmpname, self.mode)
        try:
            writer.add(".")
        finally:
            writer.close()

        reader = tarfile.open(tmpname, "r")
        try:
            for member in reader:
                # The "." entry itself is exempt; every other member
                # must be stored below "./".
                if member.name == ".":
                    continue
                self.assertTrue(member.name.startswith("./"), member.name)
        finally:
            reader.close()
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +00001403
def test_open_nonwritable_fileobj(self):
    # When the first write to a caller-supplied file object fails, the
    # exception must propagate out of tarfile.open() and the file
    # object must be left open.
    for exctype in (OSError, EOFError, RuntimeError):
        class FailsOnFirstWrite(io.BytesIO):
            first = True

            def write(self, data):
                # Raise once; later calls do nothing and return None.
                if not self.first:
                    return
                self.first = False
                raise exctype

        fileobj = FailsOnFirstWrite()
        with self.assertRaises(exctype):
            tarfile.open(tmpname, self.mode, fileobj=fileobj,
                         format=tarfile.PAX_FORMAT,
                         pax_headers={'non': 'empty'})
        self.assertFalse(fileobj.closed)
1419
class GzipWriteTest(GzipTest, WriteTest):
    # Combines GzipTest with WriteTest; defines nothing of its own.
    pass
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001422
class Bz2WriteTest(Bz2Test, WriteTest):
    # Combines Bz2Test with WriteTest; defines nothing of its own.
    pass
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001425
class LzmaWriteTest(LzmaTest, WriteTest):
    # Combines LzmaTest with WriteTest; defines nothing of its own.
    pass
1428
1429
class StreamWriteTest(WriteTestBase, unittest.TestCase):
    # Write tests for stream mode ("w|" prefix).

    prefix = "w|"
    # Subclasses may set this to a decompressor factory; when it is set,
    # test_stream_padding() decompresses the raw file with it.
    decompressor = None

    def test_stream_padding(self):
        # Test for bug #1543303.
        tarfile.open(tmpname, self.mode).close()

        if not self.decompressor:
            # No explicit decompressor: read through self.open().
            with self.open(tmpname) as stream:
                payload = stream.read()
        else:
            with open(tmpname, "rb") as stream:
                raw = stream.read()
            unpacker = self.decompressor()
            payload = unpacker.decompress(raw)
            self.assertFalse(unpacker.unused_data, "found trailing data")

        # An empty archive must consist of exactly RECORDSIZE NUL bytes.
        self.assertEqual(payload.count(b"\0"), tarfile.RECORDSIZE,
                         "incorrect zero padding")

    @unittest.skipUnless(sys.platform != "win32" and hasattr(os, "umask"),
                         "Missing umask implementation")
    def test_file_mode(self):
        # Test for issue #8464: Create files with correct
        # permissions.
        if os.path.exists(tmpname):
            os_helper.unlink(tmpname)

        # With a umask of 0o022 a newly created archive is expected to
        # end up with mode 0o644.
        saved_umask = os.umask(0o022)
        try:
            tarfile.open(tmpname, self.mode).close()
            permissions = os.stat(tmpname).st_mode & 0o777
            self.assertEqual(permissions, 0o644, "wrong file permissions")
        finally:
            os.umask(saved_umask)
1467
class GzipStreamWriteTest(GzipTest, StreamWriteTest):
    # Combines GzipTest with StreamWriteTest; defines nothing of its own.
    pass
1470
class Bz2StreamWriteTest(Bz2Test, StreamWriteTest):
    # bz2 is None when the module could not be imported; in that case
    # there is no decompressor class to reference.
    decompressor = None if bz2 is None else bz2.BZ2Decompressor
1473
class LzmaStreamWriteTest(LzmaTest, StreamWriteTest):
    # lzma is None when the module could not be imported; in that case
    # there is no decompressor class to reference.
    decompressor = None if lzma is None else lzma.LZMADecompressor
1476
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001477
class GNUWriteTest(unittest.TestCase):
    # This testcase checks for correct creation of GNU Longname
    # and Longlink extended headers (cp. bug #812325).

    def _length(self, s):
        # Size of the 512-byte blocks counted for s.  One block is always
        # added, so an exact multiple of 512 still gets an extra block.
        return (len(s) // 512 + 1) * 512

    def _calc_size(self, name, link=None):
        # Expected archive offset after adding a single member.
        # Initial tar header
        total = 512

        if len(name) > tarfile.LENGTH_NAME:
            # GNU longname extended header + longname
            total += 512 + self._length(name)
        if link is not None and len(link) > tarfile.LENGTH_LINK:
            # GNU longlink extended header + longlink
            total += 512 + self._length(link)
        return total

    def _test(self, name, link=None):
        # Write one member (a hard link entry when link is given) in GNU
        # format, check the resulting offset, then read the member back.
        original = tarfile.TarInfo(name)
        if link:
            original.linkname = link
            original.type = tarfile.LNKTYPE

        writer = tarfile.open(tmpname, "w")
        try:
            writer.format = tarfile.GNU_FORMAT
            writer.addfile(original)

            expected_offset = self._calc_size(name, link)
            actual_offset = writer.offset
            self.assertEqual(expected_offset, actual_offset,
                             "GNU longname/longlink creation failed")
        finally:
            writer.close()

        reader = tarfile.open(tmpname)
        try:
            restored = reader.next()
            self.assertIsNotNone(restored,
                                 "unable to read longname member")
            self.assertEqual(original.name, restored.name,
                             "unable to read longname member")
            self.assertEqual(original.linkname, restored.linkname,
                             "unable to read longname member")
        finally:
            reader.close()

    # The numeric suffix of each test is the length of the long name
    # and/or link it uses.

    def test_longname_1023(self):
        self._test(("longnam/" * 127) + "longnam")

    def test_longname_1024(self):
        self._test(("longnam/" * 127) + "longname")

    def test_longname_1025(self):
        self._test(("longnam/" * 127) + "longname_")

    def test_longlink_1023(self):
        self._test("name", ("longlnk/" * 127) + "longlnk")

    def test_longlink_1024(self):
        self._test("name", ("longlnk/" * 127) + "longlink")

    def test_longlink_1025(self):
        self._test("name", ("longlnk/" * 127) + "longlink_")

    def test_longnamelink_1023(self):
        self._test(("longnam/" * 127) + "longnam",
                   ("longlnk/" * 127) + "longlnk")

    def test_longnamelink_1024(self):
        self._test(("longnam/" * 127) + "longname",
                   ("longlnk/" * 127) + "longlink")

    def test_longnamelink_1025(self):
        self._test(("longnam/" * 127) + "longname_",
                   ("longlnk/" * 127) + "longlink_")
1558
Guido van Rossumd8faa362007-04-27 19:54:29 +00001559
class DeviceHeaderTest(WriteTestBase, unittest.TestCase):
    # Checks how the devmajor/devminor header fields are written.

    prefix = "w:"

    def test_headers_written_only_for_device_files(self):
        # Regression test for bpo-18819.
        scratch_dir = os.path.join(TEMPDIR, "device_header_test")
        os.mkdir(scratch_dir)
        try:
            writer = tarfile.open(tmpname, self.mode)
            try:
                blk_in = tarfile.TarInfo(name="my_block_device")
                blk_in.type = tarfile.BLKTYPE
                reg_in = tarfile.TarInfo(name="my_regular_file")
                reg_in.type = tarfile.REGTYPE
                for info in (blk_in, reg_in):
                    writer.addfile(info)
            finally:
                writer.close()

            # devmajor and devminor should be *interpreted* as 0 in both...
            reader = tarfile.open(tmpname, "r")
            try:
                blk_out = reader.getmember("my_block_device")
                reg_out = reader.getmember("my_regular_file")
            finally:
                reader.close()
            for member in (blk_out, reg_out):
                self.assertEqual(member.devmajor, 0)
                self.assertEqual(member.devminor, 0)

            # ...but the fields should not actually be set on regular files:
            with open(tmpname, "rb") as stream:
                raw = stream.read()
            blk_header = raw[blk_out.offset:blk_out.offset_data]
            reg_header = raw[reg_out.offset:reg_out.offset_data]
            # See `struct posixheader` in GNU docs for byte offsets:
            # <https://www.gnu.org/software/tar/manual/html_node/Standard.html>
            device_fields = slice(329, 329 + 16)
            self.assertEqual(blk_header[device_fields], b"0000000\0" * 2)
            self.assertEqual(reg_header[device_fields], b"\0" * 16)
        finally:
            os_helper.rmtree(scratch_dir)
William Chargin674935b2020-02-12 11:56:02 -08001604
1605
class CreateTest(WriteTestBase, unittest.TestCase):
    # Tests for exclusive-creation mode ("x:" prefix).

    prefix = "x:"

    # Payload file added to the archives; created once per class.
    file_path = os.path.join(TEMPDIR, "spameggs42")

    @classmethod
    def setUpClass(cls):
        with open(cls.file_path, "wb") as payload:
            payload.write(b"aaa")

    @classmethod
    def tearDownClass(cls):
        os_helper.unlink(cls.file_path)

    def setUp(self):
        # Every test starts without an archive at tmpname.
        os_helper.unlink(tmpname)

    def test_create(self):
        with tarfile.open(tmpname, self.mode) as archive:
            archive.add(self.file_path)

        with self.taropen(tmpname) as archive:
            member_names = archive.getnames()
        self.assertEqual(len(member_names), 1)
        self.assertIn('spameggs42', member_names[0])

    def test_create_existing(self):
        with tarfile.open(tmpname, self.mode) as archive:
            archive.add(self.file_path)

        # A second exclusive creation of the same path must fail...
        with self.assertRaises(FileExistsError):
            tarfile.open(tmpname, self.mode)

        # ...and the archive written first must still hold its member.
        with self.taropen(tmpname) as archive:
            member_names = archive.getnames()
        self.assertEqual(len(member_names), 1)
        self.assertIn('spameggs42', member_names[0])

    def test_create_taropen(self):
        with self.taropen(tmpname, "x") as archive:
            archive.add(self.file_path)

        with self.taropen(tmpname) as archive:
            member_names = archive.getnames()
        self.assertEqual(len(member_names), 1)
        self.assertIn('spameggs42', member_names[0])

    def test_create_existing_taropen(self):
        with self.taropen(tmpname, "x") as archive:
            archive.add(self.file_path)

        with self.assertRaises(FileExistsError):
            with self.taropen(tmpname, "x"):
                pass

        with self.taropen(tmpname) as archive:
            member_names = archive.getnames()
        self.assertEqual(len(member_names), 1)
        self.assertIn("spameggs42", member_names[0])

    def test_create_pathlike_name(self):
        # The archive name and the added file are both given as
        # pathlib.Path objects; the stored archive name must be a str.
        with tarfile.open(pathlib.Path(tmpname), self.mode) as archive:
            self.assertIsInstance(archive.name, str)
            self.assertEqual(archive.name, os.path.abspath(tmpname))
            archive.add(pathlib.Path(self.file_path))
            member_names = archive.getnames()
        self.assertEqual(len(member_names), 1)
        self.assertIn('spameggs42', member_names[0])

        with self.taropen(tmpname) as archive:
            member_names = archive.getnames()
        self.assertEqual(len(member_names), 1)
        self.assertIn('spameggs42', member_names[0])

    def test_create_taropen_pathlike_name(self):
        # Same as test_create_pathlike_name, but through self.taropen().
        with self.taropen(pathlib.Path(tmpname), "x") as archive:
            self.assertIsInstance(archive.name, str)
            self.assertEqual(archive.name, os.path.abspath(tmpname))
            archive.add(pathlib.Path(self.file_path))
            member_names = archive.getnames()
        self.assertEqual(len(member_names), 1)
        self.assertIn('spameggs42', member_names[0])

        with self.taropen(tmpname) as archive:
            member_names = archive.getnames()
        self.assertEqual(len(member_names), 1)
        self.assertIn('spameggs42', member_names[0])
1694
Berker Peksag0fe63252015-02-13 21:02:12 +02001695
class GzipCreateTest(GzipTest, CreateTest):
    # Combines GzipTest with CreateTest; defines nothing of its own.
    pass
1698
1699
class Bz2CreateTest(Bz2Test, CreateTest):
    # Combines Bz2Test with CreateTest; defines nothing of its own.
    pass
1702
1703
class LzmaCreateTest(LzmaTest, CreateTest):
    # Combines LzmaTest with CreateTest; defines nothing of its own.
    pass
1706
1707
class CreateWithXModeTest(CreateTest):
    # Re-runs the CreateTest cases with the bare "x" prefix.

    prefix = "x"

    # Setting an inherited test to None removes it from this class; the
    # two taropen-based tests are not repeated here.
    test_create_existing_taropen = None
    test_create_taropen = None
1714
1715
@unittest.skipUnless(hasattr(os, "link"), "Missing hardlink implementation")
class HardlinkTest(unittest.TestCase):
    # Test the creation of LNKTYPE (hardlink) members in an archive.

    def setUp(self):
        # Creates a file "foo", a hard link "bar" to it, and an open
        # archive to which "foo" has already been added.
        #
        # Each resource gets its cleanup registered as soon as it exists.
        # unittest does not call tearDown() when setUp() raises (which
        # includes the skipTest() below), but it always runs registered
        # cleanups, so nothing is left behind on the skip/failure paths.
        self.foo = os.path.join(TEMPDIR, "foo")
        self.bar = os.path.join(TEMPDIR, "bar")

        with open(self.foo, "wb") as fobj:
            fobj.write(b"foo")
        self.addCleanup(os_helper.unlink, self.foo)

        try:
            os.link(self.foo, self.bar)
        except PermissionError as e:
            self.skipTest('os.link(): %s' % e)
        self.addCleanup(os_helper.unlink, self.bar)

        self.tar = tarfile.open(tmpname, "w")
        # Cleanups run in reverse order of registration, so the archive
        # is closed before the two files are removed.
        self.addCleanup(self.tar.close)
        self.tar.add(self.foo)

    def test_add_twice(self):
        # The same name will be added as a REGTYPE every
        # time regardless of st_nlink.
        tarinfo = self.tar.gettarinfo(self.foo)
        self.assertEqual(tarinfo.type, tarfile.REGTYPE,
                         "add file as regular failed")

    def test_add_hardlink(self):
        # "bar" shares its inode with the already added "foo", so it is
        # expected to be recorded as a hard link member.
        tarinfo = self.tar.gettarinfo(self.bar)
        self.assertEqual(tarinfo.type, tarfile.LNKTYPE,
                         "add file as hardlink failed")

    def test_dereference_hardlink(self):
        # With dereference enabled the link is expected to be recorded
        # as a regular file instead.
        self.tar.dereference = True
        tarinfo = self.tar.gettarinfo(self.bar)
        self.assertEqual(tarinfo.type, tarfile.REGTYPE,
                         "dereferencing hardlink failed")
1757
Neal Norwitza4f651a2004-07-20 22:07:44 +00001758
class PaxWriteTest(GNUWriteTest):
    # Runs the inherited long name/link tests against PAX_FORMAT
    # archives and adds tests for pax headers.

    def _test(self, name, link=None):
        # See GNUWriteTest.
        original = tarfile.TarInfo(name)
        if link:
            original.linkname = link
            original.type = tarfile.LNKTYPE

        writer = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT)
        try:
            writer.addfile(original)
        finally:
            writer.close()

        reader = tarfile.open(tmpname)
        try:
            first = reader.getmembers()[0]
            # Only the link is checked when one was given, otherwise
            # only the name.
            if link:
                self.assertEqual(link, first.linkname,
                                 "PAX longlink creation failed")
            else:
                self.assertEqual(name, first.name,
                                 "PAX longname creation failed")
        finally:
            reader.close()

    def test_pax_global_header(self):
        pax_headers = {
            "foo": "bar",
            "uid": "0",
            "mtime": "1.23",
            "test": "\xe4\xf6\xfc",
            "\xe4\xf6\xfc": "test",
        }

        writer = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT,
                              pax_headers=pax_headers)
        try:
            writer.addfile(tarfile.TarInfo("test"))
        finally:
            writer.close()

        # Test if the global header was written correctly.
        reader = tarfile.open(tmpname, encoding="iso8859-1")
        try:
            self.assertEqual(reader.pax_headers, pax_headers)
            self.assertEqual(reader.getmembers()[0].pax_headers, pax_headers)
            # Test if all the fields are strings.
            for key, val in reader.pax_headers.items():
                self.assertIsNot(type(key), bytes)
                self.assertIsNot(type(val), bytes)
                if key not in tarfile.PAX_NUMBER_FIELDS:
                    continue
                # Number fields must be convertible by their converter.
                try:
                    tarfile.PAX_NUMBER_FIELDS[key](val)
                except (TypeError, ValueError):
                    self.fail("unable to convert pax header field")
        finally:
            reader.close()

    def test_pax_extended_header(self):
        # The fields from the pax header have priority over the
        # TarInfo.
        pax_headers = {"path": "foo", "uid": "123"}

        writer = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT,
                              encoding="iso8859-1")
        try:
            member = tarfile.TarInfo()
            member.name = "\xe4\xf6\xfc"  # non-ASCII
            member.uid = 8**8  # too large
            member.pax_headers = pax_headers
            writer.addfile(member)
        finally:
            writer.close()

        reader = tarfile.open(tmpname, encoding="iso8859-1")
        try:
            restored = reader.getmembers()[0]
            self.assertEqual(restored.pax_headers, pax_headers)
            self.assertEqual(restored.name, "foo")
            self.assertEqual(restored.uid, 123)
        finally:
            reader.close()
Guido van Rossume7ba4952007-06-06 23:52:48 +00001841
1842
class UnicodeTest:
    # Mixin with non-ASCII name tests.  It reads self.format, which the
    # concrete test classes are expected to define.

    def test_iso8859_1_filename(self):
        self._test_unicode_filename("iso8859-1")

    def test_utf7_filename(self):
        self._test_unicode_filename("utf7")

    def test_utf8_filename(self):
        self._test_unicode_filename("utf-8")

    def _test_unicode_filename(self, encoding):
        # Round-trip a non-ASCII member name through the given encoding.
        name = "\xe4\xf6\xfc"
        writer = tarfile.open(tmpname, "w", format=self.format,
                              encoding=encoding, errors="strict")
        try:
            writer.addfile(tarfile.TarInfo(name))
        finally:
            writer.close()

        reader = tarfile.open(tmpname, encoding=encoding)
        try:
            self.assertEqual(reader.getmembers()[0].name, name)
        finally:
            reader.close()

    def test_unicode_filename_error(self):
        # With encoding="ascii" and errors="strict", a non-ASCII name or
        # user name must make addfile() raise UnicodeError.
        writer = tarfile.open(tmpname, "w", format=self.format,
                              encoding="ascii", errors="strict")
        try:
            member = tarfile.TarInfo()

            member.name = "\xe4\xf6\xfc"
            with self.assertRaises(UnicodeError):
                writer.addfile(member)

            member.name = "foo"
            member.uname = "\xe4\xf6\xfc"
            with self.assertRaises(UnicodeError):
                writer.addfile(member)
        finally:
            writer.close()

    def test_unicode_argument(self):
        # All textual fields of every member must be exactly str.
        reader = tarfile.open(tarname, "r",
                              encoding="iso8859-1", errors="strict")
        try:
            for member in reader:
                for field in ("name", "linkname", "uname", "gname"):
                    self.assertIs(type(getattr(member, field)), str)
        finally:
            reader.close()

    def test_uname_unicode(self):
        member = tarfile.TarInfo("foo")
        member.uname = "\xe4\xf6\xfc"
        member.gname = "\xe4\xf6\xfc"

        writer = tarfile.open(tmpname, mode="w", format=self.format,
                              encoding="iso8859-1")
        try:
            writer.addfile(member)
        finally:
            writer.close()

        reader = tarfile.open(tmpname, encoding="iso8859-1")
        try:
            restored = reader.getmember("foo")
            self.assertEqual(restored.uname, "\xe4\xf6\xfc")
            self.assertEqual(restored.gname, "\xe4\xf6\xfc")

            if self.format != tarfile.PAX_FORMAT:
                # Reopen with the ascii codec: the bytes that cannot be
                # decoded are expected to show up as surrogate escapes.
                reader.close()
                reader = tarfile.open(tmpname, encoding="ascii")
                restored = reader.getmember("foo")
                self.assertEqual(restored.uname, "\udce4\udcf6\udcfc")
                self.assertEqual(restored.gname, "\udce4\udcf6\udcfc")
        finally:
            reader.close()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001922
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001923
class UstarUnicodeTest(UnicodeTest, unittest.TestCase):
    # UnicodeTest cases for USTAR_FORMAT plus field-length limit checks.

    format = tarfile.USTAR_FORMAT

    # Test whether the utf-8 encoded version of a filename exceeds the 100
    # bytes name field limit (every occurrence of '\xff' will be expanded to 2
    # bytes).
    def test_unicode_name1(self):
        self._test_ustar_name("0123456789" * 10)
        self._test_ustar_name("0123456789" * 10 + "0", ValueError)
        self._test_ustar_name("0123456789" * 9 + "01234567\xff")
        self._test_ustar_name("0123456789" * 9 + "012345678\xff", ValueError)

    def test_unicode_name2(self):
        self._test_ustar_name("0123456789" * 9 + "012345\xff\xff")
        self._test_ustar_name("0123456789" * 9 + "0123456\xff\xff", ValueError)

    # Test whether the utf-8 encoded version of a filename exceeds the 155
    # bytes prefix + '/' + 100 bytes name limit.
    def test_unicode_longname1(self):
        self._test_ustar_name(
            "0123456789" * 15 + "01234/" + "0123456789" * 10)
        self._test_ustar_name(
            "0123456789" * 15 + "0123/4" + "0123456789" * 10, ValueError)
        self._test_ustar_name(
            "0123456789" * 15 + "012\xff/" + "0123456789" * 10)
        self._test_ustar_name(
            "0123456789" * 15 + "0123\xff/" + "0123456789" * 10, ValueError)

    def test_unicode_longname2(self):
        self._test_ustar_name(
            "0123456789" * 15 + "01\xff/2" + "0123456789" * 10, ValueError)
        self._test_ustar_name(
            "0123456789" * 15 + "01\xff\xff/" + "0123456789" * 10, ValueError)

    def test_unicode_longname3(self):
        self._test_ustar_name(
            "0123456789" * 15 + "01\xff\xff/2" + "0123456789" * 10,
            ValueError)
        self._test_ustar_name(
            "0123456789" * 15 + "01234/" + "0123456789" * 9 + "01234567\xff")
        self._test_ustar_name(
            "0123456789" * 15 + "01234/" + "0123456789" * 9 + "012345678\xff",
            ValueError)

    def test_unicode_longname4(self):
        self._test_ustar_name(
            "0123456789" * 15 + "01234/" + "0123456789" * 9 + "012345\xff\xff")
        self._test_ustar_name(
            "0123456789" * 15 + "01234/" + "0123456789" * 9 + "0123456\xff\xff",
            ValueError)

    def _test_ustar_name(self, name, exc=None):
        # Add a member called name.  When exc is given, addfile() must
        # raise it; otherwise the name must read back unchanged.
        with tarfile.open(tmpname, "w", format=self.format,
                          encoding="utf-8") as writer:
            member = tarfile.TarInfo(name)
            if exc is not None:
                self.assertRaises(exc, writer.addfile, member)
            else:
                writer.addfile(member)

        if exc is not None:
            # The failure case has nothing to read back.
            return

        with tarfile.open(tmpname, "r", encoding="utf-8") as reader:
            for restored in reader:
                # Only the first member is inspected.
                self.assertEqual(name, restored.name)
                break

    # Test the same as above for the 100 bytes link field.
    def test_unicode_link1(self):
        self._test_ustar_link("0123456789" * 10)
        self._test_ustar_link("0123456789" * 10 + "0", ValueError)
        self._test_ustar_link("0123456789" * 9 + "01234567\xff")
        self._test_ustar_link("0123456789" * 9 + "012345678\xff", ValueError)

    def test_unicode_link2(self):
        self._test_ustar_link("0123456789" * 9 + "012345\xff\xff")
        self._test_ustar_link("0123456789" * 9 + "0123456\xff\xff", ValueError)

    def _test_ustar_link(self, name, exc=None):
        # Like _test_ustar_name(), but name is used as the link name of a
        # member called "foo".
        with tarfile.open(tmpname, "w", format=self.format,
                          encoding="utf-8") as writer:
            member = tarfile.TarInfo("foo")
            member.linkname = name
            if exc is not None:
                self.assertRaises(exc, writer.addfile, member)
            else:
                writer.addfile(member)

        if exc is not None:
            # The failure case has nothing to read back.
            return

        with tarfile.open(tmpname, "r", encoding="utf-8") as reader:
            for restored in reader:
                # Only the first member is inspected.
                self.assertEqual(name, restored.linkname)
                break
2001
2002
class GNUUnicodeTest(UnicodeTest, unittest.TestCase):
    # Runs the inherited UnicodeTest cases with GNU_FORMAT and adds one
    # GNU-specific regression test.

    format = tarfile.GNU_FORMAT

    def test_bad_pax_header(self):
        # Test for issue #8633. GNU tar <= 1.23 creates raw binary fields
        # without a hdrcharset=BINARY header.
        #
        # The member must be retrievable under both encodings; only the
        # lookup matters, the returned TarInfo is not inspected.
        cases = (
            ("utf-8", "pax/bad-pax-\udce4\udcf6\udcfc"),
            ("iso8859-1", "pax/bad-pax-\xe4\xf6\xfc"),
        )
        for codec, member_name in cases:
            with tarfile.open(tarname, encoding=codec,
                              errors="surrogateescape") as archive:
                try:
                    archive.getmember(member_name)
                except KeyError:
                    self.fail("unable to read bad GNU tar pax header")
2019
Guido van Rossumd8faa362007-04-27 19:54:29 +00002020
class PAXUnicodeTest(UnicodeTest, unittest.TestCase):
    # Runs the inherited UnicodeTest cases with PAX_FORMAT and adds one
    # pax-specific test.

    format = tarfile.PAX_FORMAT

    # PAX_FORMAT ignores encoding in write mode.
    test_unicode_filename_error = None

    def test_binary_header(self):
        # Test a POSIX.1-2008 compatible header with a hdrcharset=BINARY field.
        #
        # The member must be retrievable under both encodings; only the
        # lookup matters, the returned TarInfo is not inspected.
        cases = (
            ("utf-8", "pax/hdrcharset-\udce4\udcf6\udcfc"),
            ("iso8859-1", "pax/hdrcharset-\xe4\xf6\xfc"),
        )
        for codec, member_name in cases:
            with tarfile.open(tarname, encoding=codec,
                              errors="surrogateescape") as archive:
                try:
                    archive.getmember(member_name)
                except KeyError:
                    self.fail("unable to read POSIX.1-2008 binary header")
2039
Lars Gustäbel3741eff2007-08-21 12:17:05 +00002040
class AppendTestBase:
    # Test append mode (cp. patch #1652681).
    #
    # Mixin: concrete subclasses also derive from unittest.TestCase.
    # test_append_compressed reads self.suffix, which this class does not
    # define -- presumably supplied by the compression mixins; verify
    # against GzipTest/Bz2Test/LzmaTest.

    def setUp(self):
        # Every test starts without an archive at the scratch path.
        self.tarname = tmpname
        stale = os.path.exists(self.tarname)
        if stale:
            os_helper.unlink(self.tarname)

    def _create_testtar(self, mode="w:"):
        # Copy the member "ustar/regtype" from the reference archive into
        # a new archive at self.tarname (opened with `mode`), renamed "foo".
        with tarfile.open(tarname, encoding="iso8859-1") as source:
            member = source.getmember("ustar/regtype")
            member.name = "foo"
            with source.extractfile(member) as payload, \
                    tarfile.open(self.tarname, mode) as target:
                target.addfile(member, payload)

    def test_append_compressed(self):
        # Opening a compressed archive in append mode must raise ReadError.
        self._create_testtar("w:" + self.suffix)
        with self.assertRaises(tarfile.ReadError):
            tarfile.open(tmpname, "a")
2060
class AppendTest(AppendTestBase, unittest.TestCase):
    # Append-mode tests against uncompressed archives.

    # Not applicable here: setting the inherited test to None removes it
    # from this class.
    test_append_compressed = None

    def _add_testfile(self, fileobj=None):
        # Open the archive (self.tarname, or `fileobj` when given) in
        # append mode and add one empty member called "bar".
        with tarfile.open(self.tarname, "a", fileobj=fileobj) as tar:
            tar.addfile(tarfile.TarInfo("bar"))

    def _test(self, names=None, fileobj=None):
        # Assert that the archive lists exactly `names`, in order.
        # `names` defaults to ["bar"]; the list is built per call rather
        # than shared through a mutable default argument.
        if names is None:
            names = ["bar"]
        with tarfile.open(self.tarname, fileobj=fileobj) as tar:
            self.assertEqual(tar.getnames(), names)

    def test_non_existing(self):
        # Appending when no archive exists yet (setUp removed it).
        self._add_testfile()
        self._test()

    def test_empty(self):
        # Appending to an archive that was created and closed without
        # any members.
        tarfile.open(self.tarname, "w:").close()
        self._add_testfile()
        self._test()

    def test_empty_fileobj(self):
        # Appending to an in-memory file object holding 1024 NUL bytes.
        fobj = io.BytesIO(b"\0" * 1024)
        self._add_testfile(fobj)
        fobj.seek(0)
        self._test(fileobj=fobj)

    def test_fileobj(self):
        # Appending to an in-memory copy of an archive that already
        # contains "foo".
        self._create_testtar()
        with open(self.tarname, "rb") as fobj:
            data = fobj.read()
        fobj = io.BytesIO(data)
        self._add_testfile(fobj)
        fobj.seek(0)
        self._test(names=["foo", "bar"], fileobj=fobj)

    def test_existing(self):
        # Appending to an on-disk archive that already contains "foo".
        self._create_testtar()
        self._add_testfile()
        self._test(names=["foo", "bar"])

    # Append mode is supposed to fail if the tarfile to append to
    # does not end with a zero block.
    def _test_error(self, data):
        # Write `data` verbatim as the archive file, then require that
        # appending to it raises ReadError.
        with open(self.tarname, "wb") as fobj:
            fobj.write(data)
        self.assertRaises(tarfile.ReadError, self._add_testfile)

    def test_null(self):
        self._test_error(b"")

    def test_incomplete(self):
        self._test_error(b"\0" * 13)

    def test_premature_eof(self):
        data = tarfile.TarInfo("foo").tobuf()
        self._test_error(data)

    def test_trailing_garbage(self):
        data = tarfile.TarInfo("foo").tobuf()
        self._test_error(data + b"\0" * 13)

    def test_invalid(self):
        self._test_error(b"a" * 512)
2124
class GzipAppendTest(GzipTest, AppendTestBase, unittest.TestCase):
    # Adds nothing of its own: the tests come from AppendTestBase, combined
    # with whatever GzipTest (defined elsewhere in this file) provides.
    pass
2127
class Bz2AppendTest(Bz2Test, AppendTestBase, unittest.TestCase):
    # Adds nothing of its own: the tests come from AppendTestBase, combined
    # with whatever Bz2Test (defined elsewhere in this file) provides.
    pass
2130
class LzmaAppendTest(LzmaTest, AppendTestBase, unittest.TestCase):
    # Adds nothing of its own: the tests come from AppendTestBase, combined
    # with whatever LzmaTest (defined elsewhere in this file) provides.
    pass
2133
Guido van Rossumd8faa362007-04-27 19:54:29 +00002134
class LimitsTest(unittest.TestCase):
    # Checks which names, link names and uids TarInfo.tobuf() accepts or
    # rejects (ValueError) for each archive format.

    def test_ustar_limits(self):
        ustar = tarfile.USTAR_FORMAT

        # 100 char name
        tarfile.TarInfo("0123456789" * 10).tobuf(ustar)

        # 101 char name that cannot be stored
        info = tarfile.TarInfo("0123456789" * 10 + "0")
        with self.assertRaises(ValueError):
            info.tobuf(ustar)

        # 256 char name with a slash at pos 156
        tarfile.TarInfo("123/" * 62 + "longname").tobuf(ustar)

        # 256 char name that cannot be stored
        info = tarfile.TarInfo("1234567/" * 31 + "longname")
        with self.assertRaises(ValueError):
            info.tobuf(ustar)

        # 512 char name
        info = tarfile.TarInfo("123/" * 126 + "longname")
        with self.assertRaises(ValueError):
            info.tobuf(ustar)

        # 512 char linkname
        info = tarfile.TarInfo("longlink")
        info.linkname = "123/" * 126 + "longname"
        with self.assertRaises(ValueError):
            info.tobuf(ustar)

        # uid > 8 digits
        info = tarfile.TarInfo("name")
        info.uid = 0o10000000
        with self.assertRaises(ValueError):
            info.tobuf(ustar)

    def test_gnu_limits(self):
        gnu = tarfile.GNU_FORMAT

        # 512 char name and linkname are both accepted.
        tarfile.TarInfo("123/" * 126 + "longname").tobuf(gnu)

        info = tarfile.TarInfo("longlink")
        info.linkname = "123/" * 126 + "longname"
        info.tobuf(gnu)

        # uid >= 256 ** 7
        info = tarfile.TarInfo("name")
        info.uid = 0o4000000000000000000
        with self.assertRaises(ValueError):
            info.tobuf(gnu)

    def test_pax_limits(self):
        pax = tarfile.PAX_FORMAT

        # None of the values below may raise.
        tarfile.TarInfo("123/" * 126 + "longname").tobuf(pax)

        info = tarfile.TarInfo("longlink")
        info.linkname = "123/" * 126 + "longname"
        info.tobuf(pax)

        info = tarfile.TarInfo("name")
        info.uid = 0o4000000000000000000
        info.tobuf(pax)
Guido van Rossumd8faa362007-04-27 19:54:29 +00002192
2193
class MiscTest(unittest.TestCase):
    # Tests for tarfile's module-level field helpers (stn, nts, nti, itn)
    # and for the module's __all__.

    def test_char_fields(self):
        # stn() pads with NULs or truncates to the requested length.
        for text, length, expected in (
                ("foo", 8, b"foo\0\0\0\0\0"),
                ("foobar", 3, b"foo")):
            self.assertEqual(tarfile.stn(text, length, "ascii", "strict"),
                             expected)
        # nts() stops at the first NUL.
        for raw in (b"foo\0\0\0\0\0", b"foo\0bar\0"):
            self.assertEqual(tarfile.nts(raw, "ascii", "strict"), "foo")

    def test_read_number_fields(self):
        # Issue 13158: Test if GNU tar specific base-256 number fields
        # are decoded correctly.
        decode_cases = (
            (b"0000001\x00", 1),
            (b"7777777\x00", 0o7777777),
            (b"\x80\x00\x00\x00\x00\x20\x00\x00", 0o10000000),
            (b"\x80\x00\x00\x00\xff\xff\xff\xff", 0xffffffff),
            (b"\xff\xff\xff\xff\xff\xff\xff\xff", -1),
            (b"\xff\xff\xff\xff\xff\xff\xff\x9c", -100),
            (b"\xff\x00\x00\x00\x00\x00\x00\x00", -0x100000000000000),
            # Issue 24514: Test if empty number fields are converted to zero.
            (b"\0", 0),
            (b" \0", 0),
        )
        for raw, expected in decode_cases:
            self.assertEqual(tarfile.nti(raw), expected)

    def test_write_number_fields(self):
        gnu = tarfile.GNU_FORMAT

        # Values that are encoded as octal digits with the default format.
        self.assertEqual(tarfile.itn(1), b"0000001\x00")
        self.assertEqual(tarfile.itn(0o7777777), b"7777777\x00")

        encode_cases = (
            (0o10000000, b"\x80\x00\x00\x00\x00\x20\x00\x00"),
            (0xffffffff, b"\x80\x00\x00\x00\xff\xff\xff\xff"),
            (-1, b"\xff\xff\xff\xff\xff\xff\xff\xff"),
            (-100, b"\xff\xff\xff\xff\xff\xff\xff\x9c"),
            (-0x100000000000000, b"\xff\x00\x00\x00\x00\x00\x00\x00"),
            # Issue 32713: Test if itn() supports float values outside the
            # non-GNU format range
            (-100.0, b"\xff\xff\xff\xff\xff\xff\xff\x9c"),
            (8 ** 12 + 0.0, b"\x80\x00\x00\x10\x00\x00\x00\x00"),
        )
        for number, expected in encode_cases:
            self.assertEqual(tarfile.itn(number, format=gnu), expected)

        # A small negative float must read back as zero.
        self.assertEqual(tarfile.nti(tarfile.itn(-0.1, format=gnu)), 0)

    def test_number_field_limits(self):
        # Each argument tuple must be rejected by itn() with ValueError.
        rejected = (
            (-1, 8, tarfile.USTAR_FORMAT),
            (0o10000000, 8, tarfile.USTAR_FORMAT),
            (-0x10000000001, 6, tarfile.GNU_FORMAT),
            (0x10000000000, 6, tarfile.GNU_FORMAT),
        )
        for itn_args in rejected:
            with self.assertRaises(ValueError):
                tarfile.itn(*itn_args)

    def test__all__(self):
        # Names handed to check__all__ as deliberately not exported.
        not_exported = {
            'version', 'grp', 'pwd', 'symlink_exception', 'NUL',
            'BLOCKSIZE', 'RECORDSIZE', 'GNU_MAGIC', 'POSIX_MAGIC',
            'LENGTH_NAME', 'LENGTH_LINK', 'LENGTH_PREFIX',
            'REGTYPE', 'AREGTYPE', 'LNKTYPE', 'SYMTYPE', 'CHRTYPE',
            'BLKTYPE', 'DIRTYPE', 'FIFOTYPE', 'CONTTYPE',
            'GNUTYPE_LONGNAME', 'GNUTYPE_LONGLINK', 'GNUTYPE_SPARSE',
            'XHDTYPE', 'XGLTYPE', 'SOLARIS_XHDTYPE',
            'SUPPORTED_TYPES', 'REGULAR_TYPES', 'GNU_TYPES',
            'PAX_FIELDS', 'PAX_NAME_FIELDS', 'PAX_NUMBER_FIELDS',
            'stn', 'nts', 'nti', 'itn', 'calc_chksums', 'copyfileobj',
            'filemode',
            'EmptyHeaderError', 'TruncatedHeaderError', 'EOFHeaderError',
            'InvalidHeaderError', 'SubsequentHeaderError',
            'ExFileObject', 'main',
        }
        support.check__all__(self, tarfile, not_exported=not_exported)
Martin Panter104dcda2016-01-16 06:59:13 +00002273
Lars Gustäbelb506dc32007-08-07 18:36:16 +00002274
class CommandLineTest(unittest.TestCase):
    # Tests for the command-line interface, run as "python -m tarfile"
    # in a child interpreter through script_helper.

    @staticmethod
    def _sample_files():
        # The two data files packed into archives by the create and
        # extract tests.
        return [support.findfile('tokenize_tests.txt'),
                support.findfile('tokenize_tests-no-coding-cookie-'
                                 'and-utf8-bom-sig-only.txt')]

    def tarfilecmd(self, *args, **kwargs):
        # Run the CLI with `args`, requiring success (assert_python_ok),
        # and return its stdout with os.linesep replaced by a bare
        # newline.  Keyword arguments are forwarded to assert_python_ok.
        rc, out, err = script_helper.assert_python_ok('-m', 'tarfile', *args,
                                                      **kwargs)
        return out.replace(os.linesep.encode(), b'\n')

    def tarfilecmd_failure(self, *args):
        # Run the CLI with `args`, requiring failure; returns whatever
        # assert_python_failure returns (unpacked by callers as
        # rc, out, err).
        return script_helper.assert_python_failure('-m', 'tarfile', *args)

    def make_simple_tarfile(self, tar_name):
        # Create an uncompressed archive at `tar_name` holding the sample
        # files under their base names, and register its removal.
        files = self._sample_files()
        self.addCleanup(os_helper.unlink, tar_name)
        with tarfile.open(tar_name, 'w') as tf:
            for tardata in files:
                tf.add(tardata, arcname=os.path.basename(tardata))

    def test_bad_use(self):
        # No arguments at all: a usage error on stderr, nothing on stdout.
        rc, out, err = self.tarfilecmd_failure()
        self.assertEqual(out, b'')
        self.assertIn(b'usage', err.lower())
        self.assertIn(b'error', err.lower())
        self.assertIn(b'required', err.lower())
        # An empty archive name: some error text on stderr.
        rc, out, err = self.tarfilecmd_failure('-l', '')
        self.assertEqual(out, b'')
        self.assertNotEqual(err.strip(), b'')

    def test_test_command(self):
        for tar_name in testtarnames:
            for opt in '-t', '--test':
                out = self.tarfilecmd(opt, tar_name)
                self.assertEqual(out, b'')

    def test_test_command_verbose(self):
        for tar_name in testtarnames:
            for opt in '-v', '--verbose':
                out = self.tarfilecmd(opt, '-t', tar_name,
                                      PYTHONIOENCODING='utf-8')
                self.assertIn(b'is a tar archive.\n', out)

    def test_test_command_invalid_file(self):
        # A zip file is not a tar archive.
        zipname = support.findfile('zipdir.zip')
        rc, out, err = self.tarfilecmd_failure('-t', zipname)
        self.assertIn(b' is not a tar archive.', err)
        self.assertEqual(out, b'')
        self.assertEqual(rc, 1)

        # Neither is a real archive cut off after its first 511 bytes.
        for tar_name in testtarnames:
            with self.subTest(tar_name=tar_name):
                with open(tar_name, 'rb') as f:
                    data = f.read()
                try:
                    with open(tmpname, 'wb') as f:
                        f.write(data[:511])
                    rc, out, err = self.tarfilecmd_failure('-t', tmpname)
                    self.assertEqual(out, b'')
                    self.assertEqual(rc, 1)
                finally:
                    os_helper.unlink(tmpname)

    def test_list_command(self):
        # The CLI listing must equal what TarFile.list() prints in-process.
        for tar_name in testtarnames:
            with support.captured_stdout() as t:
                with tarfile.open(tar_name, 'r') as tf:
                    tf.list(verbose=False)
            expected = t.getvalue().encode('ascii', 'backslashreplace')
            for opt in '-l', '--list':
                out = self.tarfilecmd(opt, tar_name,
                                      PYTHONIOENCODING='ascii')
                self.assertEqual(out, expected)

    def test_list_command_verbose(self):
        # Same comparison as test_list_command, with verbose listings.
        for tar_name in testtarnames:
            with support.captured_stdout() as t:
                with tarfile.open(tar_name, 'r') as tf:
                    tf.list(verbose=True)
            expected = t.getvalue().encode('ascii', 'backslashreplace')
            for opt in '-v', '--verbose':
                out = self.tarfilecmd(opt, '-l', tar_name,
                                      PYTHONIOENCODING='ascii')
                self.assertEqual(out, expected)

    def test_list_command_invalid_file(self):
        zipname = support.findfile('zipdir.zip')
        rc, out, err = self.tarfilecmd_failure('-l', zipname)
        self.assertIn(b' is not a tar archive.', err)
        self.assertEqual(out, b'')
        self.assertEqual(rc, 1)

    def test_create_command(self):
        files = self._sample_files()
        for opt in '-c', '--create':
            try:
                out = self.tarfilecmd(opt, tmpname, *files)
                self.assertEqual(out, b'')
                # The result must be readable as an archive.
                with tarfile.open(tmpname) as tar:
                    tar.getmembers()
            finally:
                os_helper.unlink(tmpname)

    def test_create_command_verbose(self):
        files = self._sample_files()
        for opt in '-v', '--verbose':
            try:
                out = self.tarfilecmd(opt, '-c', tmpname, *files,
                                      PYTHONIOENCODING='utf-8')
                self.assertIn(b' file created.', out)
                with tarfile.open(tmpname) as tar:
                    tar.getmembers()
            finally:
                os_helper.unlink(tmpname)

    def test_create_command_dotless_filename(self):
        files = [support.findfile('tokenize_tests.txt')]
        try:
            out = self.tarfilecmd('-c', dotlessname, *files)
            self.assertEqual(out, b'')
            with tarfile.open(dotlessname) as tar:
                tar.getmembers()
        finally:
            os_helper.unlink(dotlessname)

    def test_create_command_dot_started_filename(self):
        tar_name = os.path.join(TEMPDIR, ".testtar")
        files = [support.findfile('tokenize_tests.txt')]
        try:
            out = self.tarfilecmd('-c', tar_name, *files)
            self.assertEqual(out, b'')
            with tarfile.open(tar_name) as tar:
                tar.getmembers()
        finally:
            os_helper.unlink(tar_name)

    def test_create_command_compressed(self):
        files = self._sample_files()
        for filetype in (GzipTest, Bz2Test, LzmaTest):
            # Skip compression types whose `open` attribute is falsy.
            if not filetype.open:
                continue
            # Bound before the try block so the cleanup in `finally` can
            # never fail with NameError and hide the original error.
            tar_name = tmpname + '.' + filetype.suffix
            try:
                self.tarfilecmd('-c', tar_name, *files)
                with filetype.taropen(tar_name) as tar:
                    tar.getmembers()
            finally:
                os_helper.unlink(tar_name)

    def test_extract_command(self):
        self.make_simple_tarfile(tmpname)
        for opt in '-e', '--extract':
            try:
                with os_helper.temp_cwd(tarextdir):
                    out = self.tarfilecmd(opt, tmpname)
                self.assertEqual(out, b'')
            finally:
                os_helper.rmtree(tarextdir)

    def test_extract_command_verbose(self):
        self.make_simple_tarfile(tmpname)
        for opt in '-v', '--verbose':
            try:
                with os_helper.temp_cwd(tarextdir):
                    out = self.tarfilecmd(opt, '-e', tmpname,
                                          PYTHONIOENCODING='utf-8')
                self.assertIn(b' file is extracted.', out)
            finally:
                os_helper.rmtree(tarextdir)

    def test_extract_command_different_directory(self):
        # Extraction into an explicitly named output directory.
        self.make_simple_tarfile(tmpname)
        try:
            with os_helper.temp_cwd(tarextdir):
                out = self.tarfilecmd('-e', tmpname, 'spamdir')
            self.assertEqual(out, b'')
        finally:
            os_helper.rmtree(tarextdir)

    def test_extract_command_invalid_file(self):
        zipname = support.findfile('zipdir.zip')
        with os_helper.temp_cwd(tarextdir):
            rc, out, err = self.tarfilecmd_failure('-e', zipname)
        self.assertIn(b' is not a tar archive.', err)
        self.assertEqual(out, b'')
        self.assertEqual(rc, 1)
2466
2467
class ContextManagerTest(unittest.TestCase):
    # Tests for using TarFile objects in "with" statements.

    def test_basic(self):
        # Open inside the block, closed after it.
        with tarfile.open(tarname) as tar:
            self.assertFalse(tar.closed, "closed inside runtime context")
        self.assertTrue(tar.closed, "context manager failed")

    def test_closed(self):
        # The __enter__() method is supposed to raise OSError
        # if the TarFile object is already closed.
        tar = tarfile.open(tarname)
        tar.close()
        with self.assertRaises(OSError):
            with tar:
                pass

    def test_exception(self):
        # Test if the OSError exception is passed through properly.
        with self.assertRaises(Exception) as exc:
            with tarfile.open(tarname) as tar:
                raise OSError
        self.assertIsInstance(exc.exception, OSError,
                              "wrong exception raised in context manager")
        self.assertTrue(tar.closed, "context manager failed")

    def test_no_eof(self):
        # __exit__() must not write end-of-archive blocks if an
        # exception was raised.
        try:
            with tarfile.open(tmpname, "w") as tar:
                raise Exception
        except Exception:
            # Deliberately discarded: only the state left behind matters.
            # Narrower than a bare "except:", so KeyboardInterrupt and
            # SystemExit still propagate.
            pass
        self.assertEqual(os.path.getsize(tmpname), 0,
                         "context manager wrote an end-of-archive block")
        self.assertTrue(tar.closed, "context manager failed")

    def test_eof(self):
        # __exit__() must write end-of-archive blocks, i.e. call
        # TarFile.close() if there was no error.
        with tarfile.open(tmpname, "w"):
            pass
        self.assertNotEqual(os.path.getsize(tmpname), 0,
                            "context manager wrote no end-of-archive block")

    def test_fileobj(self):
        # Test that __exit__() did not close the external file
        # object.
        with open(tmpname, "wb") as fobj:
            try:
                with tarfile.open(fileobj=fobj, mode="w") as tar:
                    raise Exception
            except Exception:
                # Deliberately discarded, see test_no_eof.
                pass
            # Checked while still inside the outer "with": fobj must be
            # open here, whereas the TarFile must already be closed.
            self.assertFalse(fobj.closed, "external file object was closed")
            self.assertTrue(tar.closed, "context manager failed")
Lars Gustäbel01385812010-03-03 12:08:54 +00002524
2525
@unittest.skipIf(hasattr(os, "link"), "requires os.link to be missing")
class LinkEmulationTest(ReadTest, unittest.TestCase):

    # Test for issue #8741 regression. On platforms that do not support
    # symbolic or hard links tarfile tries to extract these types of members
    # as the regular files they point to.
    def _test_link_extraction(self, name):
        # Extract member `name` below TEMPDIR and compare the SHA-256 of
        # the resulting file with sha256_regtype.
        self.tar.extract(name, TEMPDIR)
        extracted_path = os.path.join(TEMPDIR, name)
        with open(extracted_path, "rb") as stream:
            digest = sha256sum(stream.read())
        self.assertEqual(digest, sha256_regtype)

    # See issues #1578269, #8879, and #17689 for some history on these skips
    @unittest.skipIf(hasattr(os.path, "islink"),
                     "Skip emulation - has os.path.islink but not os.link")
    def test_hardlink_extraction1(self):
        self._test_link_extraction("ustar/lnktype")

    @unittest.skipIf(hasattr(os.path, "islink"),
                     "Skip emulation - has os.path.islink but not os.link")
    def test_hardlink_extraction2(self):
        self._test_link_extraction("./ustar/linktest2/lnktype")

    @unittest.skipIf(hasattr(os, "symlink"),
                     "Skip emulation if symlink exists")
    def test_symlink_extraction1(self):
        self._test_link_extraction("ustar/symtype")

    @unittest.skipIf(hasattr(os, "symlink"),
                     "Skip emulation if symlink exists")
    def test_symlink_extraction2(self):
        self._test_link_extraction("./ustar/linktest2/symtype")
2558
2559
class Bz2PartialReadTest(Bz2Test, unittest.TestCase):
    # Issue5068: The _BZ2Proxy.read() method loops forever
    # on an empty or partial bzipped file.

    def _test_partial_input(self, mode):
        # Feed tarfile.open() every prefix of a small bz2-compressed
        # archive (from empty up to the complete data) and make sure it
        # never keeps reading once the input is exhausted.
        class MyBytesIO(io.BytesIO):
            # True once a read() has been issued with the position
            # already at the end of the data; the next read() after that
            # is treated as evidence of an endless loop.
            hit_eof = False

            def read(self, n):
                if self.hit_eof:
                    raise AssertionError("infinite loop detected in "
                                         "tarfile.open()")
                self.hit_eof = self.tell() == len(self.getvalue())
                return super().read(n)

            def seek(self, *args):
                # Repositioning is legitimate and re-arms the detector.
                self.hit_eof = False
                return super().seek(*args)

        data = bz2.compress(tarfile.TarInfo("foo").tobuf())
        for x in range(len(data) + 1):
            try:
                tar = tarfile.open(fileobj=MyBytesIO(data[:x]), mode=mode)
            except tarfile.ReadError:
                pass  # we have no interest in ReadErrors
            else:
                # Do not leave a successfully opened archive unclosed.
                tar.close()

    def test_partial_input(self):
        self._test_partial_input("r")

    def test_partial_input_bz2(self):
        self._test_partial_input("r:bz2")
2589
2590
def root_is_uid_gid_0():
    """Return True if uid 0 and gid 0 both exist and are both named 'root'.

    Returns False when the pwd or grp module cannot be imported, when
    either id has no database entry, or when either name differs from
    'root'.
    """
    try:
        import pwd
        import grp
    except ImportError:
        return False
    try:
        user_name = pwd.getpwuid(0)[0]
        group_name = grp.getgrgid(0)[0]
    except KeyError:
        # getpwuid()/getgrgid() raise KeyError when the id has no entry.
        # Report "not root" instead of letting the error reach the caller.
        return False
    return user_name == 'root' and group_name == 'root'
2601
2602
Zachary Waread3e27a2015-05-12 23:57:21 -05002603@unittest.skipUnless(hasattr(os, 'chown'), "missing os.chown")
2604@unittest.skipUnless(hasattr(os, 'geteuid'), "missing os.geteuid")
Eric V. Smith7a803892015-04-15 10:27:58 -04002605class NumericOwnerTest(unittest.TestCase):
2606 # mock the following:
2607 # os.chown: so we can test what's being called
2608 # os.chmod: so the modes are not actually changed. if they are, we can't
2609 # delete the files/directories
2610 # os.geteuid: so we can lie and say we're root (uid = 0)
2611
@staticmethod
def _make_test_archive(filename_1, dirname_1, filename_2):
    # Write an archive to tmpname containing a file, a directory and a
    # second file, each with its own uid/gid and with uname/gname set to
    # 'root'.  Returns the path of the archive.

    # the file contents to write
    payload = io.BytesIO(b"content")

    # (name, uid, gid, member type, file object passed to addfile)
    members = (
        (filename_1, 99, 98, tarfile.REGTYPE, payload),
        (dirname_1, 77, 76, tarfile.DIRTYPE, None),
        (filename_2, 88, 87, tarfile.REGTYPE, payload),
    )
    with tarfile.open(tmpname, 'w') as archive:
        for name, uid, gid, member_type, contents in members:
            info = tarfile.TarInfo(name)
            info.uid, info.gid = uid, gid
            info.uname = info.gname = 'root'
            info.type = member_type
            archive.addfile(info, contents)

    # return the full pathname to the tar file
    return tmpname
2635
@staticmethod
@contextmanager
def _setup_test(mock_geteuid):
    # Context manager: builds the test archive, opens it for reading and
    # yields (tarfile, filename_1, dirname_1, filename_2); the archive is
    # closed again when the block ends.
    mock_geteuid.return_value = 0  # lie and say we're root

    # the names we want stored in the tarfile
    filename_1 = 'numeric-owner-testfile'
    dirname_1 = 'dir'
    filename_2 = os.path.join(dirname_1, filename_1)

    # create the tarfile with the contents we're after
    archive_path = NumericOwnerTest._make_test_archive(
        filename_1, dirname_1, filename_2)

    # open the tarfile for reading. yield it and the names of the items
    # we stored into the file
    with tarfile.open(archive_path) as archive:
        yield archive, filename_1, dirname_1, filename_2
2657
2658 @unittest.mock.patch('os.chown')
2659 @unittest.mock.patch('os.chmod')
2660 @unittest.mock.patch('os.geteuid')
2661 def test_extract_with_numeric_owner(self, mock_geteuid, mock_chmod,
2662 mock_chown):
2663 with self._setup_test(mock_geteuid) as (tarfl, filename_1, _,
2664 filename_2):
2665 tarfl.extract(filename_1, TEMPDIR, numeric_owner=True)
2666 tarfl.extract(filename_2 , TEMPDIR, numeric_owner=True)
2667
2668 # convert to filesystem paths
2669 f_filename_1 = os.path.join(TEMPDIR, filename_1)
2670 f_filename_2 = os.path.join(TEMPDIR, filename_2)
2671
2672 mock_chown.assert_has_calls([unittest.mock.call(f_filename_1, 99, 98),
2673 unittest.mock.call(f_filename_2, 88, 87),
2674 ],
2675 any_order=True)
2676
2677 @unittest.mock.patch('os.chown')
2678 @unittest.mock.patch('os.chmod')
2679 @unittest.mock.patch('os.geteuid')
2680 def test_extractall_with_numeric_owner(self, mock_geteuid, mock_chmod,
2681 mock_chown):
2682 with self._setup_test(mock_geteuid) as (tarfl, filename_1, dirname_1,
2683 filename_2):
2684 tarfl.extractall(TEMPDIR, numeric_owner=True)
2685
2686 # convert to filesystem paths
2687 f_filename_1 = os.path.join(TEMPDIR, filename_1)
2688 f_dirname_1 = os.path.join(TEMPDIR, dirname_1)
2689 f_filename_2 = os.path.join(TEMPDIR, filename_2)
2690
2691 mock_chown.assert_has_calls([unittest.mock.call(f_filename_1, 99, 98),
2692 unittest.mock.call(f_dirname_1, 77, 76),
2693 unittest.mock.call(f_filename_2, 88, 87),
2694 ],
2695 any_order=True)
2696
2697 # this test requires that uid=0 and gid=0 really be named 'root'. that's
2698 # because the uname and gname in the test file are 'root', and extract()
2699 # will look them up using pwd and grp to find their uid and gid, which we
2700 # test here to be 0.
2701 @unittest.skipUnless(root_is_uid_gid_0(),
2702 'uid=0,gid=0 must be named "root"')
2703 @unittest.mock.patch('os.chown')
2704 @unittest.mock.patch('os.chmod')
2705 @unittest.mock.patch('os.geteuid')
2706 def test_extract_without_numeric_owner(self, mock_geteuid, mock_chmod,
2707 mock_chown):
2708 with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _):
2709 tarfl.extract(filename_1, TEMPDIR, numeric_owner=False)
2710
2711 # convert to filesystem paths
2712 f_filename_1 = os.path.join(TEMPDIR, filename_1)
2713
2714 mock_chown.assert_called_with(f_filename_1, 0, 0)
2715
2716 @unittest.mock.patch('os.geteuid')
2717 def test_keyword_only(self, mock_geteuid):
2718 with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _):
2719 self.assertRaises(TypeError,
2720 tarfl.extract, filename_1, TEMPDIR, False, True)
2721
2722
def setUpModule():
    """Prepare TEMPDIR and the compressed variants of the reference archive.

    Rebinds the module-level ``testtarnames`` list so that it holds the
    plain tar file followed by every compressed copy that was written.
    """
    global testtarnames

    # Clear any stale entry at TEMPDIR, then create the directory.
    os_helper.unlink(TEMPDIR)
    os.makedirs(TEMPDIR)

    testtarnames = [tarname]
    with open(tarname, "rb") as source:
        raw = source.read()

    # Create compressed tarfiles.
    for test_cls in (GzipTest, Bz2Test, LzmaTest):
        if not test_cls.open:
            # no opener on this class (presumably the compression module
            # is unavailable), so there is nothing to write
            continue
        os_helper.unlink(test_cls.tarname)
        testtarnames.append(test_cls.tarname)
        with test_cls.open(test_cls.tarname, "wb") as packed:
            packed.write(raw)
Guido van Rossumd8faa362007-04-27 19:54:29 +00002739
def tearDownModule():
    """Delete the TEMPDIR tree if it is still present."""
    if not os.path.exists(TEMPDIR):
        return
    os_helper.rmtree(TEMPDIR)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002743
# Run the tests when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()