blob: 77ad8305c3107aa383d1aecfeb244575f4a69124 [file] [log] [blame]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001import sys
2import os
Lars Gustäbelb506dc32007-08-07 18:36:16 +00003import io
Christian Heimesc64a1a62019-09-25 16:30:20 +02004from hashlib import sha256
Eric V. Smith7a803892015-04-15 10:27:58 -04005from contextlib import contextmanager
Serhiy Storchakaa89d22a2016-10-30 20:52:29 +02006from random import Random
Serhiy Storchakac45cd162017-03-08 10:32:44 +02007import pathlib
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00008
9import unittest
Eric V. Smith7a803892015-04-15 10:27:58 -040010import unittest.mock
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000011import tarfile
12
Berker Peksagce643912015-05-06 06:33:17 +030013from test import support
Hai Shia7f5d932020-08-04 00:41:24 +080014from test.support import os_helper
Hai Shi66abe982020-04-29 09:11:29 +080015from test.support import script_helper
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000016
17# Check for our compression modules.
18try:
19 import gzip
Brett Cannon260fbe82013-07-04 18:16:15 -040020except ImportError:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000021 gzip = None
22try:
23 import bz2
Brett Cannon260fbe82013-07-04 18:16:15 -040024except ImportError:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000025 bz2 = None
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +010026try:
27 import lzma
Brett Cannon260fbe82013-07-04 18:16:15 -040028except ImportError:
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +010029 lzma = None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000030
def sha256sum(data):
    """Return the hexadecimal SHA-256 digest of the bytes-like *data*."""
    digest = sha256()
    digest.update(data)
    return digest.hexdigest()
Guido van Rossumd8faa362007-04-27 19:54:29 +000033
# Scratch area for the tests: every temporary path below derives from TEMPDIR.
TEMPDIR = os.path.abspath(os_helper.TESTFN) + "-tardir"
tarextdir = TEMPDIR + '-extract-test'
# Location of "testtar.tar" as resolved by support.findfile().
tarname = support.findfile("testtar.tar")
# Paths inside TEMPDIR named after the gzip, bz2 and xz flavours of the archive.
gzipname, bz2name, xzname = (
    os.path.join(TEMPDIR, "testtar.tar" + ext)
    for ext in (".gz", ".bz2", ".xz")
)
tmpname = os.path.join(TEMPDIR, "tmp.tar")
dotlessname = os.path.join(TEMPDIR, "testtar")

# Expected SHA-256 hex digests.  sha256_regtype is what the data of the
# "ustar/regtype" member must hash to; sha256_sparse presumably plays the
# same role for the sparse members -- confirm against its users.
sha256_regtype = "e09e4bc8b3c9d9177e77256353b36c159f5f040531bbd4b024a8f9b9196c71ce"
sha256_sparse = "4f05a776071146756345ceee937b33fc5644f5a96b9780d1c7d6a32cdf164d7b"
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000049
50
class TarTest:
    # Mixin describing the uncompressed archive flavour.  Classes that use
    # the ``mode`` property must also define ``prefix`` (e.g. "r:").
    tarname = tarname
    # Compression part of the tarfile mode string; empty for plain tar.
    suffix = ''
    # Callable used to open the raw archive file.
    open = io.FileIO
    # TarFile constructor matching this flavour.
    taropen = tarfile.TarFile.taropen

    @property
    def mode(self):
        # Full tarfile mode string: "<prefix><suffix>".
        prefix, suffix = self.prefix, self.suffix
        return prefix + suffix
60
@support.requires_gzip()
class GzipTest:
    # Mixin overriding the TarTest attributes for the gzip flavour.
    tarname = gzipname
    suffix = 'gz'
    # ``gzip`` is None when the module could not be imported.
    open = None if not gzip else gzip.GzipFile
    taropen = tarfile.TarFile.gzopen
Serhiy Storchaka8b562922013-06-17 15:38:50 +030067
@support.requires_bz2()
class Bz2Test:
    # Mixin overriding the TarTest attributes for the bz2 flavour.
    tarname = bz2name
    suffix = 'bz2'
    # ``bz2`` is None when the module could not be imported.
    open = None if not bz2 else bz2.BZ2File
    taropen = tarfile.TarFile.bz2open
Serhiy Storchaka8b562922013-06-17 15:38:50 +030074
@support.requires_lzma()
class LzmaTest:
    # Mixin overriding the TarTest attributes for the xz flavour.
    tarname = xzname
    suffix = 'xz'
    # ``lzma`` is None when the module could not be imported.
    open = None if not lzma else lzma.LZMAFile
    taropen = tarfile.TarFile.xzopen
Serhiy Storchaka8b562922013-06-17 15:38:50 +030081
82
class ReadTest(TarTest):
    # Base for read-only tests: each test gets ``self.tar``, the archive
    # ``self.tarname`` opened in ``self.mode`` with iso8859-1 encoding.

    prefix = "r:"

    def setUp(self):
        archive = tarfile.open(self.tarname, mode=self.mode,
                               encoding="iso8859-1")
        self.tar = archive

    def tearDown(self):
        archive = self.tar
        archive.close()
93
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000094
class UstarReadTest(ReadTest, unittest.TestCase):
    # Exercises the file objects returned by TarFile.extractfile() for the
    # "ustar/regtype" member and for link members that resolve to it.

    def test_fileobj_regular_file(self):
        # Size and SHA-256 digest of the extracted data must match.
        failure = "regular file extraction failed"
        member = self.tar.getmember("ustar/regtype")
        with self.tar.extractfile(member) as extracted:
            payload = extracted.read()
            self.assertEqual(len(payload), member.size, failure)
            self.assertEqual(sha256sum(payload), sha256_regtype, failure)

    def test_fileobj_readlines(self):
        # readlines() on the archive member must agree with readlines() on
        # the copy of the member extracted to disk.
        failure = "fileobj.readlines() failed"
        self.tar.extract("ustar/regtype", TEMPDIR)
        member = self.tar.getmember("ustar/regtype")
        on_disk = os.path.join(TEMPDIR, "ustar/regtype")
        with open(on_disk, "r") as disk_file:
            disk_lines = disk_file.readlines()

        with self.tar.extractfile(member) as extracted:
            text = io.TextIOWrapper(extracted)
            archive_lines = text.readlines()
            self.assertEqual(disk_lines, archive_lines, failure)
            self.assertEqual(len(archive_lines), 114, failure)
            self.assertEqual(archive_lines[83],
                             "I will gladly admit that Python is not the fastest "
                             "running scripting language.\n",
                             failure)

    def test_fileobj_iter(self):
        # Iterating the member must yield the same lines as the copy on disk.
        self.tar.extract("ustar/regtype", TEMPDIR)
        member = self.tar.getmember("ustar/regtype")
        on_disk = os.path.join(TEMPDIR, "ustar/regtype")
        with open(on_disk, "r") as disk_file:
            disk_lines = disk_file.readlines()
        with self.tar.extractfile(member) as extracted:
            archive_lines = list(io.TextIOWrapper(extracted))
            self.assertEqual(disk_lines, archive_lines,
                             "fileobj.__iter__() failed")

    def test_fileobj_seek(self):
        # seek(), tell(), read() and readline() on the member are checked
        # against the bytes of the copy extracted to disk.
        self.tar.extract("ustar/regtype", TEMPDIR)
        with open(os.path.join(TEMPDIR, "ustar/regtype"), "rb") as disk_file:
            expected = disk_file.read()

        member = self.tar.getmember("ustar/regtype")
        with self.tar.extractfile(member) as extracted:
            # Read to the end first; the seek(0) below then starts at EOF.
            extracted.read()
            extracted.seek(0)
            self.assertEqual(0, extracted.tell(),
                             "seek() to file's start failed")
            extracted.seek(2048, io.SEEK_SET)
            self.assertEqual(2048, extracted.tell(),
                             "seek() to absolute position failed")
            extracted.seek(-1024, io.SEEK_CUR)
            self.assertEqual(1024, extracted.tell(),
                             "seek() to negative relative position failed")
            extracted.seek(1024, io.SEEK_CUR)
            self.assertEqual(2048, extracted.tell(),
                             "seek() to positive relative position failed")
            chunk = extracted.read(10)
            self.assertEqual(chunk, expected[2048:2058],
                             "read() after seek failed")
            extracted.seek(0, io.SEEK_END)
            self.assertEqual(member.size, extracted.tell(),
                             "seek() to file's end failed")
            self.assertEqual(extracted.read(), b"",
                             "read() at file's end did not return empty string")
            extracted.seek(-member.size, io.SEEK_END)
            self.assertEqual(0, extracted.tell(),
                             "relative seek() to file's end failed")
            extracted.seek(512)
            first_pass = extracted.readlines()
            extracted.seek(512)
            second_pass = extracted.readlines()
            self.assertEqual(first_pass, second_pass,
                             "readlines() after seek failed")
            extracted.seek(0)
            self.assertEqual(len(extracted.readline()), extracted.tell(),
                             "tell() after readline() failed")
            extracted.seek(512)
            self.assertEqual(len(extracted.readline()) + 512, extracted.tell(),
                             "tell() after seek() and readline() failed")
            extracted.seek(0)
            first_line = extracted.readline()
            self.assertEqual(extracted.read(), expected[len(first_line):],
                             "read() after readline() failed")

    def test_fileobj_text(self):
        # The member wrapped in a TextIOWrapper must be readable and seekable.
        with self.tar.extractfile("ustar/regtype") as raw:
            text = io.TextIOWrapper(raw)
            payload = text.read().encode("iso8859-1")
            self.assertEqual(sha256sum(payload), sha256_regtype)
            try:
                text.seek(100)
            except AttributeError:
                # Issue #13815: seek() complained about a missing
                # flush() method.
                self.fail("seeking failed in text mode")

    # Symbolic and hard links must be resolved by extractfile(): the file
    # object obtained for a link member has to carry the same name as the
    # one obtained for the regular member the link points to.
    def _test_fileobj_link(self, lnktype, regtype):
        with self.tar.extractfile(lnktype) as via_link:
            with self.tar.extractfile(regtype) as direct:
                self.assertEqual(via_link.name, direct.name)

    def test_fileobj_link1(self):
        self._test_fileobj_link("ustar/lnktype", "ustar/regtype")

    def test_fileobj_link2(self):
        self._test_fileobj_link("./ustar/linktest2/lnktype",
                                "ustar/linktest1/regtype")

    def test_fileobj_symlink1(self):
        self._test_fileobj_link("ustar/symtype", "ustar/regtype")

    def test_fileobj_symlink2(self):
        self._test_fileobj_link("./ustar/linktest2/symtype",
                                "ustar/linktest1/regtype")

    def test_issue14160(self):
        self._test_fileobj_link("symtype2", "ustar/regtype")
class GzipUstarReadTest(GzipTest, UstarReadTest):
    """UstarReadTest with the GzipTest archive attributes mixed in."""
221
class Bz2UstarReadTest(Bz2Test, UstarReadTest):
    """UstarReadTest with the Bz2Test archive attributes mixed in."""
224
class LzmaUstarReadTest(LzmaTest, UstarReadTest):
    """UstarReadTest with the LzmaTest archive attributes mixed in."""
227
Guido van Rossumd8faa362007-04-27 19:54:29 +0000228
class ListTest(ReadTest, unittest.TestCase):
    # Checks what TarFile.list() writes to sys.stdout.

    # Override setUp to use default encoding (UTF-8)
    def setUp(self):
        self.tar = tarfile.open(self.tarname, mode=self.mode)

    def _listing(self, **list_kwargs):
        # Call self.tar.list(**list_kwargs) while sys.stdout is swapped for
        # an ASCII text wrapper, and return the bytes that were written.
        stream = io.TextIOWrapper(io.BytesIO(), 'ascii', newline='\n')
        with support.swap_attr(sys, 'stdout', stream):
            self.tar.list(**list_kwargs)
        return stream.detach().getvalue()

    def test_list(self):
        out = self._listing(verbose=False)
        plain_names = (
            b'ustar/conttype',
            b'ustar/regtype',
            b'ustar/lnktype',
            b'ustar' + (b'/12345' * 40) + b'67/longname',
            b'./ustar/linktest2/symtype',
            b'./ustar/linktest2/lnktype',
            # Make sure it puts trailing slash for directory
            b'ustar/dirtype/',
            b'ustar/dirtype-with-size/',
        )
        for expected in plain_names:
            self.assertIn(expected, out)

        # Make sure it is able to print unencodable characters
        def ascii_escaped(raw):
            decoded = raw.decode(self.tar.encoding, 'surrogateescape')
            return decoded.encode('ascii', 'backslashreplace')
        unencodable_names = (
            b'ustar/umlauts-\xc4\xd6\xdc\xe4\xf6\xfc\xdf',
            b'misc/regtype-hpux-signed-chksum-'
            b'\xc4\xd6\xdc\xe4\xf6\xfc\xdf',
            b'misc/regtype-old-v7-signed-chksum-'
            b'\xc4\xd6\xdc\xe4\xf6\xfc\xdf',
            b'pax/bad-pax-\xe4\xf6\xfc',
            b'pax/hdrcharset-\xe4\xf6\xfc',
        )
        for raw in unencodable_names:
            self.assertIn(ascii_escaped(raw), out)

        # Make sure it prints files separated by one newline without any
        # 'ls -l'-like accessories if verbose flag is not being used
        # ...
        # ustar/conttype
        # ustar/regtype
        # ...
        consecutive = (br'ustar/conttype ?\r?\n'
                       br'ustar/regtype ?\r?\n')
        self.assertRegex(out, consecutive)
        # Make sure it does not print the source of link without verbose flag
        for marker in (b'link to', b'->'):
            self.assertNotIn(marker, out)

    def test_list_verbose(self):
        out = self._listing(verbose=True)
        # Make sure it prints files separated by one newline with 'ls -l'-like
        # accessories if verbose flag is being used
        # ...
        # ?rw-r--r-- tarfile/tarfile     7011 2003-01-06 07:19:43 ustar/conttype
        # ?rw-r--r-- tarfile/tarfile     7011 2003-01-06 07:19:43 ustar/regtype
        # ...
        entry = (br'\?rw-r--r-- tarfile/tarfile\s+7011 '
                 br'\d{4}-\d\d-\d\d\s+\d\d:\d\d:\d\d '
                 br'ustar/\w+type ?\r?\n')
        self.assertRegex(out, entry * 2)
        # Make sure it prints the source of link with verbose flag
        long_dirs = b'/123' * 125
        link_lines = (
            b'ustar/symtype -> regtype',
            b'./ustar/linktest2/symtype -> ../linktest1/regtype',
            b'./ustar/linktest2/lnktype link to '
            b'./ustar/linktest1/regtype',
            b'gnu' + long_dirs + b'/longlink link to gnu' +
            long_dirs + b'/longname',
            b'pax' + long_dirs + b'/longlink link to pax' +
            long_dirs + b'/longname',
        )
        for expected in link_lines:
            self.assertIn(expected, out)

    def test_list_members(self):
        # Only the members handed to list() may show up in the output.
        def members(tar):
            for tarinfo in tar.getmembers():
                if 'reg' in tarinfo.name:
                    yield tarinfo
        out = self._listing(verbose=False, members=members(self.tar))
        self.assertIn(b'ustar/regtype', out)
        self.assertNotIn(b'ustar/conttype', out)
Serhiy Storchaka3b4f1592014-02-05 20:53:36 +0200308
class GzipListTest(GzipTest, ListTest):
    """ListTest with the GzipTest archive attributes mixed in."""
311
312
class Bz2ListTest(Bz2Test, ListTest):
    """ListTest with the Bz2Test archive attributes mixed in."""
315
316
class LzmaListTest(LzmaTest, ListTest):
    """ListTest with the LzmaTest archive attributes mixed in."""
319
320
class CommonReadTest(ReadTest):
    # Read tests that only rely on self.tarname / self.mode / self.open, so
    # they can be reused by every concrete read-test class built on ReadTest.

    def test_is_tarfile_erroneous(self):
        # An empty file and arbitrary bytes are not tar archives.
        with open(tmpname, "wb"):
            pass

        # is_tarfile works on filenames
        self.assertFalse(tarfile.is_tarfile(tmpname))

        # is_tarfile works on path-like objects
        self.assertFalse(tarfile.is_tarfile(pathlib.Path(tmpname)))

        # is_tarfile works on file objects
        with open(tmpname, "rb") as fobj:
            self.assertFalse(tarfile.is_tarfile(fobj))

        # is_tarfile works on file-like objects
        self.assertFalse(tarfile.is_tarfile(io.BytesIO(b"invalid")))

    def test_is_tarfile_valid(self):
        # is_tarfile works on filenames
        self.assertTrue(tarfile.is_tarfile(self.tarname))

        # is_tarfile works on path-like objects
        self.assertTrue(tarfile.is_tarfile(pathlib.Path(self.tarname)))

        # is_tarfile works on file objects
        with open(self.tarname, "rb") as fobj:
            self.assertTrue(tarfile.is_tarfile(fobj))

        # is_tarfile works on file-like objects
        with open(self.tarname, "rb") as fobj:
            self.assertTrue(tarfile.is_tarfile(io.BytesIO(fobj.read())))

    def test_empty_tarfile(self):
        # Test for issue6123: Allow opening empty archives.
        # This test checks if tarfile.open() is able to open an empty tar
        # archive successfully. Note that an empty tar archive is not the
        # same as an empty file!
        with tarfile.open(tmpname, self.mode.replace("r", "w")):
            pass
        # The archive is opened inside the ``try`` via ``with`` so that a
        # failing open() reports through self.fail() instead of tripping
        # over an unbound ``tar`` in a ``finally`` clause.
        try:
            with tarfile.open(tmpname, self.mode) as tar:
                tar.getnames()
                self.assertListEqual(tar.getmembers(), [])
        except tarfile.ReadError:
            self.fail("tarfile.open() failed on empty archive")

    def test_non_existent_tarfile(self):
        # Test for issue11513: prevent non-existent gzipped tarfiles raising
        # multiple exceptions.
        with self.assertRaisesRegex(FileNotFoundError, "xxx"):
            tarfile.open("xxx", self.mode)

    def test_null_tarfile(self):
        # Test for issue6123: Allow opening empty archives.
        # This test guarantees that tarfile.open() does not treat an empty
        # file as an empty tar archive.
        with open(tmpname, "wb"):
            pass
        self.assertRaises(tarfile.ReadError, tarfile.open, tmpname, self.mode)
        self.assertRaises(tarfile.ReadError, tarfile.open, tmpname)

    def test_ignore_zeros(self):
        # Test TarFile's ignore_zeros option.
        # generate 512 pseudorandom bytes
        data = Random(0).randbytes(512)
        for char in (b'\0', b'a'):
            # Test if EOFHeaderError ('\0') and InvalidHeaderError ('a')
            # are ignored correctly.
            with self.open(tmpname, "w") as fobj:
                # 1024 filler bytes followed by one real member.
                fobj.write(char * 1024)
                tarinfo = tarfile.TarInfo("foo")
                tarinfo.size = len(data)
                fobj.write(tarinfo.tobuf())
                fobj.write(data)

            with tarfile.open(tmpname, mode="r", ignore_zeros=True) as tar:
                self.assertListEqual(tar.getnames(), ["foo"],
                    "ignore_zeros=True should have skipped the %r-blocks" %
                    char)

    def test_premature_end_of_archive(self):
        # An archive holding one 1024-byte member is truncated to several
        # sizes; iterating, extracting and reading must all raise ReadError.
        for size in (512, 600, 1024, 1200):
            with tarfile.open(tmpname, "w:") as tar:
                t = tarfile.TarInfo("foo")
                t.size = 1024
                tar.addfile(t, io.BytesIO(b"a" * 1024))

            with open(tmpname, "r+b") as fobj:
                fobj.truncate(size)

            with tarfile.open(tmpname) as tar:
                with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"):
                    for t in tar:
                        pass

            with tarfile.open(tmpname) as tar:
                t = tar.next()

                with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"):
                    tar.extract(t, TEMPDIR)

                with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"):
                    tar.extractfile(t).read()

    def test_length_zero_header(self):
        # bpo-39017 (CVE-2019-20907): reading a zero-length header should fail
        # with an exception
        with self.assertRaisesRegex(tarfile.ReadError, "file could not be opened successfully"):
            with tarfile.open(support.findfile('recursion.tar')):
                pass
439
Serhiy Storchaka8b562922013-06-17 15:38:50 +0300440class MiscReadTestBase(CommonReadTest):
Serhiy Storchaka2c6a3ae2014-07-16 23:58:58 +0300441 def requires_name_attribute(self):
442 pass
443
Thomas Woutersed03b412007-08-28 21:37:11 +0000444 def test_no_name_argument(self):
Serhiy Storchaka2c6a3ae2014-07-16 23:58:58 +0300445 self.requires_name_attribute()
Antoine Pitrou95f55602010-09-23 18:36:46 +0000446 with open(self.tarname, "rb") as fobj:
Serhiy Storchaka2c6a3ae2014-07-16 23:58:58 +0300447 self.assertIsInstance(fobj.name, str)
448 with tarfile.open(fileobj=fobj, mode=self.mode) as tar:
449 self.assertIsInstance(tar.name, str)
450 self.assertEqual(tar.name, os.path.abspath(fobj.name))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000451
Thomas Woutersed03b412007-08-28 21:37:11 +0000452 def test_no_name_attribute(self):
Antoine Pitrou95f55602010-09-23 18:36:46 +0000453 with open(self.tarname, "rb") as fobj:
454 data = fobj.read()
Thomas Woutersed03b412007-08-28 21:37:11 +0000455 fobj = io.BytesIO(data)
456 self.assertRaises(AttributeError, getattr, fobj, "name")
457 tar = tarfile.open(fileobj=fobj, mode=self.mode)
Serhiy Storchaka2c6a3ae2014-07-16 23:58:58 +0300458 self.assertIsNone(tar.name)
Thomas Woutersed03b412007-08-28 21:37:11 +0000459
460 def test_empty_name_attribute(self):
Antoine Pitrou95f55602010-09-23 18:36:46 +0000461 with open(self.tarname, "rb") as fobj:
462 data = fobj.read()
Thomas Woutersed03b412007-08-28 21:37:11 +0000463 fobj = io.BytesIO(data)
464 fobj.name = ""
Antoine Pitroue1eca4e2010-10-29 23:49:49 +0000465 with tarfile.open(fileobj=fobj, mode=self.mode) as tar:
Serhiy Storchaka2c6a3ae2014-07-16 23:58:58 +0300466 self.assertIsNone(tar.name)
467
468 def test_int_name_attribute(self):
469 # Issue 21044: tarfile.open() should handle fileobj with an integer
470 # 'name' attribute.
471 fd = os.open(self.tarname, os.O_RDONLY)
472 with open(fd, 'rb') as fobj:
473 self.assertIsInstance(fobj.name, int)
474 with tarfile.open(fileobj=fobj, mode=self.mode) as tar:
475 self.assertIsNone(tar.name)
476
477 def test_bytes_name_attribute(self):
478 self.requires_name_attribute()
479 tarname = os.fsencode(self.tarname)
480 with open(tarname, 'rb') as fobj:
481 self.assertIsInstance(fobj.name, bytes)
482 with tarfile.open(fileobj=fobj, mode=self.mode) as tar:
483 self.assertIsInstance(tar.name, bytes)
484 self.assertEqual(tar.name, os.path.abspath(fobj.name))
Thomas Woutersed03b412007-08-28 21:37:11 +0000485
Serhiy Storchakac45cd162017-03-08 10:32:44 +0200486 def test_pathlike_name(self):
487 tarname = pathlib.Path(self.tarname)
488 with tarfile.open(tarname, mode=self.mode) as tar:
489 self.assertIsInstance(tar.name, str)
490 self.assertEqual(tar.name, os.path.abspath(os.fspath(tarname)))
491 with self.taropen(tarname) as tar:
492 self.assertIsInstance(tar.name, str)
493 self.assertEqual(tar.name, os.path.abspath(os.fspath(tarname)))
494 with tarfile.TarFile.open(tarname, mode=self.mode) as tar:
495 self.assertIsInstance(tar.name, str)
496 self.assertEqual(tar.name, os.path.abspath(os.fspath(tarname)))
497 if self.suffix == '':
498 with tarfile.TarFile(tarname, mode='r') as tar:
499 self.assertIsInstance(tar.name, str)
500 self.assertEqual(tar.name, os.path.abspath(os.fspath(tarname)))
501
Serhiy Storchaka53ad0cd2014-01-18 15:35:37 +0200502 def test_illegal_mode_arg(self):
503 with open(tmpname, 'wb'):
504 pass
505 with self.assertRaisesRegex(ValueError, 'mode must be '):
506 tar = self.taropen(tmpname, 'q')
507 with self.assertRaisesRegex(ValueError, 'mode must be '):
508 tar = self.taropen(tmpname, 'rw')
509 with self.assertRaisesRegex(ValueError, 'mode must be '):
510 tar = self.taropen(tmpname, '')
511
Christian Heimesd8654cf2007-12-02 15:22:16 +0000512 def test_fileobj_with_offset(self):
513 # Skip the first member and store values from the second member
514 # of the testtar.
515 tar = tarfile.open(self.tarname, mode=self.mode)
Antoine Pitrou95f55602010-09-23 18:36:46 +0000516 try:
517 tar.next()
518 t = tar.next()
519 name = t.name
520 offset = t.offset
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200521 with tar.extractfile(t) as f:
522 data = f.read()
Antoine Pitrou95f55602010-09-23 18:36:46 +0000523 finally:
524 tar.close()
Christian Heimesd8654cf2007-12-02 15:22:16 +0000525
526 # Open the testtar and seek to the offset of the second member.
Serhiy Storchaka8b562922013-06-17 15:38:50 +0300527 with self.open(self.tarname) as fobj:
Antoine Pitrou95f55602010-09-23 18:36:46 +0000528 fobj.seek(offset)
Christian Heimesd8654cf2007-12-02 15:22:16 +0000529
Antoine Pitrou95f55602010-09-23 18:36:46 +0000530 # Test if the tarfile starts with the second member.
Serhiy Storchaka9e4861f2019-03-05 10:05:57 +0200531 with tar.open(self.tarname, mode="r:", fileobj=fobj) as tar:
532 t = tar.next()
533 self.assertEqual(t.name, name)
534 # Read to the end of fileobj and test if seeking back to the
535 # beginning works.
536 tar.getmembers()
537 self.assertEqual(tar.extractfile(t).read(), data,
538 "seek back did not work")
Christian Heimesd8654cf2007-12-02 15:22:16 +0000539
Guido van Rossumd8faa362007-04-27 19:54:29 +0000540 def test_fail_comp(self):
541 # For Gzip and Bz2 Tests: fail with a ReadError on an uncompressed file.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000542 self.assertRaises(tarfile.ReadError, tarfile.open, tarname, self.mode)
Antoine Pitrou95f55602010-09-23 18:36:46 +0000543 with open(tarname, "rb") as fobj:
544 self.assertRaises(tarfile.ReadError, tarfile.open,
545 fileobj=fobj, mode=self.mode)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000546
547 def test_v7_dirtype(self):
548 # Test old style dirtype member (bug #1336623):
549 # Old V7 tars create directory members using an AREGTYPE
550 # header with a "/" appended to the filename field.
551 tarinfo = self.tar.getmember("misc/dirtype-old-v7")
Serhiy Storchaka8b562922013-06-17 15:38:50 +0300552 self.assertEqual(tarinfo.type, tarfile.DIRTYPE,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000553 "v7 dirtype failed")
554
Christian Heimes126d29a2008-02-11 22:57:17 +0000555 def test_xstar_type(self):
556 # The xstar format stores extra atime and ctime fields inside the
557 # space reserved for the prefix field. The prefix field must be
558 # ignored in this case, otherwise it will mess up the name.
559 try:
560 self.tar.getmember("misc/regtype-xstar")
561 except KeyError:
562 self.fail("failed to find misc/regtype-xstar (mangled prefix?)")
563
Guido van Rossumd8faa362007-04-27 19:54:29 +0000564 def test_check_members(self):
565 for tarinfo in self.tar:
Serhiy Storchaka8b562922013-06-17 15:38:50 +0300566 self.assertEqual(int(tarinfo.mtime), 0o7606136617,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000567 "wrong mtime for %s" % tarinfo.name)
568 if not tarinfo.name.startswith("ustar/"):
569 continue
Serhiy Storchaka8b562922013-06-17 15:38:50 +0300570 self.assertEqual(tarinfo.uname, "tarfile",
Guido van Rossumd8faa362007-04-27 19:54:29 +0000571 "wrong uname for %s" % tarinfo.name)
572
573 def test_find_members(self):
Serhiy Storchaka8b562922013-06-17 15:38:50 +0300574 self.assertEqual(self.tar.getmembers()[-1].name, "misc/eof",
Guido van Rossumd8faa362007-04-27 19:54:29 +0000575 "could not find all members")
576
Brian Curtin74e45612010-07-09 15:58:59 +0000577 @unittest.skipUnless(hasattr(os, "link"),
578 "Missing hardlink implementation")
Hai Shia7f5d932020-08-04 00:41:24 +0800579 @os_helper.skip_unless_symlink
Guido van Rossumd8faa362007-04-27 19:54:29 +0000580 def test_extract_hardlink(self):
581 # Test hardlink extraction (e.g. bug #857297).
Serhiy Storchaka88339c42012-12-30 20:16:30 +0200582 with tarfile.open(tarname, errorlevel=1, encoding="iso8859-1") as tar:
Antoine Pitrou95f55602010-09-23 18:36:46 +0000583 tar.extract("ustar/regtype", TEMPDIR)
Hai Shia7f5d932020-08-04 00:41:24 +0800584 self.addCleanup(os_helper.unlink, os.path.join(TEMPDIR, "ustar/regtype"))
Neal Norwitzf3396542005-10-28 05:52:22 +0000585
Serhiy Storchaka88339c42012-12-30 20:16:30 +0200586 tar.extract("ustar/lnktype", TEMPDIR)
Hai Shia7f5d932020-08-04 00:41:24 +0800587 self.addCleanup(os_helper.unlink, os.path.join(TEMPDIR, "ustar/lnktype"))
Antoine Pitroue1eca4e2010-10-29 23:49:49 +0000588 with open(os.path.join(TEMPDIR, "ustar/lnktype"), "rb") as f:
589 data = f.read()
Christian Heimesc64a1a62019-09-25 16:30:20 +0200590 self.assertEqual(sha256sum(data), sha256_regtype)
Neal Norwitzf3396542005-10-28 05:52:22 +0000591
Serhiy Storchaka88339c42012-12-30 20:16:30 +0200592 tar.extract("ustar/symtype", TEMPDIR)
Hai Shia7f5d932020-08-04 00:41:24 +0800593 self.addCleanup(os_helper.unlink, os.path.join(TEMPDIR, "ustar/symtype"))
Antoine Pitroue1eca4e2010-10-29 23:49:49 +0000594 with open(os.path.join(TEMPDIR, "ustar/symtype"), "rb") as f:
595 data = f.read()
Christian Heimesc64a1a62019-09-25 16:30:20 +0200596 self.assertEqual(sha256sum(data), sha256_regtype)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000597
Christian Heimesfaf2f632008-01-06 16:59:19 +0000598 def test_extractall(self):
599 # Test if extractall() correctly restores directory permissions
600 # and times (see issue1735).
Christian Heimesfaf2f632008-01-06 16:59:19 +0000601 tar = tarfile.open(tarname, encoding="iso8859-1")
Martin v. Löwisbe647e22010-11-01 22:08:46 +0000602 DIR = os.path.join(TEMPDIR, "extractall")
603 os.mkdir(DIR)
Antoine Pitrou95f55602010-09-23 18:36:46 +0000604 try:
605 directories = [t for t in tar if t.isdir()]
Martin v. Löwisbe647e22010-11-01 22:08:46 +0000606 tar.extractall(DIR, directories)
Antoine Pitrou95f55602010-09-23 18:36:46 +0000607 for tarinfo in directories:
Martin v. Löwisbe647e22010-11-01 22:08:46 +0000608 path = os.path.join(DIR, tarinfo.name)
Antoine Pitrou95f55602010-09-23 18:36:46 +0000609 if sys.platform != "win32":
610 # Win32 has no support for fine grained permissions.
Serhiy Storchaka8b562922013-06-17 15:38:50 +0300611 self.assertEqual(tarinfo.mode & 0o777,
612 os.stat(path).st_mode & 0o777)
Victor Stinner26bfb5a2010-10-29 10:59:08 +0000613 def format_mtime(mtime):
614 if isinstance(mtime, float):
615 return "{} ({})".format(mtime, mtime.hex())
616 else:
617 return "{!r} (int)".format(mtime)
Victor Stinner14d8fe72010-10-29 11:02:06 +0000618 file_mtime = os.path.getmtime(path)
Victor Stinner26bfb5a2010-10-29 10:59:08 +0000619 errmsg = "tar mtime {0} != file time {1} of path {2!a}".format(
620 format_mtime(tarinfo.mtime),
621 format_mtime(file_mtime),
622 path)
623 self.assertEqual(tarinfo.mtime, file_mtime, errmsg)
Antoine Pitrou95f55602010-09-23 18:36:46 +0000624 finally:
625 tar.close()
Hai Shia7f5d932020-08-04 00:41:24 +0800626 os_helper.rmtree(DIR)
Christian Heimesfaf2f632008-01-06 16:59:19 +0000627
Martin v. Löwis16f344d2010-11-01 21:39:13 +0000628 def test_extract_directory(self):
629 dirtype = "ustar/dirtype"
Martin v. Löwisbe647e22010-11-01 22:08:46 +0000630 DIR = os.path.join(TEMPDIR, "extractdir")
631 os.mkdir(DIR)
632 try:
633 with tarfile.open(tarname, encoding="iso8859-1") as tar:
634 tarinfo = tar.getmember(dirtype)
635 tar.extract(tarinfo, path=DIR)
636 extracted = os.path.join(DIR, dirtype)
637 self.assertEqual(os.path.getmtime(extracted), tarinfo.mtime)
638 if sys.platform != "win32":
639 self.assertEqual(os.stat(extracted).st_mode & 0o777, 0o755)
640 finally:
Hai Shia7f5d932020-08-04 00:41:24 +0800641 os_helper.rmtree(DIR)
Martin v. Löwis16f344d2010-11-01 21:39:13 +0000642
def test_extractall_pathlike_name(self):
    # extractall() must accept a pathlib.Path as the destination.
    dest = pathlib.Path(TEMPDIR) / "extractall"
    with os_helper.temp_dir(dest):
        with tarfile.open(tarname, encoding="iso8859-1") as archive:
            dir_members = [member for member in archive if member.isdir()]
            archive.extractall(dest, dir_members)
            for member in dir_members:
                self.assertEqual(os.path.getmtime(dest / member.name),
                                 member.mtime)
652
def test_extract_pathlike_name(self):
    # extract() must accept a pathlib.Path as the destination.
    member_name = "ustar/dirtype"
    dest = pathlib.Path(TEMPDIR) / "extractall"
    with os_helper.temp_dir(dest):
        with tarfile.open(tarname, encoding="iso8859-1") as archive:
            member = archive.getmember(member_name)
            archive.extract(member, path=dest)
            self.assertEqual(os.path.getmtime(dest / member_name),
                             member.mtime)
662
def test_init_close_fobj(self):
    # Issue #7341: Close the internal file object in the TarFile
    # constructor in case of an error. For the test we rely on
    # the fact that opening an empty file raises a ReadError.
    empty_path = os.path.join(TEMPDIR, "empty")
    with open(empty_path, "wb") as fobj:
        fobj.write(b"")

    try:
        # Allocate the instance first so it is still reachable after
        # __init__() has raised.
        archive = object.__new__(tarfile.TarFile)
        raised = False
        try:
            archive.__init__(empty_path)
        except tarfile.ReadError:
            raised = True
        if not raised:
            self.fail("ReadError not raised")
        self.assertTrue(archive.fileobj.closed)
    finally:
        os_helper.unlink(empty_path)
Lars Gustäbelb7f09232009-11-23 15:48:33 +0000681
def test_parallel_iteration(self):
    # Issue #16601: Restarting iteration over tarfile continued
    # from where it left off.
    with tarfile.open(self.tarname) as archive:
        # Two independent iterators over the same archive must yield
        # the same members in lockstep.
        for first, second in zip(archive, archive):
            self.assertEqual(first.offset, second.offset)
            self.assertEqual(first.get_info(), second.get_info())
689
class MiscReadTest(MiscReadTestBase, unittest.TestCase):
    """MiscReadTestBase cases without any compression mixin applied."""

    # Rebinding the name to None means unittest's loader, which only
    # collects callable test* attributes, finds no test_fail_comp here.
    test_fail_comp = None
Guido van Rossumd8faa362007-04-27 19:54:29 +0000692
class GzipMiscReadTest(GzipTest, MiscReadTestBase, unittest.TestCase):
    """MiscReadTestBase cases combined with the GzipTest mixin."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000695
class Bz2MiscReadTest(Bz2Test, MiscReadTestBase, unittest.TestCase):
    """MiscReadTestBase cases combined with the Bz2Test mixin."""

    def requires_name_attribute(self):
        # Skips the calling test; the reason text is the skip message.
        reason = "BZ2File have no name attribute"
        self.skipTest(reason)
699
class LzmaMiscReadTest(LzmaTest, MiscReadTestBase, unittest.TestCase):
    """MiscReadTestBase cases combined with the LzmaTest mixin."""

    def requires_name_attribute(self):
        # Skips the calling test; the reason text is the skip message.
        reason = "LZMAFile have no name attribute"
        self.skipTest(reason)
703
704
class StreamReadTest(CommonReadTest, unittest.TestCase):
    """Read tests that open the archive with the "r|" mode prefix."""

    prefix = "r|"

    def test_read_through(self):
        # Issue #11224: A poorly designed _FileInFile.read() method
        # caused seeking errors with stream tar files.
        for member in self.tar:
            if not member.isreg():
                continue
            with self.tar.extractfile(member) as stream:
                exhausted = False
                while not exhausted:
                    try:
                        chunk = stream.read(512)
                    except tarfile.StreamError:
                        self.fail("simple read-through using "
                                  "TarFile.extractfile() failed")
                    # An empty read marks the end of the member's data.
                    exhausted = not chunk

    def test_fileobj_regular_file(self):
        # get "regtype" (can't use getmember)
        member = self.tar.next()
        with self.tar.extractfile(member) as stream:
            payload = stream.read()
        failure = "regular file extraction failed"
        self.assertEqual(len(payload), member.size, failure)
        self.assertEqual(sha256sum(payload), sha256_regtype, failure)

    def test_provoke_stream_error(self):
        # getmembers() is called before the first member is read; the
        # read is then expected to raise StreamError.
        first_member = self.tar.getmembers()[0]
        with self.tar.extractfile(first_member) as stream:
            with self.assertRaises(tarfile.StreamError):
                stream.read()

    def test_compare_members(self):
        # Walk a regular (seekable) open of the reference archive and
        # the stream archive in lockstep and compare member contents.
        with tarfile.open(tarname, encoding="iso8859-1") as reference:
            streamed = self.tar

            while True:
                ref_member = reference.next()
                stream_member = streamed.next()
                if ref_member is None:
                    break
                self.assertIsNotNone(stream_member, "stream.next() failed.")

                if stream_member.islnk() or stream_member.issym():
                    # Extracting a link member from the stream archive
                    # is expected to raise StreamError.
                    with self.assertRaises(tarfile.StreamError):
                        streamed.extractfile(stream_member)
                    continue

                ref_file = reference.extractfile(ref_member)
                stream_file = streamed.extractfile(stream_member)
                if ref_file is None:
                    continue
                self.assertIsNotNone(stream_file,
                                     "stream.extractfile() failed")
                self.assertEqual(ref_file.read(), stream_file.read(),
                                 "stream extraction failed")
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000765
class GzipStreamReadTest(GzipTest, StreamReadTest):
    """StreamReadTest cases combined with the GzipTest mixin."""
Thomas Wouters89f507f2006-12-13 04:49:30 +0000768
class Bz2StreamReadTest(Bz2Test, StreamReadTest):
    """StreamReadTest cases combined with the Bz2Test mixin."""
Thomas Wouterscf297e42007-02-23 15:07:44 +0000771
class LzmaStreamReadTest(LzmaTest, StreamReadTest):
    """StreamReadTest cases combined with the LzmaTest mixin."""
774
775
class DetectReadTest(TarTest, unittest.TestCase):
    """Check which read modes tarfile.open() accepts for an archive.

    Each archive is opened by name and through a file object.  Modes
    that do not match the archive must raise ReadError; matching and
    auto-detecting modes must succeed.
    """

    def _testfunc_file(self, name, mode):
        # Open the archive by file name; a ReadError fails the test
        # with enough context to identify the offending name and mode.
        try:
            tar = tarfile.open(name, mode)
        except tarfile.ReadError as e:
            self.fail("tarfile.open(%r, %r) raised ReadError: %s"
                      % (name, mode, e))
        else:
            tar.close()

    def _testfunc_fileobj(self, name, mode):
        # Same check as _testfunc_file(), but the data is supplied
        # through an already opened binary file object.
        try:
            with open(name, "rb") as f:
                tar = tarfile.open(name, mode, fileobj=f)
        except tarfile.ReadError as e:
            self.fail("tarfile.open(%r, %r, fileobj=...) raised "
                      "ReadError: %s" % (name, mode, e))
        else:
            tar.close()

    def _test_modes(self, testfunc):
        # testfunc is one of the _testfunc_* helpers above; it is
        # called as testfunc(name, mode).
        if self.suffix:
            # With a non-empty suffix the explicit modes must not
            # cross over: the module-level tarname must be rejected
            # under this suffix, and self.tarname under the bare modes.
            with self.assertRaises(tarfile.ReadError):
                tarfile.open(tarname, mode="r:" + self.suffix)
            with self.assertRaises(tarfile.ReadError):
                tarfile.open(tarname, mode="r|" + self.suffix)
            with self.assertRaises(tarfile.ReadError):
                tarfile.open(self.tarname, mode="r:")
            with self.assertRaises(tarfile.ReadError):
                tarfile.open(self.tarname, mode="r|")
        testfunc(self.tarname, "r")
        testfunc(self.tarname, "r:" + self.suffix)
        testfunc(self.tarname, "r:*")
        testfunc(self.tarname, "r|" + self.suffix)
        testfunc(self.tarname, "r|*")

    def test_detect_file(self):
        self._test_modes(self._testfunc_file)

    def test_detect_fileobj(self):
        self._test_modes(self._testfunc_fileobj)
815
class GzipDetectReadTest(GzipTest, DetectReadTest):
    """DetectReadTest cases combined with the GzipTest mixin."""
818
class Bz2DetectReadTest(Bz2Test, DetectReadTest):
    """DetectReadTest cases combined with the Bz2Test mixin."""

    def test_detect_stream_bz2(self):
        # Originally, tarfile's stream detection looked for the string
        # "BZh91" at the start of the file. This is incorrect because
        # the '9' represents the blocksize (900,000 bytes). If the file was
        # compressed using another blocksize autodetection fails.
        with open(tarname, "rb") as plain:
            payload = plain.read()

        # Compress with blocksize 100,000 bytes, the file starts with "BZh11".
        with bz2.BZ2File(tmpname, "wb", compresslevel=1) as packed:
            packed.write(payload)

        self._testfunc_file(tmpname, "r|*")
833
class LzmaDetectReadTest(LzmaTest, DetectReadTest):
    """DetectReadTest cases combined with the LzmaTest mixin."""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000836
Serhiy Storchaka8b562922013-06-17 15:38:50 +0300837
class MemberReadTest(ReadTest, unittest.TestCase):
    """Look up individual members and check their header fields."""

    def _test_member(self, tarinfo, chksum=None, **kwargs):
        # Compare the member's payload digest (when chksum is given)
        # and its header fields against the expected values.  kwargs
        # maps TarInfo attribute names to expected values.
        if chksum is not None:
            with self.tar.extractfile(tarinfo) as f:
                digest = sha256sum(f.read())
            self.assertEqual(digest, chksum,
                             "wrong sha256sum for %s" % tarinfo.name)

        # Fields that every member checked here is expected to share.
        kwargs.update(mtime=0o7606136617, uid=1000, gid=100)
        if "old-v7" not in tarinfo.name:
            # V7 tar can't handle alphabetic owners.
            kwargs.update(uname="tarfile", gname="tarfile")
        for field, expected in kwargs.items():
            self.assertEqual(
                getattr(tarinfo, field), expected,
                "wrong value in %s field of %s" % (field, tarinfo.name))

    def test_find_regtype(self):
        self._test_member(self.tar.getmember("ustar/regtype"),
                          size=7011, chksum=sha256_regtype)

    def test_find_conttype(self):
        self._test_member(self.tar.getmember("ustar/conttype"),
                          size=7011, chksum=sha256_regtype)

    def test_find_dirtype(self):
        self._test_member(self.tar.getmember("ustar/dirtype"), size=0)

    def test_find_dirtype_with_size(self):
        self._test_member(self.tar.getmember("ustar/dirtype-with-size"),
                          size=255)

    def test_find_lnktype(self):
        self._test_member(self.tar.getmember("ustar/lnktype"),
                          size=0, linkname="ustar/regtype")

    def test_find_symtype(self):
        self._test_member(self.tar.getmember("ustar/symtype"),
                          size=0, linkname="regtype")

    def test_find_blktype(self):
        self._test_member(self.tar.getmember("ustar/blktype"),
                          size=0, devmajor=3, devminor=0)

    def test_find_chrtype(self):
        self._test_member(self.tar.getmember("ustar/chrtype"),
                          size=0, devmajor=1, devminor=3)

    def test_find_fifotype(self):
        self._test_member(self.tar.getmember("ustar/fifotype"), size=0)

    def test_find_sparse(self):
        self._test_member(self.tar.getmember("ustar/sparse"),
                          size=86016, chksum=sha256_sparse)

    def test_find_gnusparse(self):
        self._test_member(self.tar.getmember("gnu/sparse"),
                          size=86016, chksum=sha256_sparse)

    def test_find_gnusparse_00(self):
        self._test_member(self.tar.getmember("gnu/sparse-0.0"),
                          size=86016, chksum=sha256_sparse)

    def test_find_gnusparse_01(self):
        self._test_member(self.tar.getmember("gnu/sparse-0.1"),
                          size=86016, chksum=sha256_sparse)

    def test_find_gnusparse_10(self):
        self._test_member(self.tar.getmember("gnu/sparse-1.0"),
                          size=86016, chksum=sha256_sparse)

    def test_find_umlauts(self):
        member = self.tar.getmember("ustar/umlauts-"
                                    "\xc4\xd6\xdc\xe4\xf6\xfc\xdf")
        self._test_member(member, size=7011, chksum=sha256_regtype)

    def test_find_ustar_longname(self):
        expected = "ustar/" + "12345/" * 39 + "1234567/longname"
        self.assertIn(expected, self.tar.getnames())

    def test_find_regtype_oldv7(self):
        self._test_member(self.tar.getmember("misc/regtype-old-v7"),
                          size=7011, chksum=sha256_regtype)

    def test_find_pax_umlauts(self):
        # Replace the archive on self.tar with one opened using an
        # explicit iso8859-1 encoding before looking the member up.
        self.tar.close()
        self.tar = tarfile.open(self.tarname, mode=self.mode,
                                encoding="iso8859-1")
        member = self.tar.getmember("pax/umlauts-"
                                    "\xc4\xd6\xdc\xe4\xf6\xfc\xdf")
        self._test_member(member, size=7011, chksum=sha256_regtype)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000933
934
class LongnameTest:
    """Mixin with long-name and long-link read tests.

    The tests read self.subdir, self.longnametype and self.tar, which
    this class does not define itself.
    """

    def test_read_longname(self):
        # Test reading of longname (bug #1471427).
        wanted = "%s/%s%s" % (self.subdir, "123/" * 125, "longname")
        try:
            member = self.tar.getmember(wanted)
        except KeyError:
            self.fail("longname not found")
        self.assertNotEqual(member.type, tarfile.DIRTYPE,
                            "read longname as dirtype")

    def test_read_longlink(self):
        stem = self.subdir + "/" + "123/" * 125
        target_name = stem + "longname"
        link_name = stem + "longlink"
        try:
            member = self.tar.getmember(link_name)
        except KeyError:
            self.fail("longlink not found")
        self.assertEqual(member.linkname, target_name, "linkname wrong")

    def test_truncated_longname(self):
        # Copy only the first three 512-byte blocks starting at the
        # member's offset into memory; opening that fragment must fail.
        wanted = "%s/%s%s" % (self.subdir, "123/" * 125, "longname")
        start = self.tar.getmember(wanted).offset
        self.tar.fileobj.seek(start)
        fragment = io.BytesIO(self.tar.fileobj.read(3 * 512))
        with self.assertRaises(tarfile.ReadError):
            tarfile.open(name="foo.tar", fileobj=fragment)

    def test_header_offset(self):
        # Test if the start offset of the TarInfo object includes
        # the preceding extended header.
        wanted = "%s/%s%s" % (self.subdir, "123/" * 125, "longname")
        start = self.tar.getmember(wanted).offset
        with open(tarname, "rb") as raw:
            raw.seek(start)
            header = raw.read(512)
            parsed = tarfile.TarInfo.frombuf(header, "iso8859-1", "strict")
        self.assertEqual(parsed.type, self.longnametype)
Guido van Rossume7ba4952007-06-06 23:52:48 +0000975
Guido van Rossumd8faa362007-04-27 19:54:29 +0000976
class GNUReadTest(LongnameTest, ReadTest, unittest.TestCase):
    """Long-name tests for the "gnu" subdirectory, plus sparse files."""

    subdir = "gnu"
    longnametype = tarfile.GNUTYPE_LONGNAME

    # Since 3.2 tarfile is supposed to accurately restore sparse members and
    # produce files with holes. This is what we actually want to test here.
    # Unfortunately, not all platforms/filesystems support sparse files, and
    # even on platforms that do it is non-trivial to make reliable assertions
    # about holes in files. Therefore, we first do one basic test which works
    # on all platforms, and after that a test that will work only on
    # platforms/filesystems that prove to support sparse files.
    def _test_sparse_file(self, name):
        self.tar.extract(name, TEMPDIR)
        extracted = os.path.join(TEMPDIR, name)
        with open(extracted, "rb") as fobj:
            digest = sha256sum(fobj.read())
        self.assertEqual(digest, sha256_sparse,
                         "wrong sha256sum for %s" % name)

        if self._fs_supports_holes():
            # Fewer allocated bytes than the logical size means the
            # extracted file contains holes.
            info = os.stat(extracted)
            self.assertLess(info.st_blocks * 512, info.st_size)

    def test_sparse_file_old(self):
        self._test_sparse_file("gnu/sparse")

    def test_sparse_file_00(self):
        self._test_sparse_file("gnu/sparse-0.0")

    def test_sparse_file_01(self):
        self._test_sparse_file("gnu/sparse-0.1")

    def test_sparse_file_10(self):
        self._test_sparse_file("gnu/sparse-1.0")

    @staticmethod
    def _fs_supports_holes():
        # Return True if the platform knows the st_blocks stat attribute and
        # uses st_blocks units of 512 bytes, and if the filesystem is able to
        # store holes of 4 KiB in files.
        #
        # The function returns False if page size is larger than 4 KiB.
        # For example, ppc64 uses pages of 64 KiB.
        if not sys.platform.startswith("linux"):
            return False

        # Linux evidently has 512 byte st_blocks units.
        probe = os.path.join(TEMPDIR, "sparse-test")
        with open(probe, "wb") as fobj:
            # Seek to "punch a hole" of 4 KiB
            fobj.seek(4096)
            fobj.write(b'x' * 4096)
            fobj.truncate()
        info = os.stat(probe)
        os_helper.unlink(probe)
        return (info.st_blocks * 512 < info.st_size)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001034
1035
class PaxReadTest(LongnameTest, ReadTest, unittest.TestCase):
    """Long-name tests for the "pax" subdirectory, plus pax headers."""

    subdir = "pax"
    longnametype = tarfile.XHDTYPE

    def test_pax_global_headers(self):
        # Each row: member name, expected uname, expected gname.  All
        # three members must carry the same VENDOR.umlauts pax header.
        umlauts = "\xc4\xd6\xdc\xe4\xf6\xfc\xdf"
        expectations = (
            ("pax/regtype1", "foo", "bar"),
            ("pax/regtype2", "", "bar"),
            ("pax/regtype3", "tarfile", "tarfile"),
        )
        with tarfile.open(tarname, encoding="iso8859-1") as archive:
            for member_name, uname, gname in expectations:
                member = archive.getmember(member_name)
                self.assertEqual(member.uname, uname)
                self.assertEqual(member.gname, gname)
                self.assertEqual(member.pax_headers.get("VENDOR.umlauts"),
                                 umlauts)

    def test_pax_number_fields(self):
        # All following number fields are read from the pax header.
        timestamp = 1041808783.0
        with tarfile.open(tarname, encoding="iso8859-1") as archive:
            member = archive.getmember("pax/regtype4")
            self.assertEqual(member.size, 7011)
            self.assertEqual(member.uid, 123)
            self.assertEqual(member.gid, 123)
            self.assertEqual(member.mtime, timestamp)
            self.assertEqual(type(member.mtime), float)
            headers = member.pax_headers
            self.assertEqual(float(headers["atime"]), timestamp)
            self.assertEqual(float(headers["ctime"]), timestamp)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001078
1079
class WriteTestBase(TarTest):
    # Put all write tests in here that are supposed to be tested
    # in all possible mode combinations.

    def test_fileobj_no_close(self):
        buffer = io.BytesIO()
        with tarfile.open(fileobj=buffer, mode=self.mode) as archive:
            archive.addfile(tarfile.TarInfo("foo"))
        self.assertFalse(buffer.closed, "external fileobjs must never closed")
        # Issue #20238: Incomplete gzip output with mode="w:gz"
        snapshot = buffer.getvalue()
        # Drop the last reference and force a collection: the buffer
        # must stay open and its contents unchanged afterwards.
        del archive
        support.gc_collect()
        self.assertFalse(buffer.closed)
        self.assertEqual(snapshot, buffer.getvalue())

    def test_eof_marker(self):
        # Make sure an end of archive marker is written (two zero blocks).
        # tarfile insists on aligning archives to a 20 * 512 byte recordsize.
        # So, we create an archive that has exactly 10240 bytes without the
        # marker, and has 20480 bytes once the marker is written.
        payload_size = tarfile.RECORDSIZE - tarfile.BLOCKSIZE
        with tarfile.open(tmpname, self.mode) as archive:
            member = tarfile.TarInfo("foo")
            member.size = payload_size
            archive.addfile(member, io.BytesIO(b"a" * member.size))

        with self.open(tmpname, "rb") as fobj:
            written = fobj.read()
            self.assertEqual(len(written), tarfile.RECORDSIZE * 2)
1108
Georg Brandlf08a9dd2008-06-10 16:57:31 +00001109
Serhiy Storchaka8b562922013-06-17 15:38:50 +03001110class WriteTest(WriteTestBase, unittest.TestCase):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001111
Serhiy Storchaka8b562922013-06-17 15:38:50 +03001112 prefix = "w:"
Guido van Rossumd8faa362007-04-27 19:54:29 +00001113
def test_100_char_name(self):
    # The name field in a tar header stores strings of at most 100 chars.
    # If a string is shorter than 100 chars it has to be padded with '\0',
    # which implies that a string of exactly 100 chars is stored without
    # a trailing '\0'.
    long_name = "0123456789" * 10
    member = tarfile.TarInfo(long_name)
    writer = tarfile.open(tmpname, self.mode)
    try:
        writer.addfile(member)
    finally:
        writer.close()

    # Read the archive back and check that the name survived intact.
    reader = tarfile.open(tmpname)
    try:
        stored = reader.getnames()[0]
        self.assertEqual(stored, long_name,
                         "failed to store 100 char filename")
    finally:
        reader.close()
Thomas Wouters89f507f2006-12-13 04:49:30 +00001133
def test_tar_size(self):
    # Test for bug #1013882.
    archive = tarfile.open(tmpname, self.mode)
    try:
        source = os.path.join(TEMPDIR, "file")
        with open(source, "wb") as fobj:
            fobj.write(b"aaa")
        archive.add(source)
    finally:
        archive.close()
    # The archive is only measured after it has been closed.
    archive_size = os.path.getsize(tmpname)
    self.assertGreater(archive_size, 0, "tarfile is empty")
Thomas Wouters89f507f2006-12-13 04:49:30 +00001146
Guido van Rossumd8faa362007-04-27 19:54:29 +00001147 # The test_*_size tests test for bug #1167128.
def test_file_size(self):
    # gettarinfo() must report the current on-disk size of the file:
    # first for an empty file, then after three bytes were written.
    archive = tarfile.open(tmpname, self.mode)
    try:
        source = os.path.join(TEMPDIR, "file")
        for content, expected_size in ((b"", 0), (b"aaa", 3)):
            with open(source, "wb") as fobj:
                if content:
                    fobj.write(content)
            self.assertEqual(archive.gettarinfo(source).size,
                             expected_size)
    finally:
        archive.close()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001163
def test_directory_size(self):
    # A directory must be reported with size 0.
    directory = os.path.join(TEMPDIR, "directory")
    os.mkdir(directory)
    try:
        archive = tarfile.open(tmpname, self.mode)
        try:
            self.assertEqual(archive.gettarinfo(directory).size, 0)
        finally:
            archive.close()
    finally:
        os_helper.rmdir(directory)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001176
# os.listdir() is mocked to return the directory entries in the wrong
# order, so the test can tell that add() stores them sorted.
def test_ordered_recursion(self):
    directory = os.path.join(TEMPDIR, "directory")
    entries = ("1", "2")
    os.mkdir(directory)
    for entry in entries:
        open(os.path.join(directory, entry), "a").close()
    try:
        archive = tarfile.open(tmpname, self.mode)
        try:
            with unittest.mock.patch('os.listdir',
                                     return_value=["2", "1"]):
                archive.add(directory)
            stored = [os.path.basename(member.name)
                      for member in archive.getmembers()]
            self.assertEqual(stored, ["directory", "1", "2"])
        finally:
            archive.close()
    finally:
        for entry in entries:
            os_helper.unlink(os.path.join(directory, entry))
        os_helper.rmdir(directory)
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001200
def test_gettarinfo_pathlike_name(self):
    # gettarinfo() must give the same result for a pathlib.Path as
    # for the equivalent string path, with a str name.
    with tarfile.open(tmpname, self.mode) as archive:
        source = pathlib.Path(TEMPDIR) / "file"
        with open(source, "wb") as fobj:
            fobj.write(b"aaa")
        from_path = archive.gettarinfo(source)
        from_str = archive.gettarinfo(os.fspath(source))
        self.assertIsInstance(from_path.name, str)
        self.assertEqual(from_path.name, from_str.name)
        self.assertEqual(from_path.size, 3)
1211
@unittest.skipUnless(hasattr(os, "link"),
                     "Missing hardlink implementation")
def test_link_size(self):
    # A hard link to a file that gettarinfo() has already seen must
    # be reported with size 0.
    link = os.path.join(TEMPDIR, "link")
    target = os.path.join(TEMPDIR, "link_target")
    with open(target, "wb") as fobj:
        fobj.write(b"aaa")
    try:
        os.link(target, link)
    except PermissionError as e:
        # The cleanup below is never reached when the test is skipped,
        # so remove the target here instead of leaving it behind.
        os_helper.unlink(target)
        self.skipTest('os.link(): %s' % e)
    try:
        tar = tarfile.open(tmpname, self.mode)
        try:
            # Record the link target in the inodes list.
            tar.gettarinfo(target)
            tarinfo = tar.gettarinfo(link)
            self.assertEqual(tarinfo.size, 0)
        finally:
            tar.close()
    finally:
        os_helper.unlink(target)
        os_helper.unlink(link)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001235
@os_helper.skip_unless_symlink
def test_symlink_size(self):
    # A symbolic link must be reported with size 0.
    symlink = os.path.join(TEMPDIR, "symlink")
    os.symlink("link_target", symlink)
    try:
        archive = tarfile.open(tmpname, self.mode)
        try:
            self.assertEqual(archive.gettarinfo(symlink).size, 0)
        finally:
            archive.close()
    finally:
        os_helper.unlink(symlink)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001249
def test_add_self(self):
    # Test for #1257255.
    archive_path = os.path.abspath(tmpname)
    archive = tarfile.open(tmpname, self.mode)
    try:
        self.assertEqual(archive.name, archive_path,
                         "archive name must be absolute")

        # Adding the archive to itself must leave it without members.
        archive.add(archive_path)
        self.assertEqual(archive.getnames(), [],
                         "added the archive to itself")

        # Same check with TEMPDIR as the current working directory.
        with os_helper.change_cwd(TEMPDIR):
            archive.add(archive_path)
        self.assertEqual(archive.getnames(), [],
                         "added the archive to itself")
    finally:
        archive.close()
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001267
Lars Gustäbel049d2aa2009-09-12 10:44:00 +00001268 def test_filter(self):
1269 tempdir = os.path.join(TEMPDIR, "filter")
1270 os.mkdir(tempdir)
1271 try:
1272 for name in ("foo", "bar", "baz"):
1273 name = os.path.join(tempdir, name)
Hai Shia7f5d932020-08-04 00:41:24 +08001274 os_helper.create_empty_file(name)
Lars Gustäbel049d2aa2009-09-12 10:44:00 +00001275
1276 def filter(tarinfo):
1277 if os.path.basename(tarinfo.name) == "bar":
1278 return
1279 tarinfo.uid = 123
1280 tarinfo.uname = "foo"
1281 return tarinfo
1282
1283 tar = tarfile.open(tmpname, self.mode, encoding="iso8859-1")
Antoine Pitrou95f55602010-09-23 18:36:46 +00001284 try:
1285 tar.add(tempdir, arcname="empty_dir", filter=filter)
1286 finally:
1287 tar.close()
Lars Gustäbel049d2aa2009-09-12 10:44:00 +00001288
Raymond Hettingera63a3122011-01-26 20:34:14 +00001289 # Verify that filter is a keyword-only argument
1290 with self.assertRaises(TypeError):
1291 tar.add(tempdir, "empty_dir", True, None, filter)
1292
Lars Gustäbel049d2aa2009-09-12 10:44:00 +00001293 tar = tarfile.open(tmpname, "r")
Antoine Pitrou95f55602010-09-23 18:36:46 +00001294 try:
1295 for tarinfo in tar:
1296 self.assertEqual(tarinfo.uid, 123)
1297 self.assertEqual(tarinfo.uname, "foo")
1298 self.assertEqual(len(tar.getmembers()), 3)
1299 finally:
1300 tar.close()
Lars Gustäbel049d2aa2009-09-12 10:44:00 +00001301 finally:
Hai Shia7f5d932020-08-04 00:41:24 +08001302 os_helper.rmtree(tempdir)
Lars Gustäbel049d2aa2009-09-12 10:44:00 +00001303
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +00001304 # Guarantee that stored pathnames are not modified. Don't
1305 # remove ./ or ../ or double slashes. Still make absolute
1306 # pathnames relative.
1307 # For details see bug #6054.
1308 def _test_pathname(self, path, cmp_path=None, dir=False):
1309 # Create a tarfile with an empty member named path
1310 # and compare the stored name with the original.
1311 foo = os.path.join(TEMPDIR, "foo")
1312 if not dir:
Hai Shia7f5d932020-08-04 00:41:24 +08001313 os_helper.create_empty_file(foo)
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +00001314 else:
1315 os.mkdir(foo)
1316
1317 tar = tarfile.open(tmpname, self.mode)
Antoine Pitrou95f55602010-09-23 18:36:46 +00001318 try:
1319 tar.add(foo, arcname=path)
1320 finally:
1321 tar.close()
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +00001322
1323 tar = tarfile.open(tmpname, "r")
Antoine Pitrou95f55602010-09-23 18:36:46 +00001324 try:
1325 t = tar.next()
1326 finally:
1327 tar.close()
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +00001328
1329 if not dir:
Hai Shia7f5d932020-08-04 00:41:24 +08001330 os_helper.unlink(foo)
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +00001331 else:
Hai Shia7f5d932020-08-04 00:41:24 +08001332 os_helper.rmdir(foo)
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +00001333
1334 self.assertEqual(t.name, cmp_path or path.replace(os.sep, "/"))
1335
Senthil Kumaranbe5dbeb2011-04-30 06:09:51 +08001336
Hai Shia7f5d932020-08-04 00:41:24 +08001337 @os_helper.skip_unless_symlink
Senthil Kumaran123932f2011-04-28 15:38:12 +08001338 def test_extractall_symlinks(self):
1339 # Test if extractall works properly when tarfile contains symlinks
1340 tempdir = os.path.join(TEMPDIR, "testsymlinks")
1341 temparchive = os.path.join(TEMPDIR, "testsymlinks.tar")
1342 os.mkdir(tempdir)
1343 try:
1344 source_file = os.path.join(tempdir,'source')
1345 target_file = os.path.join(tempdir,'symlink')
1346 with open(source_file,'w') as f:
1347 f.write('something\n')
1348 os.symlink(source_file, target_file)
Serhiy Storchaka9e4861f2019-03-05 10:05:57 +02001349 with tarfile.open(temparchive, 'w') as tar:
Julien Palard4fedd712020-11-25 10:23:17 +01001350 tar.add(source_file, arcname="source")
1351 tar.add(target_file, arcname="symlink")
Senthil Kumaran123932f2011-04-28 15:38:12 +08001352 # Let's extract it to the location which contains the symlink
Julien Palard4fedd712020-11-25 10:23:17 +01001353 with tarfile.open(temparchive, errorlevel=2) as tar:
Serhiy Storchaka9e4861f2019-03-05 10:05:57 +02001354 # this should not raise OSError: [Errno 17] File exists
1355 try:
1356 tar.extractall(path=tempdir)
1357 except OSError:
1358 self.fail("extractall failed with symlinked files")
Senthil Kumaran123932f2011-04-28 15:38:12 +08001359 finally:
Hai Shia7f5d932020-08-04 00:41:24 +08001360 os_helper.unlink(temparchive)
1361 os_helper.rmtree(tempdir)
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001362
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +00001363 def test_pathnames(self):
1364 self._test_pathname("foo")
1365 self._test_pathname(os.path.join("foo", ".", "bar"))
1366 self._test_pathname(os.path.join("foo", "..", "bar"))
1367 self._test_pathname(os.path.join(".", "foo"))
1368 self._test_pathname(os.path.join(".", "foo", "."))
1369 self._test_pathname(os.path.join(".", "foo", ".", "bar"))
1370 self._test_pathname(os.path.join(".", "foo", "..", "bar"))
1371 self._test_pathname(os.path.join(".", "foo", "..", "bar"))
1372 self._test_pathname(os.path.join("..", "foo"))
1373 self._test_pathname(os.path.join("..", "foo", ".."))
1374 self._test_pathname(os.path.join("..", "foo", ".", "bar"))
1375 self._test_pathname(os.path.join("..", "foo", "..", "bar"))
1376
1377 self._test_pathname("foo" + os.sep + os.sep + "bar")
1378 self._test_pathname("foo" + os.sep + os.sep, "foo", dir=True)
1379
1380 def test_abs_pathnames(self):
1381 if sys.platform == "win32":
1382 self._test_pathname("C:\\foo", "foo")
1383 else:
1384 self._test_pathname("/foo", "foo")
1385 self._test_pathname("///foo", "foo")
1386
1387 def test_cwd(self):
1388 # Test adding the current working directory.
Hai Shia7f5d932020-08-04 00:41:24 +08001389 with os_helper.change_cwd(TEMPDIR):
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +00001390 tar = tarfile.open(tmpname, self.mode)
Antoine Pitrou95f55602010-09-23 18:36:46 +00001391 try:
1392 tar.add(".")
1393 finally:
1394 tar.close()
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +00001395
1396 tar = tarfile.open(tmpname, "r")
Antoine Pitrou95f55602010-09-23 18:36:46 +00001397 try:
1398 for t in tar:
Serhiy Storchaka8b562922013-06-17 15:38:50 +03001399 if t.name != ".":
1400 self.assertTrue(t.name.startswith("./"), t.name)
Antoine Pitrou95f55602010-09-23 18:36:46 +00001401 finally:
1402 tar.close()
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +00001403
Serhiy Storchakac2d01422014-01-18 16:14:10 +02001404 def test_open_nonwritable_fileobj(self):
1405 for exctype in OSError, EOFError, RuntimeError:
1406 class BadFile(io.BytesIO):
1407 first = True
1408 def write(self, data):
1409 if self.first:
1410 self.first = False
1411 raise exctype
1412
1413 f = BadFile()
1414 with self.assertRaises(exctype):
1415 tar = tarfile.open(tmpname, self.mode, fileobj=f,
1416 format=tarfile.PAX_FORMAT,
1417 pax_headers={'non': 'empty'})
1418 self.assertFalse(f.closed)
1419
Artem Bulgakov22748a82020-09-07 19:46:33 +03001420
# Runs the WriteTest cases with the GzipTest mixin applied.
class GzipWriteTest(GzipTest, WriteTest):
    pass
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001423
Artem Bulgakov22748a82020-09-07 19:46:33 +03001424
# Runs the WriteTest cases with the Bz2Test mixin applied.
class Bz2WriteTest(Bz2Test, WriteTest):
    pass
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001427
Artem Bulgakov22748a82020-09-07 19:46:33 +03001428
# Runs the WriteTest cases with the LzmaTest mixin applied.
class LzmaWriteTest(LzmaTest, WriteTest):
    pass
1431
1432
class StreamWriteTest(WriteTestBase, unittest.TestCase):
    # Tests for archives written with the "w|" mode prefix.

    prefix = "w|"
    # Subclasses may set this to a decompressor factory; when it is falsy
    # the archive is read back through self.open() instead.
    decompressor = None

    def test_stream_padding(self):
        # Test for bug #1543303.
        tarfile.open(tmpname, self.mode).close()

        make_decompressor = self.decompressor
        if not make_decompressor:
            with self.open(tmpname) as fobj:
                data = fobj.read()
        else:
            dec = make_decompressor()
            with open(tmpname, "rb") as fobj:
                data = fobj.read()
            data = dec.decompress(data)
            self.assertFalse(dec.unused_data, "found trailing data")

        # An archive without members must consist of exactly one record
        # of NUL bytes.
        self.assertEqual(data.count(b"\0"), tarfile.RECORDSIZE,
                         "incorrect zero padding")

    @unittest.skipUnless(sys.platform != "win32" and hasattr(os, "umask"),
                         "Missing umask implementation")
    def test_file_mode(self):
        # Test for issue #8464: Create files with correct
        # permissions.
        if os.path.exists(tmpname):
            os_helper.unlink(tmpname)

        saved_umask = os.umask(0o022)
        try:
            tarfile.open(tmpname, self.mode).close()
            permissions = os.stat(tmpname).st_mode & 0o777
            self.assertEqual(permissions, 0o644, "wrong file permissions")
        finally:
            # Restore the process-wide umask whatever happened above.
            os.umask(saved_umask)
1470
Artem Bulgakov22748a82020-09-07 19:46:33 +03001471
class GzipStreamWriteTest(GzipTest, StreamWriteTest):
    def test_source_directory_not_leaked(self):
        """
        Ensure the source directory is not included in the tar header
        per bpo-41316.
        """
        tarfile.open(tmpname, self.mode).close()
        # latin-1 maps every byte to a character, so the raw file content
        # can be searched as text.
        payload = pathlib.Path(tmpname).read_text(encoding='latin-1')
        # Use a unittest assertion rather than a bare assert statement:
        # it still runs under "python -O" and reports both operands on
        # failure.
        self.assertNotIn(os.path.dirname(tmpname), payload)
1481
Serhiy Storchaka8b562922013-06-17 15:38:50 +03001482
class Bz2StreamWriteTest(Bz2Test, StreamWriteTest):
    # bz2 is None when the module could not be imported; in that case no
    # decompressor is configured.
    decompressor = bz2.BZ2Decompressor if bz2 else None
1485
class LzmaStreamWriteTest(LzmaTest, StreamWriteTest):
    # lzma is None when the module could not be imported; in that case no
    # decompressor is configured.
    decompressor = lzma.LZMADecompressor if lzma else None
1488
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001489
class GNUWriteTest(unittest.TestCase):
    # This testcase checks for correct creation of GNU Longname
    # and Longlink extended headers (cp. bug #812325).

    def _length(self, s):
        # Number of bytes s occupies when stored in 512-byte blocks; one
        # block is always added on top of the completely filled ones.
        full_blocks = len(s) // 512
        return 512 * (full_blocks + 1)

    def _calc_size(self, name, link=None):
        # Expected archive offset after adding one member: the member's
        # own header plus, for every over-long field, an extended header
        # block and the blocks holding the long string.
        sizes = [512]
        if len(name) > tarfile.LENGTH_NAME:
            # GNU longname extended header + longname
            sizes += [512, self._length(name)]
        if link is not None and len(link) > tarfile.LENGTH_LINK:
            # GNU longlink extended header + longlink
            sizes += [512, self._length(link)]
        return sum(sizes)

    def _test(self, name, link=None):
        # Write a single member (a hard link entry when link is given) in
        # GNU format, check the resulting offset, then read it back.
        written = tarfile.TarInfo(name)
        if link:
            written.linkname = link
            written.type = tarfile.LNKTYPE

        tar = tarfile.open(tmpname, "w")
        try:
            tar.format = tarfile.GNU_FORMAT
            tar.addfile(written)
            self.assertEqual(self._calc_size(name, link), tar.offset,
                             "GNU longname/longlink creation failed")
        finally:
            tar.close()

        tar = tarfile.open(tmpname)
        try:
            member = tar.next()
            self.assertIsNotNone(member,
                                 "unable to read longname member")
            self.assertEqual(written.name, member.name,
                             "unable to read longname member")
            self.assertEqual(written.linkname, member.linkname,
                             "unable to read longname member")
        finally:
            tar.close()

    # The numeric suffix of each test is the length of the long string.

    def test_longname_1023(self):
        self._test("longnam/" * 127 + "longnam")

    def test_longname_1024(self):
        self._test("longnam/" * 127 + "longname")

    def test_longname_1025(self):
        self._test("longnam/" * 127 + "longname_")

    def test_longlink_1023(self):
        self._test("name", "longlnk/" * 127 + "longlnk")

    def test_longlink_1024(self):
        self._test("name", "longlnk/" * 127 + "longlink")

    def test_longlink_1025(self):
        self._test("name", "longlnk/" * 127 + "longlink_")

    def test_longnamelink_1023(self):
        self._test("longnam/" * 127 + "longnam",
                   "longlnk/" * 127 + "longlnk")

    def test_longnamelink_1024(self):
        self._test("longnam/" * 127 + "longname",
                   "longlnk/" * 127 + "longlink")

    def test_longnamelink_1025(self):
        self._test("longnam/" * 127 + "longname_",
                   "longlnk/" * 127 + "longlink_")
1570
Guido van Rossumd8faa362007-04-27 19:54:29 +00001571
class DeviceHeaderTest(WriteTestBase, unittest.TestCase):
    # Checks which members get devmajor/devminor header fields written.

    prefix = "w:"

    def test_headers_written_only_for_device_files(self):
        # Regression test for bpo-18819.
        # NOTE(review): tempdir is created and removed here, but nothing
        # in this test writes into it.
        tempdir = os.path.join(TEMPDIR, "device_header_test")
        os.mkdir(tempdir)
        try:
            with tarfile.open(tmpname, self.mode) as tar:
                input_blk = tarfile.TarInfo(name="my_block_device")
                input_reg = tarfile.TarInfo(name="my_regular_file")
                input_blk.type = tarfile.BLKTYPE
                input_reg.type = tarfile.REGTYPE
                tar.addfile(input_blk)
                tar.addfile(input_reg)

            # devmajor and devminor should be *interpreted* as 0 in both...
            with tarfile.open(tmpname, "r") as tar:
                output_blk = tar.getmember("my_block_device")
                output_reg = tar.getmember("my_regular_file")
            self.assertEqual(output_blk.devmajor, 0)
            self.assertEqual(output_blk.devminor, 0)
            self.assertEqual(output_reg.devmajor, 0)
            self.assertEqual(output_reg.devminor, 0)

            # ...but the fields should not actually be set on regular files:
            with open(tmpname, "rb") as infile:
                buf = infile.read()
            buf_blk = buf[output_blk.offset:output_blk.offset_data]
            buf_reg = buf[output_reg.offset:output_reg.offset_data]
            # See `struct posixheader` in GNU docs for byte offsets:
            # <https://www.gnu.org/software/tar/manual/html_node/Standard.html>
            device_headers = slice(329, 329 + 16)
            self.assertEqual(buf_blk[device_headers], b"0000000\0" * 2)
            self.assertEqual(buf_reg[device_headers], b"\0" * 16)
        finally:
            os_helper.rmtree(tempdir)
William Chargin674935b2020-02-12 11:56:02 -08001616
1617
class CreateTest(WriteTestBase, unittest.TestCase):
    # Tests for creating archives with the "x:" mode prefix.

    prefix = "x:"

    # Payload file shared by all tests of the class.
    file_path = os.path.join(TEMPDIR, "spameggs42")

    def setUp(self):
        # Each test starts without an archive at tmpname.
        os_helper.unlink(tmpname)

    @classmethod
    def setUpClass(cls):
        with open(cls.file_path, "wb") as payload:
            payload.write(b"aaa")

    @classmethod
    def tearDownClass(cls):
        os_helper.unlink(cls.file_path)

    def _assert_only_spameggs(self, tobj):
        # The archive object must list exactly one member, the payload.
        names = tobj.getnames()
        self.assertEqual(len(names), 1)
        self.assertIn('spameggs42', names[0])

    def _assert_archive_has_only_spameggs(self):
        # Reopen the archive at tmpname and check its single member.
        with self.taropen(tmpname) as tobj:
            self._assert_only_spameggs(tobj)

    def test_create(self):
        with tarfile.open(tmpname, self.mode) as tobj:
            tobj.add(self.file_path)

        self._assert_archive_has_only_spameggs()

    def test_create_existing(self):
        with tarfile.open(tmpname, self.mode) as tobj:
            tobj.add(self.file_path)

        # A second creation attempt on the existing file must fail...
        with self.assertRaises(FileExistsError):
            tarfile.open(tmpname, self.mode)

        # ...and must leave the first archive as it was.
        self._assert_archive_has_only_spameggs()

    def test_create_taropen(self):
        with self.taropen(tmpname, "x") as tobj:
            tobj.add(self.file_path)

        self._assert_archive_has_only_spameggs()

    def test_create_existing_taropen(self):
        with self.taropen(tmpname, "x") as tobj:
            tobj.add(self.file_path)

        with self.assertRaises(FileExistsError):
            with self.taropen(tmpname, "x"):
                pass

        self._assert_archive_has_only_spameggs()

    def test_create_pathlike_name(self):
        with tarfile.open(pathlib.Path(tmpname), self.mode) as tobj:
            self.assertIsInstance(tobj.name, str)
            self.assertEqual(tobj.name, os.path.abspath(tmpname))
            tobj.add(pathlib.Path(self.file_path))
            self._assert_only_spameggs(tobj)

        self._assert_archive_has_only_spameggs()

    def test_create_taropen_pathlike_name(self):
        with self.taropen(pathlib.Path(tmpname), "x") as tobj:
            self.assertIsInstance(tobj.name, str)
            self.assertEqual(tobj.name, os.path.abspath(tmpname))
            tobj.add(pathlib.Path(self.file_path))
            self._assert_only_spameggs(tobj)

        self._assert_archive_has_only_spameggs()
1706
Berker Peksag0fe63252015-02-13 21:02:12 +02001707
# Runs the CreateTest cases with the GzipTest mixin applied.
class GzipCreateTest(GzipTest, CreateTest):
    pass
1710
1711
# Runs the CreateTest cases with the Bz2Test mixin applied.
class Bz2CreateTest(Bz2Test, CreateTest):
    pass
1714
1715
# Runs the CreateTest cases with the LzmaTest mixin applied.
class LzmaCreateTest(LzmaTest, CreateTest):
    pass
1718
1719
class CreateWithXModeTest(CreateTest):
    # Re-runs the CreateTest cases with "x" (no separator) as the prefix.

    prefix = "x"

    # Rebinding an inherited test method to None removes it from this
    # class.  Presumably these two are dropped because they pass "x" to
    # taropen() explicitly and so do not depend on prefix -- confirm.
    test_create_taropen = None
    test_create_existing_taropen = None
1726
1727
Serhiy Storchaka8b562922013-06-17 15:38:50 +03001728@unittest.skipUnless(hasattr(os, "link"), "Missing hardlink implementation")
class HardlinkTest(unittest.TestCase):
    # Test the creation of LNKTYPE (hardlink) members in an archive.

    def setUp(self):
        self.foo = os.path.join(TEMPDIR, "foo")
        self.bar = os.path.join(TEMPDIR, "bar")

        # Cleanups registered with addCleanup() run even when setUp()
        # skips or fails, whereas tearDown() would not be called.  That
        # keeps "foo" from being left behind in TEMPDIR when os.link()
        # is not permitted.  Cleanups run in reverse registration order.
        self.addCleanup(os_helper.unlink, self.bar)
        self.addCleanup(os_helper.unlink, self.foo)

        with open(self.foo, "wb") as fobj:
            fobj.write(b"foo")

        try:
            os.link(self.foo, self.bar)
        except PermissionError as e:
            self.skipTest('os.link(): %s' % e)

        self.tar = tarfile.open(tmpname, "w")
        # Registered last, so the archive is closed before the files are
        # removed (the same order the former tearDown() used).
        self.addCleanup(self.tar.close)
        self.tar.add(self.foo)

    def test_add_twice(self):
        # The same name will be added as a REGTYPE every
        # time regardless of st_nlink.
        tarinfo = self.tar.gettarinfo(self.foo)
        self.assertEqual(tarinfo.type, tarfile.REGTYPE,
                         "add file as regular failed")

    def test_add_hardlink(self):
        # "bar" is a second link to the already archived "foo".
        tarinfo = self.tar.gettarinfo(self.bar)
        self.assertEqual(tarinfo.type, tarfile.LNKTYPE,
                         "add file as hardlink failed")

    def test_dereference_hardlink(self):
        # With dereference enabled the link is reported as a regular file.
        self.tar.dereference = True
        tarinfo = self.tar.gettarinfo(self.bar)
        self.assertEqual(tarinfo.type, tarfile.REGTYPE,
                         "dereferencing hardlink failed")
1769
Neal Norwitza4f651a2004-07-20 22:07:44 +00001770
class PaxWriteTest(GNUWriteTest):
    # Repeats the inherited long name/link tests with archives written in
    # PAX format and adds tests for pax headers.

    def _test(self, name, link=None):
        # See GNUWriteTest.
        tarinfo = tarfile.TarInfo(name)
        if link:
            tarinfo.linkname = link
            tarinfo.type = tarfile.LNKTYPE

        with tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT) as tar:
            tar.addfile(tarinfo)

        with tarfile.open(tmpname) as tar:
            member = tar.getmembers()[0]
            if link:
                self.assertEqual(link, member.linkname,
                                 "PAX longlink creation failed")
            else:
                self.assertEqual(name, member.name,
                                 "PAX longname creation failed")

    def test_pax_global_header(self):
        pax_headers = {
                "foo": "bar",
                "uid": "0",
                "mtime": "1.23",
                "test": "\xe4\xf6\xfc",
                "\xe4\xf6\xfc": "test"}

        with tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT,
                          pax_headers=pax_headers) as tar:
            tar.addfile(tarfile.TarInfo("test"))

        # Test if the global header was written correctly.
        with tarfile.open(tmpname, encoding="iso8859-1") as tar:
            self.assertEqual(tar.pax_headers, pax_headers)
            self.assertEqual(tar.getmembers()[0].pax_headers, pax_headers)
            # Test if all the fields are strings.
            for key, val in tar.pax_headers.items():
                self.assertIsNot(type(key), bytes)
                self.assertIsNot(type(val), bytes)
                if key in tarfile.PAX_NUMBER_FIELDS:
                    # Values of numeric fields must be accepted by the
                    # converter registered for that field.
                    try:
                        tarfile.PAX_NUMBER_FIELDS[key](val)
                    except (TypeError, ValueError):
                        self.fail("unable to convert pax header field")

    def test_pax_extended_header(self):
        # The fields from the pax header have priority over the
        # TarInfo.
        pax_headers = {"path": "foo", "uid": "123"}

        with tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT,
                          encoding="iso8859-1") as tar:
            t = tarfile.TarInfo()
            t.name = "\xe4\xf6\xfc"     # non-ASCII
            t.uid = 8**8                # too large
            t.pax_headers = pax_headers
            tar.addfile(t)

        with tarfile.open(tmpname, encoding="iso8859-1") as tar:
            t = tar.getmembers()[0]
            self.assertEqual(t.pax_headers, pax_headers)
            self.assertEqual(t.name, "foo")
            self.assertEqual(t.uid, 123)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001853
1854
class UnicodeTest:
    # Mixin with encoding tests.  It reads self.format, which is not
    # defined here and has to come from the class it is mixed into.

    def test_iso8859_1_filename(self):
        self._test_unicode_filename("iso8859-1")

    def test_utf7_filename(self):
        self._test_unicode_filename("utf7")

    def test_utf8_filename(self):
        self._test_unicode_filename("utf-8")

    def _test_unicode_filename(self, encoding):
        # Store a member with a non-ASCII name using the given encoding
        # and check that the name reads back unchanged.
        name = "\xe4\xf6\xfc"
        with tarfile.open(tmpname, "w", format=self.format,
                          encoding=encoding, errors="strict") as tar:
            tar.addfile(tarfile.TarInfo(name))

        with tarfile.open(tmpname, encoding=encoding) as tar:
            self.assertEqual(tar.getmembers()[0].name, name)

    def test_unicode_filename_error(self):
        # With encoding="ascii" and errors="strict", non-ASCII text in
        # either the name or the uname field must be rejected.
        with tarfile.open(tmpname, "w", format=self.format,
                          encoding="ascii", errors="strict") as tar:
            tarinfo = tarfile.TarInfo()

            tarinfo.name = "\xe4\xf6\xfc"
            self.assertRaises(UnicodeError, tar.addfile, tarinfo)

            tarinfo.name = "foo"
            tarinfo.uname = "\xe4\xf6\xfc"
            self.assertRaises(UnicodeError, tar.addfile, tarinfo)

    def test_unicode_argument(self):
        # All text fields of the members read from the reference archive
        # must be str objects.
        with tarfile.open(tarname, "r",
                          encoding="iso8859-1", errors="strict") as tar:
            for t in tar:
                self.assertIs(type(t.name), str)
                self.assertIs(type(t.linkname), str)
                self.assertIs(type(t.uname), str)
                self.assertIs(type(t.gname), str)

    def test_uname_unicode(self):
        t = tarfile.TarInfo("foo")
        t.uname = "\xe4\xf6\xfc"
        t.gname = "\xe4\xf6\xfc"

        with tarfile.open(tmpname, mode="w", format=self.format,
                          encoding="iso8859-1") as tar:
            tar.addfile(t)

        with tarfile.open(tmpname, encoding="iso8859-1") as tar:
            t = tar.getmember("foo")
            self.assertEqual(t.uname, "\xe4\xf6\xfc")
            self.assertEqual(t.gname, "\xe4\xf6\xfc")

        if self.format != tarfile.PAX_FORMAT:
            # Reading the same archive as ASCII must yield surrogate
            # escapes for the non-ASCII bytes.
            with tarfile.open(tmpname, encoding="ascii") as tar:
                t = tar.getmember("foo")
                self.assertEqual(t.uname, "\udce4\udcf6\udcfc")
                self.assertEqual(t.gname, "\udce4\udcf6\udcfc")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001934
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001935
Lars Gustäbel0f450ab2016-04-19 08:43:17 +02001936class UstarUnicodeTest(UnicodeTest, unittest.TestCase):
1937
1938 format = tarfile.USTAR_FORMAT
1939
1940 # Test whether the utf-8 encoded version of a filename exceeds the 100
1941 # bytes name field limit (every occurrence of '\xff' will be expanded to 2
1942 # bytes).
1943 def test_unicode_name1(self):
1944 self._test_ustar_name("0123456789" * 10)
1945 self._test_ustar_name("0123456789" * 10 + "0", ValueError)
1946 self._test_ustar_name("0123456789" * 9 + "01234567\xff")
1947 self._test_ustar_name("0123456789" * 9 + "012345678\xff", ValueError)
1948
1949 def test_unicode_name2(self):
1950 self._test_ustar_name("0123456789" * 9 + "012345\xff\xff")
1951 self._test_ustar_name("0123456789" * 9 + "0123456\xff\xff", ValueError)
1952
1953 # Test whether the utf-8 encoded version of a filename exceeds the 155
1954 # bytes prefix + '/' + 100 bytes name limit.
1955 def test_unicode_longname1(self):
1956 self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 10)
1957 self._test_ustar_name("0123456789" * 15 + "0123/4" + "0123456789" * 10, ValueError)
1958 self._test_ustar_name("0123456789" * 15 + "012\xff/" + "0123456789" * 10)
1959 self._test_ustar_name("0123456789" * 15 + "0123\xff/" + "0123456789" * 10, ValueError)
1960
1961 def test_unicode_longname2(self):
1962 self._test_ustar_name("0123456789" * 15 + "01\xff/2" + "0123456789" * 10, ValueError)
1963 self._test_ustar_name("0123456789" * 15 + "01\xff\xff/" + "0123456789" * 10, ValueError)
1964
1965 def test_unicode_longname3(self):
1966 self._test_ustar_name("0123456789" * 15 + "01\xff\xff/2" + "0123456789" * 10, ValueError)
1967 self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 9 + "01234567\xff")
1968 self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 9 + "012345678\xff", ValueError)
1969
1970 def test_unicode_longname4(self):
1971 self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 9 + "012345\xff\xff")
1972 self._test_ustar_name("0123456789" * 15 + "01234/" + "0123456789" * 9 + "0123456\xff\xff", ValueError)
1973
def _test_ustar_name(self, name, exc=None):
    # Write a single member called *name* to tmpname using self.format and
    # utf-8 encoding.
    #
    # exc is None: adding the member must succeed and the name must
    #              survive a write/read round trip unchanged.
    # otherwise:   adding the member must raise *exc*; nothing is read back.
    with tarfile.open(tmpname, "w", format=self.format,
                      encoding="utf-8") as tar:
        t = tarfile.TarInfo(name)
        if exc is None:
            tar.addfile(t)
        else:
            self.assertRaises(exc, tar.addfile, t)

    if exc is not None:
        return

    with tarfile.open(tmpname, "r", encoding="utf-8") as tar:
        # Fetch the first member explicitly.  A "for ... break" loop would
        # silently skip the assertion if the archive came back empty.
        member = tar.next()
        self.assertIsNotNone(member, "archive contains no members")
        self.assertEqual(name, member.name)
1987
1988 # Test the same as above for the 100 bytes link field.
def test_unicode_link1(self):
    # Boundary checks for the link field: 100 ASCII characters are
    # accepted, 101 are rejected; likewise for a name ending in '\xff'.
    decade = "0123456789"
    self._test_ustar_link(decade * 10)
    self._test_ustar_link(decade * 10 + "0", ValueError)
    self._test_ustar_link(decade * 9 + "01234567\xff")
    self._test_ustar_link(decade * 9 + "012345678\xff", ValueError)
1994
def test_unicode_link2(self):
    # Link names ending in two '\xff': accepted at one length, rejected
    # with ValueError when one character longer.
    stem = "0123456789" * 9
    tail = "\xff" * 2
    self._test_ustar_link(stem + "012345" + tail)
    self._test_ustar_link(stem + "0123456" + tail, ValueError)
1998
def _test_ustar_link(self, name, exc=None):
    # Write a member "foo" whose linkname is *name* to tmpname using
    # self.format and utf-8 encoding.
    #
    # exc is None: adding the member must succeed and the linkname must
    #              survive a write/read round trip unchanged.
    # otherwise:   adding the member must raise *exc*; nothing is read back.
    with tarfile.open(tmpname, "w", format=self.format,
                      encoding="utf-8") as tar:
        t = tarfile.TarInfo("foo")
        t.linkname = name
        if exc is None:
            tar.addfile(t)
        else:
            self.assertRaises(exc, tar.addfile, t)

    if exc is not None:
        return

    with tarfile.open(tmpname, "r", encoding="utf-8") as tar:
        # Fetch the first member explicitly.  A "for ... break" loop would
        # silently skip the assertion if the archive came back empty.
        member = tar.next()
        self.assertIsNotNone(member, "archive contains no members")
        self.assertEqual(name, member.linkname)
2013
2014
class GNUUnicodeTest(UnicodeTest, unittest.TestCase):
    # Selects GNU_FORMAT for the test cases inherited from UnicodeTest.

    format = tarfile.GNU_FORMAT

    def test_bad_pax_header(self):
        # Test for issue #8633. GNU tar <= 1.23 creates raw binary fields
        # without a hdrcharset=BINARY header.  The member must be found
        # under the name each decoding produces.
        names_by_encoding = {
            "utf-8": "pax/bad-pax-\udce4\udcf6\udcfc",
            "iso8859-1": "pax/bad-pax-\xe4\xf6\xfc",
        }
        for codec, member_name in names_by_encoding.items():
            with tarfile.open(tarname, encoding=codec,
                              errors="surrogateescape") as archive:
                try:
                    archive.getmember(member_name)
                except KeyError:
                    self.fail("unable to read bad GNU tar pax header")
2031
Guido van Rossumd8faa362007-04-27 19:54:29 +00002032
class PAXUnicodeTest(UnicodeTest, unittest.TestCase):
    # Selects PAX_FORMAT for the test cases inherited from UnicodeTest.

    format = tarfile.PAX_FORMAT

    # PAX_FORMAT ignores encoding in write mode.
    test_unicode_filename_error = None

    def test_binary_header(self):
        # Test a POSIX.1-2008 compatible header with a hdrcharset=BINARY
        # field.  The member must be found under the name each decoding
        # produces.
        names_by_encoding = {
            "utf-8": "pax/hdrcharset-\udce4\udcf6\udcfc",
            "iso8859-1": "pax/hdrcharset-\xe4\xf6\xfc",
        }
        for codec, member_name in names_by_encoding.items():
            with tarfile.open(tarname, encoding=codec,
                              errors="surrogateescape") as archive:
                try:
                    archive.getmember(member_name)
                except KeyError:
                    self.fail("unable to read POSIX.1-2008 binary header")
2051
Lars Gustäbel3741eff2007-08-21 12:17:05 +00002052
class AppendTestBase:
    # Test append mode (cp. patch #1652681).
    # Shared fixtures; concrete classes combine this with unittest.TestCase.

    def setUp(self):
        # Start every test without a leftover archive at tmpname.
        target = self.tarname = tmpname
        if os.path.exists(target):
            os_helper.unlink(target)

    def _create_testtar(self, mode="w:"):
        # Copy the "ustar/regtype" member of the reference archive into a
        # new archive at self.tarname (opened with *mode*), renamed "foo".
        with tarfile.open(tarname, encoding="iso8859-1") as reference:
            member = reference.getmember("ustar/regtype")
            member.name = "foo"
            with reference.extractfile(member) as payload, \
                    tarfile.open(self.tarname, mode) as target:
                target.addfile(member, payload)

    def test_append_compressed(self):
        # Opening a compressed archive in append mode must raise ReadError.
        # self.suffix is expected to come from the compression mixin.
        compressed_mode = "w:" + self.suffix
        self._create_testtar(compressed_mode)
        with self.assertRaises(tarfile.ReadError):
            tarfile.open(tmpname, "a")
2072
class AppendTest(AppendTestBase, unittest.TestCase):
    # Append-mode tests for uncompressed archives.

    # Disable the inherited compressed-archive test for this class.
    test_append_compressed = None

    def _add_testfile(self, fileobj=None):
        # Append an empty member "bar" to the archive at self.tarname
        # (or to *fileobj* when given).
        with tarfile.open(self.tarname, "a", fileobj=fileobj) as tar:
            tar.addfile(tarfile.TarInfo("bar"))

    def _test(self, names=None, fileobj=None):
        # Check that the archive lists exactly *names*, in order.
        # names=None stands for ["bar"]; a None sentinel avoids sharing one
        # mutable default list between calls.
        if names is None:
            names = ["bar"]
        with tarfile.open(self.tarname, fileobj=fileobj) as tar:
            self.assertEqual(tar.getnames(), names)

    def test_non_existing(self):
        # Appending to a path that does not exist yet.
        self._add_testfile()
        self._test()

    def test_empty(self):
        # Appending to an archive that was created and closed without
        # any members.
        tarfile.open(self.tarname, "w:").close()
        self._add_testfile()
        self._test()

    def test_empty_fileobj(self):
        # Appending to an in-memory file that holds only NUL bytes.
        fobj = io.BytesIO(b"\0" * 1024)
        self._add_testfile(fobj)
        fobj.seek(0)
        self._test(fileobj=fobj)

    def test_fileobj(self):
        # Appending to an in-memory copy of an existing archive.
        self._create_testtar()
        with open(self.tarname, "rb") as src:
            data = src.read()
        fobj = io.BytesIO(data)
        self._add_testfile(fobj)
        fobj.seek(0)
        self._test(names=["foo", "bar"], fileobj=fobj)

    def test_existing(self):
        # Appending to an existing archive on disk.
        self._create_testtar()
        self._add_testfile()
        self._test(names=["foo", "bar"])

    # Append mode is supposed to fail if the tarfile to append to
    # does not end with a zero block.
    def _test_error(self, data):
        # Write *data* verbatim to self.tarname and expect appending to
        # raise ReadError.
        with open(self.tarname, "wb") as fobj:
            fobj.write(data)
        self.assertRaises(tarfile.ReadError, self._add_testfile)

    def test_null(self):
        self._test_error(b"")

    def test_incomplete(self):
        self._test_error(b"\0" * 13)

    def test_premature_eof(self):
        data = tarfile.TarInfo("foo").tobuf()
        self._test_error(data)

    def test_trailing_garbage(self):
        data = tarfile.TarInfo("foo").tobuf()
        self._test_error(data + b"\0" * 13)

    def test_invalid(self):
        self._test_error(b"a" * 512)
2136
class GzipAppendTest(GzipTest, AppendTestBase, unittest.TestCase):
    """Run the AppendTestBase tests with the GzipTest mixin."""
2139
class Bz2AppendTest(Bz2Test, AppendTestBase, unittest.TestCase):
    """Run the AppendTestBase tests with the Bz2Test mixin."""
2142
class LzmaAppendTest(LzmaTest, AppendTestBase, unittest.TestCase):
    """Run the AppendTestBase tests with the LzmaTest mixin."""
2145
Guido van Rossumd8faa362007-04-27 19:54:29 +00002146
class LimitsTest(unittest.TestCase):
    # Check which names, link names and uids TarInfo.tobuf() accepts or
    # rejects (ValueError) for each archive format.

    def test_ustar_limits(self):
        fmt = tarfile.USTAR_FORMAT

        # 100 char name
        info = tarfile.TarInfo("0123456789" * 10)
        info.tobuf(fmt)

        # 101 char name that cannot be stored
        info = tarfile.TarInfo("0123456789" * 10 + "0")
        with self.assertRaises(ValueError):
            info.tobuf(fmt)

        # 256 char name with a slash at pos 156
        info = tarfile.TarInfo("123/" * 62 + "longname")
        info.tobuf(fmt)

        # 256 char name that cannot be stored
        info = tarfile.TarInfo("1234567/" * 31 + "longname")
        with self.assertRaises(ValueError):
            info.tobuf(fmt)

        # 512 char name
        info = tarfile.TarInfo("123/" * 126 + "longname")
        with self.assertRaises(ValueError):
            info.tobuf(fmt)

        # 512 char linkname
        info = tarfile.TarInfo("longlink")
        info.linkname = "123/" * 126 + "longname"
        with self.assertRaises(ValueError):
            info.tobuf(fmt)

        # uid > 8 digits
        info = tarfile.TarInfo("name")
        info.uid = 0o10000000
        with self.assertRaises(ValueError):
            info.tobuf(fmt)

    def test_gnu_limits(self):
        fmt = tarfile.GNU_FORMAT
        long_path = "123/" * 126 + "longname"

        # A 512 char name and a 512 char linkname are both accepted.
        info = tarfile.TarInfo(long_path)
        info.tobuf(fmt)

        info = tarfile.TarInfo("longlink")
        info.linkname = long_path
        info.tobuf(fmt)

        # uid >= 256 ** 7
        info = tarfile.TarInfo("name")
        info.uid = 0o4000000000000000000
        with self.assertRaises(ValueError):
            info.tobuf(fmt)

    def test_pax_limits(self):
        fmt = tarfile.PAX_FORMAT
        long_path = "123/" * 126 + "longname"

        # Nothing here is expected to raise: long name, long linkname and
        # the very large uid are all accepted.
        info = tarfile.TarInfo(long_path)
        info.tobuf(fmt)

        info = tarfile.TarInfo("longlink")
        info.linkname = long_path
        info.tobuf(fmt)

        info = tarfile.TarInfo("name")
        info.uid = 0o4000000000000000000
        info.tobuf(fmt)
Guido van Rossumd8faa362007-04-27 19:54:29 +00002204
2205
class MiscTest(unittest.TestCase):
    # Tests for tarfile's low-level field helpers (stn, nts, nti, itn)
    # and for the module's __all__.

    def test_char_fields(self):
        # stn() pads with NULs or truncates to the requested length.
        stn_cases = (
            ("foo", 8, b"foo\0\0\0\0\0"),
            ("foobar", 3, b"foo"),
        )
        for text, length, packed in stn_cases:
            self.assertEqual(tarfile.stn(text, length, "ascii", "strict"),
                             packed)
        # nts() stops at the first NUL.
        for raw in (b"foo\0\0\0\0\0", b"foo\0bar\0"):
            self.assertEqual(tarfile.nts(raw, "ascii", "strict"), "foo")

    def test_read_number_fields(self):
        # Issue 13158: Test if GNU tar specific base-256 number fields
        # are decoded correctly.
        decode_cases = (
            (b"0000001\x00", 1),
            (b"7777777\x00", 0o7777777),
            (b"\x80\x00\x00\x00\x00\x20\x00\x00", 0o10000000),
            (b"\x80\x00\x00\x00\xff\xff\xff\xff", 0xffffffff),
            (b"\xff\xff\xff\xff\xff\xff\xff\xff", -1),
            (b"\xff\xff\xff\xff\xff\xff\xff\x9c", -100),
            (b"\xff\x00\x00\x00\x00\x00\x00\x00", -0x100000000000000),
            # Issue 24514: Test if empty number fields are converted to zero.
            (b"\0", 0),
            (b"       \0", 0),
        )
        for field, number in decode_cases:
            self.assertEqual(tarfile.nti(field), number)

    def test_write_number_fields(self):
        gnu = tarfile.GNU_FORMAT

        # Values that fit the octal representation, default format.
        self.assertEqual(tarfile.itn(1), b"0000001\x00")
        self.assertEqual(tarfile.itn(0o7777777), b"7777777\x00")

        gnu_cases = (
            (0o10000000, b"\x80\x00\x00\x00\x00\x20\x00\x00"),
            (0xffffffff, b"\x80\x00\x00\x00\xff\xff\xff\xff"),
            (-1, b"\xff\xff\xff\xff\xff\xff\xff\xff"),
            (-100, b"\xff\xff\xff\xff\xff\xff\xff\x9c"),
            (-0x100000000000000, b"\xff\x00\x00\x00\x00\x00\x00\x00"),
            # Issue 32713: Test if itn() supports float values outside the
            # non-GNU format range
            (-100.0, b"\xff\xff\xff\xff\xff\xff\xff\x9c"),
            (8 ** 12 + 0.0, b"\x80\x00\x00\x10\x00\x00\x00\x00"),
        )
        for number, field in gnu_cases:
            self.assertEqual(tarfile.itn(number, format=gnu), field)

        # A small negative float must read back as zero.
        encoded = tarfile.itn(-0.1, format=gnu)
        self.assertEqual(tarfile.nti(encoded), 0)

    def test_number_field_limits(self):
        # Each (value, digits, format) combination must raise ValueError.
        rejected = (
            (-1, 8, tarfile.USTAR_FORMAT),
            (0o10000000, 8, tarfile.USTAR_FORMAT),
            (-0x10000000001, 6, tarfile.GNU_FORMAT),
            (0x10000000000, 6, tarfile.GNU_FORMAT),
        )
        for number, digits, fmt in rejected:
            with self.assertRaises(ValueError):
                tarfile.itn(number, digits, fmt)

    def test__all__(self):
        # Public-looking module names that are deliberately left out of
        # tarfile.__all__.
        not_exported = {
            'version', 'grp', 'pwd', 'symlink_exception',
            'NUL', 'BLOCKSIZE', 'RECORDSIZE',
            'GNU_MAGIC', 'POSIX_MAGIC',
            'LENGTH_NAME', 'LENGTH_LINK', 'LENGTH_PREFIX',
            'REGTYPE', 'AREGTYPE', 'LNKTYPE', 'SYMTYPE', 'CHRTYPE',
            'BLKTYPE', 'DIRTYPE', 'FIFOTYPE', 'CONTTYPE',
            'GNUTYPE_LONGNAME', 'GNUTYPE_LONGLINK', 'GNUTYPE_SPARSE',
            'XHDTYPE', 'XGLTYPE', 'SOLARIS_XHDTYPE',
            'SUPPORTED_TYPES', 'REGULAR_TYPES', 'GNU_TYPES',
            'PAX_FIELDS', 'PAX_NAME_FIELDS', 'PAX_NUMBER_FIELDS',
            'stn', 'nts', 'nti', 'itn', 'calc_chksums',
            'copyfileobj', 'filemode',
            'EmptyHeaderError', 'TruncatedHeaderError', 'EOFHeaderError',
            'InvalidHeaderError', 'SubsequentHeaderError',
            'ExFileObject', 'main',
        }
        support.check__all__(self, tarfile, not_exported=not_exported)
Martin Panter104dcda2016-01-16 06:59:13 +00002285
Lars Gustäbelb506dc32007-08-07 18:36:16 +00002286
class CommandLineTest(unittest.TestCase):
    # Exercise the "python -m tarfile" command line in a subprocess:
    # test (-t), list (-l), create (-c) and extract (-e), each with and
    # without -v/--verbose where applicable.

    @staticmethod
    def _sample_files():
        # Paths of the two data files used as archive content.
        return [support.findfile('tokenize_tests.txt'),
                support.findfile('tokenize_tests-no-coding-cookie-'
                                 'and-utf8-bom-sig-only.txt')]

    def tarfilecmd(self, *args, **kwargs):
        # Run "python -m tarfile *args", requiring success.  Keyword
        # arguments are forwarded to assert_python_ok().  Returns stdout
        # with os.linesep normalised to b'\n'.
        _, out, _ = script_helper.assert_python_ok('-m', 'tarfile', *args,
                                                   **kwargs)
        return out.replace(os.linesep.encode(), b'\n')

    def tarfilecmd_failure(self, *args):
        # Run "python -m tarfile *args", requiring failure.  Returns the
        # (rc, out, err) result of assert_python_failure().
        return script_helper.assert_python_failure('-m', 'tarfile', *args)

    def make_simple_tarfile(self, tar_name):
        # Create an uncompressed archive at *tar_name* holding the sample
        # files under their base names; it is removed at test cleanup.
        self.addCleanup(os_helper.unlink, tar_name)
        with tarfile.open(tar_name, 'w') as tf:
            for tardata in self._sample_files():
                tf.add(tardata, arcname=os.path.basename(tardata))

    def test_bad_use(self):
        # No arguments at all: usage/error text on stderr.
        rc, out, err = self.tarfilecmd_failure()
        self.assertEqual(out, b'')
        self.assertIn(b'usage', err.lower())
        self.assertIn(b'error', err.lower())
        self.assertIn(b'required', err.lower())
        # Empty archive name: some diagnostic on stderr.
        rc, out, err = self.tarfilecmd_failure('-l', '')
        self.assertEqual(out, b'')
        self.assertNotEqual(err.strip(), b'')

    def test_test_command(self):
        for tar_name in testtarnames:
            for opt in '-t', '--test':
                out = self.tarfilecmd(opt, tar_name)
                self.assertEqual(out, b'')

    def test_test_command_verbose(self):
        for tar_name in testtarnames:
            for opt in '-v', '--verbose':
                out = self.tarfilecmd(opt, '-t', tar_name,
                                      PYTHONIOENCODING='utf-8')
                self.assertIn(b'is a tar archive.\n', out)

    def test_test_command_invalid_file(self):
        # A zip file is not a tar archive.
        zipname = support.findfile('zipdir.zip')
        rc, out, err = self.tarfilecmd_failure('-t', zipname)
        self.assertIn(b' is not a tar archive.', err)
        self.assertEqual(out, b'')
        self.assertEqual(rc, 1)

        # Neither is a tar archive cut off after its first 511 bytes.
        for tar_name in testtarnames:
            with self.subTest(tar_name=tar_name):
                with open(tar_name, 'rb') as f:
                    data = f.read()
                try:
                    with open(tmpname, 'wb') as f:
                        f.write(data[:511])
                    rc, out, err = self.tarfilecmd_failure('-t', tmpname)
                    self.assertEqual(out, b'')
                    self.assertEqual(rc, 1)
                finally:
                    os_helper.unlink(tmpname)

    def test_list_command(self):
        # The command's output must equal what TarFile.list() prints
        # in-process, encoded the same way.
        for tar_name in testtarnames:
            with support.captured_stdout() as t:
                with tarfile.open(tar_name, 'r') as tf:
                    tf.list(verbose=False)
            expected = t.getvalue().encode('ascii', 'backslashreplace')
            for opt in '-l', '--list':
                out = self.tarfilecmd(opt, tar_name,
                                      PYTHONIOENCODING='ascii')
                self.assertEqual(out, expected)

    def test_list_command_verbose(self):
        for tar_name in testtarnames:
            with support.captured_stdout() as t:
                with tarfile.open(tar_name, 'r') as tf:
                    tf.list(verbose=True)
            expected = t.getvalue().encode('ascii', 'backslashreplace')
            for opt in '-v', '--verbose':
                out = self.tarfilecmd(opt, '-l', tar_name,
                                      PYTHONIOENCODING='ascii')
                self.assertEqual(out, expected)

    def test_list_command_invalid_file(self):
        zipname = support.findfile('zipdir.zip')
        rc, out, err = self.tarfilecmd_failure('-l', zipname)
        self.assertIn(b' is not a tar archive.', err)
        self.assertEqual(out, b'')
        self.assertEqual(rc, 1)

    def test_create_command(self):
        files = self._sample_files()
        for opt in '-c', '--create':
            try:
                out = self.tarfilecmd(opt, tmpname, *files)
                self.assertEqual(out, b'')
                # The result must be readable as a tar archive.
                with tarfile.open(tmpname) as tar:
                    tar.getmembers()
            finally:
                os_helper.unlink(tmpname)

    def test_create_command_verbose(self):
        files = self._sample_files()
        for opt in '-v', '--verbose':
            try:
                out = self.tarfilecmd(opt, '-c', tmpname, *files,
                                      PYTHONIOENCODING='utf-8')
                self.assertIn(b' file created.', out)
                with tarfile.open(tmpname) as tar:
                    tar.getmembers()
            finally:
                os_helper.unlink(tmpname)

    def test_create_command_dotless_filename(self):
        files = [support.findfile('tokenize_tests.txt')]
        try:
            out = self.tarfilecmd('-c', dotlessname, *files)
            self.assertEqual(out, b'')
            with tarfile.open(dotlessname) as tar:
                tar.getmembers()
        finally:
            os_helper.unlink(dotlessname)

    def test_create_command_dot_started_filename(self):
        tar_name = os.path.join(TEMPDIR, ".testtar")
        files = [support.findfile('tokenize_tests.txt')]
        try:
            out = self.tarfilecmd('-c', tar_name, *files)
            self.assertEqual(out, b'')
            with tarfile.open(tar_name) as tar:
                tar.getmembers()
        finally:
            os_helper.unlink(tar_name)

    def test_create_command_compressed(self):
        files = self._sample_files()
        for filetype in (GzipTest, Bz2Test, LzmaTest):
            # NOTE(review): filetype.open is presumably falsy when the
            # compression module is unavailable - confirm against the
            # mixin definitions.
            if not filetype.open:
                continue
            # Bind the name before entering the try block so the cleanup
            # in "finally" always refers to this iteration's archive.
            tar_name = tmpname + '.' + filetype.suffix
            try:
                self.tarfilecmd('-c', tar_name, *files)
                with filetype.taropen(tar_name) as tar:
                    tar.getmembers()
            finally:
                os_helper.unlink(tar_name)

    def test_extract_command(self):
        self.make_simple_tarfile(tmpname)
        for opt in '-e', '--extract':
            try:
                with os_helper.temp_cwd(tarextdir):
                    out = self.tarfilecmd(opt, tmpname)
                self.assertEqual(out, b'')
            finally:
                os_helper.rmtree(tarextdir)

    def test_extract_command_verbose(self):
        self.make_simple_tarfile(tmpname)
        for opt in '-v', '--verbose':
            try:
                with os_helper.temp_cwd(tarextdir):
                    out = self.tarfilecmd(opt, '-e', tmpname,
                                          PYTHONIOENCODING='utf-8')
                self.assertIn(b' file is extracted.', out)
            finally:
                os_helper.rmtree(tarextdir)

    def test_extract_command_different_directory(self):
        self.make_simple_tarfile(tmpname)
        try:
            with os_helper.temp_cwd(tarextdir):
                out = self.tarfilecmd('-e', tmpname, 'spamdir')
            self.assertEqual(out, b'')
        finally:
            os_helper.rmtree(tarextdir)

    def test_extract_command_invalid_file(self):
        zipname = support.findfile('zipdir.zip')
        with os_helper.temp_cwd(tarextdir):
            rc, out, err = self.tarfilecmd_failure('-e', zipname)
        self.assertIn(b' is not a tar archive.', err)
        self.assertEqual(out, b'')
        self.assertEqual(rc, 1)
2478
2479
class ContextManagerTest(unittest.TestCase):
    # Tests for using TarFile objects in "with" statements.

    def test_basic(self):
        with tarfile.open(tarname) as tar:
            self.assertFalse(tar.closed, "closed inside runtime context")
        self.assertTrue(tar.closed, "context manager failed")

    def test_closed(self):
        # The __enter__() method is supposed to raise OSError
        # if the TarFile object is already closed.
        tar = tarfile.open(tarname)
        tar.close()
        with self.assertRaises(OSError):
            with tar:
                pass

    def test_exception(self):
        # Test if the OSError exception is passed through properly.
        with self.assertRaises(Exception) as exc:
            with tarfile.open(tarname) as tar:
                raise OSError
        self.assertIsInstance(exc.exception, OSError,
                              "wrong exception raised in context manager")
        self.assertTrue(tar.closed, "context manager failed")

    def test_no_eof(self):
        # __exit__() must not write end-of-archive blocks if an
        # exception was raised.
        try:
            with tarfile.open(tmpname, "w") as tar:
                raise Exception
        except Exception:
            # Only the deliberate exception above is expected here;
            # KeyboardInterrupt/SystemExit must still propagate.
            pass
        self.assertEqual(os.path.getsize(tmpname), 0,
                         "context manager wrote an end-of-archive block")
        self.assertTrue(tar.closed, "context manager failed")

    def test_eof(self):
        # __exit__() must write end-of-archive blocks, i.e. call
        # TarFile.close() if there was no error.
        with tarfile.open(tmpname, "w"):
            pass
        self.assertNotEqual(os.path.getsize(tmpname), 0,
                            "context manager wrote no end-of-archive block")

    def test_fileobj(self):
        # Test that __exit__() did not close the external file
        # object.
        with open(tmpname, "wb") as fobj:
            try:
                with tarfile.open(fileobj=fobj, mode="w") as tar:
                    raise Exception
            except Exception:
                # Only the deliberate exception above is expected here;
                # KeyboardInterrupt/SystemExit must still propagate.
                pass
            self.assertFalse(fobj.closed, "external file object was closed")
            self.assertTrue(tar.closed, "context manager failed")
Lars Gustäbel01385812010-03-03 12:08:54 +00002536
2537
@unittest.skipIf(hasattr(os, "link"), "requires os.link to be missing")
class LinkEmulationTest(ReadTest, unittest.TestCase):

    # Test for issue #8741 regression. On platforms that do not support
    # symbolic or hard links tarfile tries to extract these types of members
    # as the regular files they point to.
    def _test_link_extraction(self, name):
        # Extract member *name* into TEMPDIR and compare the digest of the
        # resulting file with sha256_regtype.
        self.tar.extract(name, TEMPDIR)
        extracted_path = os.path.join(TEMPDIR, name)
        with open(extracted_path, "rb") as extracted:
            digest = sha256sum(extracted.read())
        self.assertEqual(digest, sha256_regtype)

    # See issues #1578269, #8879, and #17689 for some history on these skips
    @unittest.skipIf(hasattr(os.path, "islink"),
                     "Skip emulation - has os.path.islink but not os.link")
    def test_hardlink_extraction1(self):
        member = "ustar/lnktype"
        self._test_link_extraction(member)

    @unittest.skipIf(hasattr(os.path, "islink"),
                     "Skip emulation - has os.path.islink but not os.link")
    def test_hardlink_extraction2(self):
        member = "./ustar/linktest2/lnktype"
        self._test_link_extraction(member)

    @unittest.skipIf(hasattr(os, "symlink"),
                     "Skip emulation if symlink exists")
    def test_symlink_extraction1(self):
        member = "ustar/symtype"
        self._test_link_extraction(member)

    @unittest.skipIf(hasattr(os, "symlink"),
                     "Skip emulation if symlink exists")
    def test_symlink_extraction2(self):
        member = "./ustar/linktest2/symtype"
        self._test_link_extraction(member)
2570
2571
class Bz2PartialReadTest(Bz2Test, unittest.TestCase):
    # Issue5068: The _BZ2Proxy.read() method loops forever
    # on an empty or partial bzipped file.

    def _test_partial_input(self, mode):
        # Feed tarfile.open() every prefix of a bz2-compressed header and
        # make sure it never keeps reading after end-of-file was reached.
        class MyBytesIO(io.BytesIO):
            # Set once a read() started at the end of the buffer; a further
            # read() without an intervening seek() is treated as a loop.
            hit_eof = False

            def read(self, n):
                if self.hit_eof:
                    raise AssertionError("infinite loop detected in "
                                         "tarfile.open()")
                self.hit_eof = self.tell() == len(self.getvalue())
                return super().read(n)

            def seek(self, *args):
                self.hit_eof = False
                return super().seek(*args)

        data = bz2.compress(tarfile.TarInfo("foo").tobuf())
        for x in range(len(data) + 1):
            try:
                # Close the archive again when opening succeeds instead of
                # leaving the TarFile open.
                with tarfile.open(fileobj=MyBytesIO(data[:x]), mode=mode):
                    pass
            except tarfile.ReadError:
                pass # we have no interest in ReadErrors

    def test_partial_input(self):
        self._test_partial_input("r")

    def test_partial_input_bz2(self):
        self._test_partial_input("r:bz2")
2601
2602
def root_is_uid_gid_0():
    # Return True only if uid 0 and gid 0 are both named 'root'.
    # Returns False when the pwd/grp modules cannot be imported or when
    # either database has no entry for id 0.
    try:
        import pwd
        import grp
    except ImportError:
        return False
    try:
        return (pwd.getpwuid(0)[0] == 'root' and
                grp.getgrgid(0)[0] == 'root')
    except KeyError:
        # getpwuid()/getgrgid() raise KeyError when id 0 has no entry.
        return False
2613
2614
Zachary Waread3e27a2015-05-12 23:57:21 -05002615@unittest.skipUnless(hasattr(os, 'chown'), "missing os.chown")
2616@unittest.skipUnless(hasattr(os, 'geteuid'), "missing os.geteuid")
Eric V. Smith7a803892015-04-15 10:27:58 -04002617class NumericOwnerTest(unittest.TestCase):
2618 # mock the following:
2619 # os.chown: so we can test what's being called
2620 # os.chmod: so the modes are not actually changed. if they are, we can't
2621 # delete the files/directories
2622 # os.geteuid: so we can lie and say we're root (uid = 0)
2623
@staticmethod
def _make_test_archive(filename_1, dirname_1, filename_2):
    # Create an archive at tmpname with three members - a file, a
    # directory, and a file inside that directory - each carrying its
    # own uid/gid pair and 'root' as user and group name.
    # Returns the full pathname of the archive.
    payload = io.BytesIO(b"content")

    # (name, uid, gid, member type, file object passed to addfile)
    entries = (
        (filename_1, 99, 98, tarfile.REGTYPE, payload),
        (dirname_1, 77, 76, tarfile.DIRTYPE, None),
        (filename_2, 88, 87, tarfile.REGTYPE, payload),
    )
    with tarfile.open(tmpname, 'w') as archive:
        for member_name, uid, gid, member_type, data in entries:
            info = tarfile.TarInfo(member_name)
            info.uid, info.gid = uid, gid
            info.uname = info.gname = 'root'
            info.type = member_type
            archive.addfile(info, data)

    return tmpname
2647
2648 @staticmethod
2649 @contextmanager
2650 def _setup_test(mock_geteuid):
2651 mock_geteuid.return_value = 0 # lie and say we're root
2652 fname = 'numeric-owner-testfile'
2653 dirname = 'dir'
2654
2655 # the names we want stored in the tarfile
2656 filename_1 = fname
2657 dirname_1 = dirname
2658 filename_2 = os.path.join(dirname, fname)
2659
2660 # create the tarfile with the contents we're after
2661 tar_filename = NumericOwnerTest._make_test_archive(filename_1,
2662 dirname_1,
2663 filename_2)
2664
2665 # open the tarfile for reading. yield it and the names of the items
2666 # we stored into the file
2667 with tarfile.open(tar_filename) as tarfl:
2668 yield tarfl, filename_1, dirname_1, filename_2
2669
2670 @unittest.mock.patch('os.chown')
2671 @unittest.mock.patch('os.chmod')
2672 @unittest.mock.patch('os.geteuid')
2673 def test_extract_with_numeric_owner(self, mock_geteuid, mock_chmod,
2674 mock_chown):
2675 with self._setup_test(mock_geteuid) as (tarfl, filename_1, _,
2676 filename_2):
2677 tarfl.extract(filename_1, TEMPDIR, numeric_owner=True)
2678 tarfl.extract(filename_2 , TEMPDIR, numeric_owner=True)
2679
2680 # convert to filesystem paths
2681 f_filename_1 = os.path.join(TEMPDIR, filename_1)
2682 f_filename_2 = os.path.join(TEMPDIR, filename_2)
2683
2684 mock_chown.assert_has_calls([unittest.mock.call(f_filename_1, 99, 98),
2685 unittest.mock.call(f_filename_2, 88, 87),
2686 ],
2687 any_order=True)
2688
2689 @unittest.mock.patch('os.chown')
2690 @unittest.mock.patch('os.chmod')
2691 @unittest.mock.patch('os.geteuid')
2692 def test_extractall_with_numeric_owner(self, mock_geteuid, mock_chmod,
2693 mock_chown):
2694 with self._setup_test(mock_geteuid) as (tarfl, filename_1, dirname_1,
2695 filename_2):
2696 tarfl.extractall(TEMPDIR, numeric_owner=True)
2697
2698 # convert to filesystem paths
2699 f_filename_1 = os.path.join(TEMPDIR, filename_1)
2700 f_dirname_1 = os.path.join(TEMPDIR, dirname_1)
2701 f_filename_2 = os.path.join(TEMPDIR, filename_2)
2702
2703 mock_chown.assert_has_calls([unittest.mock.call(f_filename_1, 99, 98),
2704 unittest.mock.call(f_dirname_1, 77, 76),
2705 unittest.mock.call(f_filename_2, 88, 87),
2706 ],
2707 any_order=True)
2708
2709 # this test requires that uid=0 and gid=0 really be named 'root'. that's
2710 # because the uname and gname in the test file are 'root', and extract()
2711 # will look them up using pwd and grp to find their uid and gid, which we
2712 # test here to be 0.
2713 @unittest.skipUnless(root_is_uid_gid_0(),
2714 'uid=0,gid=0 must be named "root"')
2715 @unittest.mock.patch('os.chown')
2716 @unittest.mock.patch('os.chmod')
2717 @unittest.mock.patch('os.geteuid')
2718 def test_extract_without_numeric_owner(self, mock_geteuid, mock_chmod,
2719 mock_chown):
2720 with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _):
2721 tarfl.extract(filename_1, TEMPDIR, numeric_owner=False)
2722
2723 # convert to filesystem paths
2724 f_filename_1 = os.path.join(TEMPDIR, filename_1)
2725
2726 mock_chown.assert_called_with(f_filename_1, 0, 0)
2727
2728 @unittest.mock.patch('os.geteuid')
2729 def test_keyword_only(self, mock_geteuid):
2730 with self._setup_test(mock_geteuid) as (tarfl, filename_1, _, _):
2731 self.assertRaises(TypeError,
2732 tarfl.extract, filename_1, TEMPDIR, False, True)
2733
2734
def setUpModule():
    """Create TEMPDIR and populate it with re-encoded copies of tarname.

    The global testtarnames is set to a list holding tarname followed by
    the tarname attribute of each of GzipTest, Bz2Test and LzmaTest whose
    open attribute is truthy.
    """
    global testtarnames

    os_helper.unlink(TEMPDIR)
    os.makedirs(TEMPDIR)

    testtarnames = [tarname]
    with open(tarname, "rb") as plain:
        raw = plain.read()

    # Create compressed tarfiles.
    for case in (GzipTest, Bz2Test, LzmaTest):
        opener = case.open
        if not opener:
            # presumably the matching compression module is unavailable
            continue
        target = case.tarname
        os_helper.unlink(target)
        testtarnames.append(target)
        with opener(target, "wb") as packed:
            packed.write(raw)
Guido van Rossumd8faa362007-04-27 19:54:29 +00002751
def tearDownModule():
    """Remove the TEMPDIR tree if it is still present."""
    if not os.path.exists(TEMPDIR):
        return
    os_helper.rmtree(TEMPDIR)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002755
# Run this module's tests when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()