blob: 5d4c86ce36662d82c304382e4f0c831ab5f51e2a [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
Guido van Rossumd8faa362007-04-27 19:54:29 +000032version = "0.9.0"
Guido van Rossum98297ee2007-11-06 21:34:58 +000033__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
Senthil Kumaran7c9719c2011-07-28 22:32:49 +080034__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
35__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $"
Guido van Rossum98297ee2007-11-06 21:34:58 +000036__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037
38#---------
39# Imports
40#---------
Serhiy Storchakacf4a2f22015-03-11 17:18:03 +020041from builtins import open as bltn_open
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000042import sys
43import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020044import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000045import shutil
46import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000047import time
48import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000049import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000050import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000051
52try:
Xavier de Gayef44abda2016-12-09 09:33:09 +010053 import pwd
Brett Cannoncd171c82013-07-04 17:43:24 -040054except ImportError:
Xavier de Gayef44abda2016-12-09 09:33:09 +010055 pwd = None
56try:
57 import grp
58except ImportError:
59 grp = None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060
# Exceptions that are treated as "symlinks cannot be created here":
#  - NotImplementedError: raised by os.symlink on Windows prior to 6.0.
#  - OSError (winerror=1314): raised if the caller does not hold the
#    SeCreateSymbolicLinkPrivilege privilege.
# OSError is a builtin that always exists, so the tuple is built directly
# instead of being extended inside a try/except NameError guard.
symlink_exception = (AttributeError, NotImplementedError, OSError)
69
# Names made available by "from tarfile import *".
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
           "CompressionError", "StreamError", "ExtractError", "HeaderError",
           "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
           "DEFAULT_FORMAT", "open"]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000075
76#---------------------------------------------------------
77# tar constants
78#---------------------------------------------------------
NUL = b"\0"                     # the null character (also used as padding byte)
BLOCKSIZE = 512                 # length of processing blocks, in bytes
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks)
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# One-byte type flags.
REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file (NUL type flag)
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

# Archive format identifiers.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT     # the default is the GNU tar format
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000111
112#---------------------------------------------------------
113# tarfile constants
114#---------------------------------------------------------
# Member types that belong to the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, GNUTYPE_SPARSE)

# Every member type tarfile can handle: the standard types followed by
# the GNU extensions.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
) + GNU_TYPES

# Member types that are handled like a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE, CONTTYPE, GNUTYPE_SPARSE)

# Pax header fields that override a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Pax header fields that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Pax header fields that hold numbers, mapped to the type used to convert
# them; all other fields are treated as strings.
PAX_NUMBER_FIELDS = dict(
    atime=float,
    ctime=float,
    mtime=float,
    uid=int,
    gid=int,
    size=int,
)
147
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000148#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000149# initialization
150#---------------------------------------------------------
# Default text encoding: always UTF-8 on Windows ("nt"), otherwise whatever
# the interpreter reports as the filesystem encoding.
ENCODING = "utf-8" if os.name == "nt" else sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000155
156#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000157# Some useful functions
158#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000159
def stn(s, length, encoding, errors):
    """Encode the string s into a bytes field of exactly `length` bytes.

    The encoded text is cut off after `length` bytes; shorter text is
    padded on the right with NUL bytes.
    """
    raw = s.encode(encoding, errors)
    padding = NUL * (length - len(raw))
    return raw[:length] + padding
Thomas Wouters477c8d52006-05-27 19:21:47 +0000165
def nts(s, encoding, errors):
    """Decode a null-terminated bytes object into a string.

    Everything from the first NUL byte onwards is discarded; if there is
    no NUL byte the whole object is decoded.
    """
    head, _sep, _tail = s.partition(b"\0")
    return head.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000173
def nti(s):
    """Convert a number field (bytes) to a python number.

    Raises InvalidHeaderError if the field is neither base-256 encoded
    nor a valid octal ASCII string.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    marker = s[0]
    if marker not in (0o200, 0o377):
        # Octal digits in ASCII; an empty or blank field counts as zero.
        try:
            text = nts(s, "ascii", "strict")
            return int(text.strip() or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")

    # Base-256: the bytes after the marker are a big-endian number.
    value = int.from_bytes(s[1:], "big")
    if marker == 0o377:
        # A 0o377 marker denotes a negative number.
        value -= 256 ** (len(s) - 1)
    return value
193
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

    n      -- the number to encode; non-integers (e.g. float timestamps)
              are truncated to an integer first.
    digits -- total width of the field in bytes.
    format -- archive format; the base-256 encoding is only used when
              this is GNU_FORMAT.

    Returns a bytes object (octal encoding) or a bytearray (base-256
    encoding) of `digits` bytes.  Raises ValueError("overflow in number
    field") if n cannot be represented in the field.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.

    # Coerce once up front: the base-256 branch uses bit operations, which
    # would fail with TypeError on a float that is out of the octal range.
    try:
        n = int(n)
    except OverflowError:
        # int(float("inf")) -- keep reporting this as a field overflow.
        raise ValueError("overflow in number field")

    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        if n >= 0:
            s = bytearray([0o200])
        else:
            s = bytearray([0o377])
            # Shift into the non-negative range so that the low digits-1
            # bytes extracted below are the two's complement form.
            n = 256 ** digits + n

        # Emit the low digits-1 bytes, most significant first, by always
        # inserting right after the marker byte.
        for i in range(digits - 1):
            s.insert(1, n & 0o377)
            n >>= 8
    else:
        raise ValueError("overflow in number field")

    return s
221
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

    All bytes of the header are summed up except for the 8-byte chksum
    field, which is counted as if it was filled with spaces (8 * 32 =
    256).  According to the GNU tar sources, some tars (Sun and NeXT)
    calculate chksum with signed char, which gives a different result
    when bytes with the high bit set are present, so both variants are
    computed.
    """
    def _total(code):
        # 148 bytes before the chksum field, 8 skipped, 356 after it.
        layout = "148{0}8x356{0}".format(code)
        return 256 + sum(struct.unpack_from(layout, buf))

    return _total("B"), _total("b")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000234
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, everything up to EOF is copied.  Data is moved in
    chunks of bufsize bytes (16 KiB when bufsize is not given).  If src
    runs out of data before length bytes were copied, `exception` is
    raised with the message "unexpected end of data".
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    full_chunks, tail = divmod(length, bufsize)

    def _chunk_sizes():
        # All full-size chunks first, then the remainder (if any).
        for _ in range(full_chunks):
            yield bufsize
        if tail != 0:
            yield tail

    for wanted in _chunk_sizes():
        data = src.read(wanted)
        if len(data) < wanted:
            raise exception("unexpected end of data")
        dst.write(data)
    return
259
def filemode(mode):
    """Deprecated in this location; use stat.filemode."""
    from warnings import warn

    # stacklevel 2 attributes the warning to the caller of filemode().
    warn("deprecated in favor of stat.filemode", DeprecationWarning, 2)
    return stat.filemode(mode)
266
def _safe_print(s):
    """Print s followed by a space, without a trailing newline.

    If sys.stdout reports an encoding, characters that cannot be
    represented in it are replaced by backslash escapes before printing.
    """
    codec = getattr(sys.stdout, 'encoding', None)
    if codec is None:
        text = s
    else:
        text = s.encode(codec, 'backslashreplace').decode(codec)
    print(text, end=' ')
272
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000273
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000307
308#---------------------------
309# internal stream interface
310#---------------------------
class _LowLevelFile:
    """Minimal file object built directly on os-level file descriptors.

    It supports reading and writing and is used instead of a regular
    file object for streaming access.
    """

    def __init__(self, name, mode):
        # Only "r" and "w" are known; any other mode raises KeyError.
        flags_by_mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }
        flags = flags_by_mode[mode]
        # O_BINARY does not exist on every platform; OR-ing in 0 is a no-op.
        flags |= getattr(os, "O_BINARY", 0)
        self.fd = os.open(name, flags, 0o666)

    def close(self):
        os.close(self.fd)

    def read(self, size):
        return os.read(self.fd, size)

    def write(self, s):
        os.write(self.fd, s)
334
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip, bzip2 or xz compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

        name     -- file name; used to open a _LowLevelFile when fileobj
                    is None and for the gzip FNAME header field.
        mode     -- "r" for reading; any other value selects the write
                    setup for compressed streams.
        comptype -- "tar" (no compression), "gz", "bz2", "xz", or "*"
                    to detect the compression from the stream content.
        fileobj  -- the underlying stream-like object, or None.
        bufsize  -- number of bytes transferred per raw read/write.

        Raises CompressionError if the required compression module is
        not available or comptype is unknown.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = b""     # raw bytes read from / pending for fileobj
        self.pos      = 0       # position in the uncompressed stream
        self.closed   = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    self._init_read_gz()
                    self.exception = zlib.error
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except:
            # Cleanup on any failure (re-raised below): only close the
            # file object if it was opened here.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" is missing if __init__ failed before setting it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: raw deflate data, the gzip header and trailer
        # are written by this class itself.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # gzip trailer: CRC32 and uncompressed size modulo 2**32.
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

        Raises ReadError if the magic number is wrong and
        CompressionError if the compression method is not deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            # Extra field: skip it with the raw reader.  The header is not
            # compressed, so it must not go through read(), which would
            # decompress the bytes and advance self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # NUL-terminated field, skipped byte by byte.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # Second NUL-terminated field, skipped the same way.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.

           Seeking forward is done by reading and discarding data.
           Raises StreamError when pos is before the current position.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            # The chunks are bytes, so they must be joined with a bytes
            # separator.
            buf = b"".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.

        For compressed streams, raw data is read and decompressed until
        size bytes are available or the underlying stream is exhausted.
        Raises ReadError if the compressed data is invalid.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                raise ReadError("invalid compressed data")
            self.dbuf += buf
            c += len(buf)
        buf = self.dbuf[:size]
        self.dbuf = self.dbuf[size:]
        return buf

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            self.buf += buf
            c += len(buf)
        buf = self.buf[:size]
        self.buf = self.buf[size:]
        return buf
# class _Stream
578
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        # Read the first block up front so getcomptype() can inspect it.
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        # The first call hands out the block read in __init__ (whatever
        # size was asked for); every later call goes straight to fileobj.
        first_block = self.buf
        self.read = self.fileobj.read
        return first_block

    def getcomptype(self):
        """Return "gz", "bz2", "xz" or "tar" judging by the first block."""
        head = self.buf
        if head.startswith(b"\x1f\x8b\x08"):
            return "gz"
        if head[0:3] == b"BZh" and head[4:10] == b"1AY&SY":
            return "bz2"
        if head.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        return "tar"

    def close(self):
        self.fileobj.close()
# class StreamProxy
605
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000606#------------------------
607# Extraction file object
608#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        # Without block information the file is a single run of data.
        extents = [(0, size)] if blockinfo is None else blockinfo

        # Construct a map with data and zero blocks.  Each entry is
        # (is_data, start, stop, offset in fileobj or None).
        self.map_index = 0
        self.map = []
        covered = 0
        physical = self.offset
        for start, length in extents:
            if start > covered:
                # Gap before this extent: a zero block.
                self.map.append((False, covered, start, None))
            self.map.append((True, start, start + length, physical))
            physical += length
            covered = start + length
        if covered < self.size:
            # Trailing zero block up to the file size.
            self.map.append((False, covered, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.

        Raises ValueError("Invalid argument") for an unknown whence.
        """
        if whence == io.SEEK_SET:
            target = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            moved = self.position + position
            target = max(moved, 0) if position < 0 else min(moved, self.size)
        elif whence == io.SEEK_END:
            target = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        self.position = target
        return self.position

    def read(self, size=None):
        """Read data from the file.

        Raises ReadError if the underlying file object returns less
        data than the map says is there.
        """
        available = self.size - self.position
        remaining = available if size is None else min(size, available)

        pieces = []
        while remaining > 0:
            # Find the map entry containing the current position,
            # starting at the entry used last and wrapping around.
            is_data, start, stop, real = self.map[self.map_index]
            while not (start <= self.position < stop):
                self.map_index = (self.map_index + 1) % len(self.map)
                is_data, start, stop, real = self.map[self.map_index]

            length = min(remaining, stop - self.position)
            if is_data:
                self.fileobj.seek(real + (self.position - start))
                chunk = self.fileobj.read(length)
                if len(chunk) != length:
                    raise ReadError("unexpected end of data")
                pieces.append(chunk)
            else:
                pieces.append(NUL * length)
            remaining -= length
            self.position += length
        return b"".join(pieces)

    def readinto(self, b):
        data = self.read(len(b))
        count = len(data)
        b[:count] = data
        return count

    def close(self):
        self.closed = True
#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000712
class ExFileObject(io.BufferedReader):
    """Buffered reader over the data of a single archive member."""

    def __init__(self, tarfile, tarinfo):
        # Expose only the member's data region (including its sparse
        # block information) of the archive's file object.
        member_view = _FileInFile(
            tarfile.fileobj,
            tarinfo.offset_data,
            tarinfo.size,
            tarinfo.sparse,
        )
        super().__init__(member_view)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000720
721#------------------
722# Exported Classes
723#------------------
class TarInfo(object):
    """Informational class which holds the details about an
    archive member given by a tar header block.

    TarInfo objects are returned by TarFile.getmember(),
    TarFile.getmembers() and TarFile.gettarinfo() and are
    usually created internally.
    """

    __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
                 "chksum", "type", "linkname", "uname", "gname",
                 "devmajor", "devminor",
                 "offset", "offset_data", "pax_headers", "sparse",
                 "tarfile", "_sparse_structs", "_link_target")

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
        of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name

    def _setpath(self, name):
        self.name = name

    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname

    def _setlinkpath(self, linkname):
        self.linkname = linkname

    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.name, id(self))

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.

        The mode is masked with 0o7777 and the name of a directory
        member always ends with a slash.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header as a string of 512 byte blocks.

        format must be USTAR_FORMAT, GNU_FORMAT or PAX_FORMAT; any
        other value raises ValueError. errors is not used for
        PAX_FORMAT.
        """
        info = self.get_info()

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.

        Raises ValueError if the encoded linkname exceeds LENGTH_LINK
        or if the name cannot be split into a prefix and a name part.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.

        Overlong link names and names are stored in preceding
        GNUTYPE_LONGLINK / GNUTYPE_LONGNAME pseudo members.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
        represented this way, prepend a pax extended header sequence
        with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")

    def _posix_split_name(self, name, encoding, errors):
        """Split a name longer than 100 chars into a prefix
        and a name part.

        The split happens at a "/" so that the encoded prefix fits into
        LENGTH_PREFIX bytes and the encoded rest into LENGTH_NAME bytes.
        Raises ValueError if no such split point exists.
        """
        components = name.split("/")
        for i in range(1, len(components)):
            prefix = "/".join(components[:i])
            name = "/".join(components[i:])
            if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
                    len(name.encode(encoding, errors)) <= LENGTH_NAME:
                break
        else:
            raise ValueError("name is too long")

        return prefix, name

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
        information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # Checksum field: eight spaces as a placeholder. The width
            # matters, the real checksum is spliced in below by offset.
            b"        ",
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", ""), 32, encoding, errors),
            stn(info.get("gname", ""), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Overwrite the first seven bytes of the checksum field (offset
        # 148) with six octal digits and a NUL; the eighth byte stays a
        # space.
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the string payload filled with zero bytes
        up to the next 512 byte border.
        """
        remainder = len(payload) % BLOCKSIZE
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
        for name.
        """
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a POSIX.1-2008 extended or global header sequence
        that contains a list of keyword, value pairs. The values
        must be strings.
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for value in pax_headers.values():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = b""
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records += b"21 hdrcharset=BINARY\n"

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, that the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The record length includes the digits of the length field
            # itself, so iterate until the value is stable.
            p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

        Raises EmptyHeaderError, TruncatedHeaderError, EOFHeaderError
        or InvalidHeaderError if buf is empty, has the wrong length,
        consists of NUL bytes only or has a bad checksum.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
        tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
        the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
        will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
        or longlink member.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize
        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
        POSIX.1-2008.

        Raises InvalidHeaderError if a record announces a length of
        zero and SubsequentHeaderError if the header that follows is
        missing or bad.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails.
        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
        if match is not None:
            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

        # For the time being, we don't care about anything other than "BINARY".
        # The only other value that is currently allowed by the standard is
        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        hdrcharset = pax_headers.get("hdrcharset")
        if hdrcharset == "BINARY":
            encoding = tarfile.encoding
        else:
            encoding = "utf-8"

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # pos is advanced by length below; a zero-length record
                # would be matched again and again without end.
                raise InvalidHeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            # Normally, we could just use "utf-8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(value, encoding, tarfile.encoding,
                        tarfile.errors)
            else:
                value = self._decode_pax_field(value, "utf-8", "utf-8",
                        tarfile.errors)

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(next, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(next, pax_headers, buf)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(next, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes))

    def _proc_gnusparse_01(self, next, pax_headers):
        """Process a GNU tar extended sparse header, version 0.1.
        """
        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
        """Process a GNU tar extended sparse header, version 1.0.
        """
        sparse = []
        buf = tarfile.fileobj.read(BLOCKSIZE)
        # The first line is the number of (offset, numbytes) pairs, the
        # numbers themselves follow one per line.
        fields, buf = buf.split(b"\n", 1)
        fields = int(fields)
        while len(sparse) < fields * 2:
            if b"\n" not in buf:
                buf += tarfile.fileobj.read(BLOCKSIZE)
            number, buf = buf.split(b"\n", 1)
            sparse.append(int(number))
        next.offset_data = tarfile.fileobj.tell()
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
        pax extended or global header.
        """
        for keyword, value in pax_headers.items():
            if keyword == "GNU.sparse.name":
                self.path = value
            elif keyword == "GNU.sparse.size":
                self.size = int(value)
            elif keyword == "GNU.sparse.realsize":
                self.size = int(value)
            elif keyword in PAX_FIELDS:
                if keyword in PAX_NUMBER_FIELDS:
                    try:
                        value = PAX_NUMBER_FIELDS[keyword](value)
                    except ValueError:
                        value = 0
                if keyword == "path":
                    value = value.rstrip("/")
                setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
        """Decode a single field from a pax record.

        value is decoded strictly with encoding first; if that fails
        it is decoded with fallback_encoding and fallback_errors.
        """
        try:
            return value.decode(encoding, "strict")
        except UnicodeDecodeError:
            return value.decode(fallback_encoding, fallback_errors)

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
        e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        """Return True if the member type is one of REGULAR_TYPES."""
        return self.type in REGULAR_TYPES

    def isfile(self):
        """Return the same result as isreg()."""
        return self.isreg()

    def isdir(self):
        """Return True if the member type is DIRTYPE."""
        return self.type == DIRTYPE

    def issym(self):
        """Return True if the member type is SYMTYPE."""
        return self.type == SYMTYPE

    def islnk(self):
        """Return True if the member type is LNKTYPE."""
        return self.type == LNKTYPE

    def ischr(self):
        """Return True if the member type is CHRTYPE."""
        return self.type == CHRTYPE

    def isblk(self):
        """Return True if the member type is BLKTYPE."""
        return self.type == BLKTYPE

    def isfifo(self):
        """Return True if the member type is FIFOTYPE."""
        return self.type == FIFOTYPE

    def issparse(self):
        """Return True if sparse information is set for the member."""
        return self.sparse is not None

    def isdev(self):
        """Return True if the member type is CHRTYPE, BLKTYPE or FIFOTYPE."""
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
1381
1382class TarFile(object):
1383 """The TarFile Class provides an interface to tar archives.
1384 """
1385
# Class-level defaults. __init__() overrides one of these on the instance
# only when the corresponding argument is not None (`errors' is always
# assigned).

# May be set from 0 (no msgs) to 3 (all msgs).
debug = 0

# If true, add content of linked file to the tar file, else the link.
dereference = False

# If true, skips empty or invalid blocks and continues processing.
ignore_zeros = False

# If 0, fatal errors only appear in debug messages (if debug >= 0).
# If > 0, errors are passed to the caller as exceptions.
errorlevel = 1

# The format to use when creating an archive.
format = DEFAULT_FORMAT

# Encoding for 8-bit character strings.
encoding = ENCODING

# Error handler for unicode conversion.
errors = None

# The default TarInfo class to use.
tarinfo = TarInfo

# The file-object for extractfile().
fileobject = ExFileObject
Guido van Rossumd8faa362007-04-27 19:54:29 +00001407
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None,
        errorlevel=None, copybufsize=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file, 'w' to create a new file overwriting an existing one or 'x'
       to open the file with the builtin 'xb' mode. `mode' defaults to 'r';
       any other value raises ValueError.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.
    """
    open_modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
    if mode not in open_modes:
        raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
    self.mode = mode
    self._mode = open_modes[mode]

    if fileobj:
        # Caller-supplied file object: borrow its name and mode where
        # available and remember that we must not close it ourselves.
        if (name is None and hasattr(fileobj, "name") and
                isinstance(fileobj.name, (str, bytes))):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    else:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes: only arguments that were actually given shadow
    # the class-level defaults.
    for attr, given in (("format", format),
                        ("tarinfo", tarinfo),
                        ("dereference", dereference),
                        ("ignore_zeros", ignore_zeros),
                        ("encoding", encoding)):
        if given is not None:
            setattr(self, attr, given)
    self.errors = errors

    # Global pax headers are only kept for PAX_FORMAT archives.
    use_pax = pax_headers is not None and self.format == PAX_FORMAT
    self.pax_headers = pax_headers if use_pax else {}

    for attr, given in (("debug", debug), ("errorlevel", errorlevel)):
        if given is not None:
            setattr(self, attr, given)

    # Init datastructures.
    self.copybufsize = copybufsize
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    member = self.tarinfo.fromtarfile(self)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))
                else:
                    self.members.append(member)

        if self.mode in ("a", "w", "x"):
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Whatever went wrong, do not leak a file we opened ourselves.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001507
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001508 #--------------------------------------------------------------------------
1509 # Below are the classmethods which act as alternate constructors to the
1510 # TarFile class. The open() method is the only one that is needed for
1511 # public use; it is the "super"-constructor and is able to select an
1512 # adequate "sub"-constructor for a particular compression using the mapping
1513 # from OPEN_METH.
1514 #
1515 # This concept allows one to subclass TarFile without losing the comfort of
1516 # the super-constructor. A sub-constructor is registered and made available
1517 # by adding it to the mapping in OPEN_METH.
1518
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'r:xz'       open for reading with lzma compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'w:xz'       open for writing with lzma compression

       'x' or 'x:'  create a tarfile exclusively without compression, raise
                    an exception if the file is already created
       'x:gz'       create a gzip compressed tarfile, raise an exception
                    if the file is already created
       'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                    if the file is already created
       'x:xz'       create an lzma compressed tarfile, raise an exception
                    if the file is already created

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'r|xz'       open an lzma compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing
       'w|xz'       open an lzma compressed stream for writing
    """
    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        # False sorts before True, so every opener other than 'taropen'
        # is tried first.
        candidates = sorted(
            cls.OPEN_METH,
            key=lambda ctype: cls.OPEN_METH[ctype] == 'taropen')
        for ctype in candidates:
            opener = getattr(cls, cls.OPEN_METH[ctype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return opener(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so the next opener sees the same bytes.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
        raise ReadError("file could not be opened successfully")

    if ":" in mode:
        filemode, _, comptype = mode.partition(":")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        opener = getattr(cls, cls.OPEN_METH[comptype])
        return opener(name, filemode, fileobj, **kwargs)

    if "|" in mode:
        filemode, _, comptype = mode.partition("|")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            archive = cls(name, filemode, stream, **kwargs)
        except:
            stream.close()
            raise
        # The stream wrapper was created here, so the archive owns it.
        archive._extfileobj = False
        return archive

    if mode in ("a", "w", "x"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001609
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.
       `mode' must be 'r', 'a', 'w' or 'x', otherwise ValueError is
       raised. Returns cls(name, mode, fileobj, **kwargs).
    """
    if mode in ("r", "a", "w", "x"):
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001617
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r', 'w' or 'x',
       CompressionError if the gzip module cannot be used, and ReadError
       if an OSError occurs while opening in mode 'r'.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    try:
        gz_file = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
    except OSError:
        if fileobj is not None and mode == 'r':
            raise ReadError("not a gzip file")
        raise

    try:
        archive = cls.taropen(name, mode, gz_file, **kwargs)
    except BaseException as exc:
        # Always release the gzip wrapper; only an OSError while reading
        # is reported as "not a gzip file".
        gz_file.close()
        if isinstance(exc, OSError) and mode == 'r':
            raise ReadError("not a gzip file")
        raise
    archive._extfileobj = False
    return archive
1651
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r', 'w' or 'x',
       CompressionError if the bz2 module cannot be imported, and
       ReadError if an OSError or EOFError occurs while opening in
       mode 'r'.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    bz_file = bz2.BZ2File(fileobj or name, mode,
                          compresslevel=compresslevel)

    try:
        archive = cls.taropen(name, mode, bz_file, **kwargs)
    except BaseException as exc:
        # Always release the bzip2 wrapper; only OSError/EOFError while
        # reading is reported as "not a bzip2 file".
        bz_file.close()
        if isinstance(exc, (OSError, EOFError)) and mode == 'r':
            raise ReadError("not a bzip2 file")
        raise
    archive._extfileobj = False
    return archive
1680
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open lzma compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r', 'w' or 'x',
       CompressionError if the lzma module cannot be imported, and
       ReadError if an lzma.LZMAError or EOFError occurs while opening
       in mode 'r'.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import lzma
    except ImportError:
        raise CompressionError("lzma module is not available")

    xz_file = lzma.LZMAFile(fileobj or name, mode, preset=preset)

    try:
        archive = cls.taropen(name, mode, xz_file, **kwargs)
    except BaseException as exc:
        # Always release the lzma wrapper; only LZMAError/EOFError while
        # reading is reported as "not an lzma file".
        xz_file.close()
        if isinstance(exc, (lzma.LZMAError, EOFError)) and mode == 'r':
            raise ReadError("not an lzma file")
        raise
    archive._extfileobj = False
    return archive
1708
# All *open() methods are registered here. Each key is a compression
# type as used in the mode string of open(); each value is the name of
# the classmethod that opens archives of that type:
#   tar -> uncompressed tar      gz -> gzip compressed tar
#   bz2 -> bzip2 compressed tar  xz -> lzma compressed tar
OPEN_METH = dict(
    tar="taropen",
    gz="gzopen",
    bz2="bz2open",
    xz="xzopen",
)
1716
1717 #--------------------------------------------------------------------------
1718 # The public methods which TarFile provides:
1719
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive. Calling close() on an already closed
       TarFile does nothing.
    """
    if self.closed:
        return

    self.closed = True
    try:
        if self.mode in ("a", "w", "x"):
            trailer = BLOCKSIZE * 2
            self.fileobj.write(NUL * trailer)
            self.offset += trailer
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            leftover = self.offset % RECORDSIZE
            if leftover > 0:
                self.fileobj.write(NUL * (RECORDSIZE - leftover))
    finally:
        # A file object handed in by the caller is left open.
        if not self._extfileobj:
            self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001740
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    # The actual lookup is delegated to _getmember(), which signals a
    # miss by returning None.
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001751
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    # If we want to obtain a list of all members, we first have to
    # scan the whole archive.
    fully_scanned = self._loaded
    if not fully_scanned:
        self._load()
    return self.members
1761
def getnames(self):
    """Return the members of the archive as a list of their names. It has
       the same order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001767
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object from the result of os.stat or equivalent
       on an existing file. The file is either named by `name', or
       specified as a file object `fileobj' with a file descriptor. If
       given, `arcname' specifies an alternative name for the file in the
       archive, otherwise, the name is taken from the 'name' attribute of
       'fileobj', or the 'name' argument. The name should be a text
       string. None is returned for a file type that is not one of
       regular file, directory, fifo, symbolic link, character device or
       block device.
    """
    self._check("awx")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self  # Not needed

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        already_archived = (not self.dereference and statres.st_nlink > 1
                            and inode in self.inodes
                            and arcname != self.inodes[inode])
        if already_archived:
            # Is it a hardlink to an already
            # archived file?
            member_type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            member_type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        member_type = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        member_type = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        member_type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        member_type = CHRTYPE
    elif stat.S_ISBLK(stmd):
        member_type = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry data in the archive.
    tarinfo.size = statres.st_size if member_type == REGTYPE else 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = member_type
    tarinfo.linkname = linkname
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if member_type in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1867
def list(self, verbose=True, *, members=None):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced. `members' is optional and must be a subset of the
       list returned by getmembers().
    """
    self._check()

    entries = self if members is None else members
    for tarinfo in entries:
        if verbose:
            _safe_print(stat.filemode(tarinfo.mode))
            # Fall back to the numeric ids when no names are recorded.
            owner = tarinfo.uname or tarinfo.uid
            group = tarinfo.gname or tarinfo.gid
            _safe_print("%s/%s" % (owner, group))
            if tarinfo.ischr() or tarinfo.isblk():
                device = "%d,%d" % (tarinfo.devmajor, tarinfo.devminor)
                _safe_print("%10s" % device)
            else:
                _safe_print("%10d" % tarinfo.size)
            stamp = time.localtime(tarinfo.mtime)[:6]
            _safe_print("%d-%02d-%02d %02d:%02d:%02d" % stamp)

        _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))

        if verbose:
            if tarinfo.issym():
                _safe_print("-> " + tarinfo.linkname)
            if tarinfo.islnk():
                _safe_print("link to " + tarinfo.linkname)
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001899
def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. Directory entries are added in sorted
       order, so the layout of the archive does not depend on the order in
       which the file system happens to list them. `exclude' is a
       deprecated function that should return True for each filename to be
       excluded; using it emits a DeprecationWarning. `filter' is a
       function that expects a TarInfo object argument and returns the
       changed TarInfo object, if it returns None the TarInfo object will
       be excluded from the archive.
    """
    self._check("awx")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as f:
            self.addfile(tarinfo, f)

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            # os.listdir() returns entries in arbitrary order; sort them
            # so that repeated runs produce the same member order.
            for f in sorted(os.listdir(name)):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                        recursive, exclude, filter=filter)

    else:
        self.addfile(tarinfo)
1960
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, it should be a binary file, and tarinfo.size bytes are read
       from it and added to the archive. You can create TarInfo objects
       directly, or by using gettarinfo(). A shallow copy of `tarinfo' is
       what gets stored in the member list.
    """
    self._check("awx")

    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)
    bufsize = self.copybufsize
    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size, bufsize=bufsize)
        blocks, remainder = divmod(member.size, BLOCKSIZE)
        if remainder > 0:
            # Pad the last block with NULs up to a full BLOCKSIZE.
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001985
def extractall(self, path=".", members=None, *, numeric_owner=False):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers(). If `numeric_owner` is True, only
       the numbers for user/group names are used and not the names.
    """
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        target = tarinfo
        if tarinfo.isdir():
            # Extract directories with a safe mode. The original
            # TarInfo is remembered so its real attributes can be
            # applied once everything has been extracted.
            directories.append(tarinfo)
            target = copy.copy(tarinfo)
            target.mode = 0o700
        # Do not set_attrs directories, as we will do that further down
        self.extract(target, path, set_attrs=not target.isdir(),
                     numeric_owner=numeric_owner)

    # Set correct owner, mtime and filemode on directories, visiting
    # them in reverse name order.
    for dirinfo in reversed(sorted(directories, key=lambda d: d.name)):
        dirpath = os.path.join(path, dirinfo.name)
        try:
            self.chown(dirinfo, dirpath, numeric_owner=numeric_owner)
            self.utime(dirinfo, dirpath)
            self.chmod(dirinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)
2025
def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
    """Extract a single member below `path', using its full name.

    `member' may be a filename (looked up with getmember()) or a TarInfo
    object. `path' defaults to the current working directory. File
    attributes (owner, mtime, mode) are set unless `set_attrs' is False.
    If `numeric_owner` is True, only the numbers for user/group names are
    used and not the names.

    An OSError is re-raised when self.errorlevel > 0 and an ExtractError
    when self.errorlevel > 1; otherwise the problem is only reported
    through _dbg().
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # makelink() needs to know where a hard link's target will live.
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(
            tarinfo,
            os.path.join(path, tarinfo.name),
            set_attrs=set_attrs,
            numeric_owner=numeric_owner,
        )
    except OSError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            report = "tarfile: %s" % e.strerror
        else:
            report = "tarfile: %s %r" % (e.strerror, e.filename)
        self._dbg(1, report)
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2063
def extractfile(self, member):
    """Return a file object for the data of `member', or None.

    `member' may be a filename or a TarInfo object. Regular files and
    members of an unsupported type yield whatever self.fileobject
    produces; a hard or symbolic link yields the file object of its
    target inside the archive. Any other member (directory, chrdev,
    blkdev, etc.) has no data and yields None.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Members with unknown types are treated as regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # No data is associated with this member.
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2094
def _extract_member(self, tarinfo, targetpath, set_attrs=True,
                    numeric_owner=False):
    """Materialize the TarInfo object `tarinfo' as the file `targetpath'.

    Missing parent directories are created first, then the member is
    created by the make*() method matching its type. Unless `set_attrs'
    is false, owner, mode and modification time are applied afterwards
    (mode and time are skipped for symbolic links).
    """
    # Drop trailing slashes and switch to the platform's separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Parent directories that are not part of the archive get default
    # permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Pick the creator for this member type, then run it.
    if tarinfo.isreg():
        create = self.makefile
    elif tarinfo.isdir():
        create = self.makedir
    elif tarinfo.isfifo():
        create = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        create = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        create = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        create = self.makeunknown
    else:
        create = self.makefile
    create(tarinfo, targetpath)

    if not set_attrs:
        return
    self.chown(tarinfo, targetpath, numeric_owner)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002138
2139 #--------------------------------------------------------------------------
2140 # Below are the different file methods. They are called via
2141 # _extract_member() when extract() is called. They can be replaced in a
2142 # subclass to implement other functionality.
2143
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'; an existing one is left alone.
    """
    # A restrictive mode is used here; _extract_member() applies the
    # member's real mode afterwards when attributes are requested.
    safe_mode = 0o700
    try:
        os.mkdir(targetpath, safe_mode)
    except FileExistsError:
        return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002153
def makefile(self, tarinfo, targetpath):
    """Write the data of `tarinfo' to a new file called targetpath.

    For a sparse member every (offset, size) segment is copied to its
    offset and the file is then truncated to tarinfo.size; otherwise
    tarinfo.size bytes are copied in one go.
    """
    archive = self.fileobj
    archive.seek(tarinfo.offset_data)
    chunk_size = self.copybufsize
    with bltn_open(targetpath, "wb") as out:
        if tarinfo.sparse is None:
            copyfileobj(archive, out, tarinfo.size, ReadError, chunk_size)
        else:
            for start, length in tarinfo.sparse:
                out.seek(start)
                copyfileobj(archive, out, length, ReadError, chunk_size)
            # Fix the final length of the file.
            out.seek(tarinfo.size)
            out.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002169
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as if it were a
    regular file, and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    notice = ("tarfile: Unknown file type %r, "
              "extracted as regular file." % tarinfo.type)
    self._dbg(1, notice)
2177
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called targetpath.

    Raises ExtractError when the os module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002185
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node called targetpath.

    Raises ExtractError when the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the member's permission bits with the device type bit.
    type_bit = stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR
    node_mode = tarinfo.mode | type_bit

    os.mknod(targetpath, node_mode,
             os.makedev(tarinfo.devmajor, tarinfo.devminor))
2200
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
    (platform limitation), we try to make a copy of the referenced file
    instead of a link.

    When `tarinfo' is a symbolic link and something already exists at
    targetpath, that entry is unlinked first so the link from the
    archive replaces it. Raises ExtractError if the fallback copy is
    needed but the link target cannot be found in the archive.
    """
    try:
        # For systems that support symbolic and hard links.
        if tarinfo.issym():
            if os.path.lexists(targetpath):
                # os.symlink() fails with FileExistsError when the path
                # is already taken (e.g. on a repeated extraction).
                # lexists() is used so that dangling links are seen too.
                os.unlink(targetpath)
            os.symlink(tarinfo.linkname, targetpath)
        else:
            # See extract().
            if os.path.exists(tarinfo._link_target):
                os.link(tarinfo._link_target, targetpath)
            else:
                # The target was not extracted; copy it from the archive.
                self._extract_member(self._find_link_target(tarinfo),
                                     targetpath)
    except symlink_exception:
        try:
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002223
def chown(self, tarinfo, targetpath, numeric_owner):
    """Set owner of targetpath according to tarinfo. If numeric_owner
    is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
    is False, fall back to .gid/.uid when the search based on name
    fails.

    Nothing is done unless os.geteuid() exists and returns 0. Raises
    ExtractError, with the underlying OSError as its cause, when the
    ownership change fails.
    """
    if hasattr(os, "geteuid") and os.geteuid() == 0:
        # We have to be root to do so.
        g = tarinfo.gid
        u = tarinfo.uid
        if not numeric_owner:
            # A failed name lookup keeps the numeric id from the archive.
            try:
                if grp:
                    g = grp.getgrnam(tarinfo.gname)[2]
            except KeyError:
                pass
            try:
                if pwd:
                    u = pwd.getpwnam(tarinfo.uname)[2]
            except KeyError:
                pass
        try:
            if tarinfo.issym() and hasattr(os, "lchown"):
                # Change the link itself, not the file it points to.
                os.lchown(targetpath, u, g)
            else:
                os.chown(targetpath, u, g)
        except OSError as e:
            # Chain explicitly so the errno/filename stay attached.
            raise ExtractError("could not change owner") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002252
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.

    Does nothing when the os module has no chmod(). Raises ExtractError,
    with the underlying OSError as its cause, when the change fails.
    """
    if hasattr(os, 'chmod'):
        try:
            os.chmod(targetpath, tarinfo.mode)
        except OSError as e:
            # Chain explicitly so the errno/filename stay attached.
            raise ExtractError("could not change mode") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002261
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.

    tarinfo.mtime is used for both the access and the modification
    time. Does nothing when the os module has no utime(). Raises
    ExtractError, with the underlying OSError as its cause, when the
    change fails.
    """
    if not hasattr(os, 'utime'):
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except OSError as e:
        # Chain explicitly so the errno/filename stay attached.
        raise ExtractError("could not change modification time") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002271
2272 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
    TarFile is opened for reading. Return None if there is no more
    available.

    A pending self.firstmember is handed out first. Otherwise the file
    pointer is moved to self.offset and the next header is parsed; a
    parsed member is appended to self.members, and when none is found
    the archive is marked as fully loaded. Raises ReadError for
    unreadable data (see the individual handlers below).
    """
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Advance the file pointer.
    if self.offset != self.fileobj.tell():
        if self.offset == 0:
            # There is no byte in front of offset 0 to probe, and
            # seeking to -1 is an invalid position.
            self.fileobj.seek(0)
        else:
            # Land one byte short and read it, so that a position past
            # the end of the data is detected.
            self.fileobj.seek(self.offset - 1)
            if not self.fileobj.read(1):
                raise ReadError("unexpected end of data")

    # Read the next block.
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as e:
            if self.ignore_zeros:
                # Skip the block and try again.
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as e:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                # A bad header is only an error for the first block.
                raise ReadError(str(e))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as e:
            if self.offset == 0:
                raise ReadError(str(e))
        except SubsequentHeaderError as e:
            raise ReadError(str(e))
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        self._loaded = True

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002323
2324 #--------------------------------------------------------------------------
2325 # Little helper methods:
2326
def _getmember(self, name, tarinfo=None, normalize=False):
    """Return the last archive member called `name', or None.

    The member list is searched from bottom to top. If `tarinfo' is
    given, only members stored before it are considered. With
    `normalize' set, both `name' and the member names are compared
    after os.path.normpath().
    """
    # getmembers() ensures that all members have been loaded.
    candidates = self.getmembers()

    if tarinfo is not None:
        # Limit the member search list up to tarinfo.
        cutoff = candidates.index(tarinfo)
        candidates = candidates[:cutoff]

    if normalize:
        name = os.path.normpath(name)

    for candidate in reversed(candidates):
        candidate_name = candidate.name
        if normalize:
            candidate_name = os.path.normpath(candidate_name)
        if name == candidate_name:
            return candidate
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002349
def _load(self):
    """Call next() until it returns None, then mark the archive as
    loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2359
def _check(self, mode=None):
    """Raise OSError if the TarFile is closed or if, when `mode' is
    given, the TarFile's own mode is not contained in it.
    """
    if self.closed:
        raise OSError("%s is closed" % self.__class__.__name__)
    mode_allowed = mode is None or self.mode in mode
    if not mode_allowed:
        raise OSError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002368
def _find_link_target(self, tarinfo):
    """Return the archive member a symlink or hardlink member points to.

    Raises KeyError when no such member is found.
    """
    if tarinfo.issym():
        # A symlink name is taken relative to the link's directory, and
        # the entire archive is searched.
        pieces = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join(piece for piece in pieces if piece)
        limit = None
    else:
        # A hard link is just a reference to an already archived file,
        # so only the members before the link are searched.
        linkname = tarinfo.linkname
        limit = tarinfo

    found = self._getmember(linkname, tarinfo=limit, normalize=True)
    if found is None:
        raise KeyError("linkname %r not found" % linkname)
    return found
2387
def __iter__(self):
    """Iterate over the members of the archive.

    A fully loaded archive simply yields self.members. Otherwise
    members already present in self.members are yielded by position and
    further ones are fetched with next(); once next() is exhausted the
    archive is marked as loaded.
    """
    if self._loaded:
        yield from self.members
        return

    position = 0
    # Fix for SF #1100429: Under rare circumstances it can
    # happen that getmembers() is called during iteration,
    # which will have already exhausted the next() method.
    if self.firstmember is not None:
        first = self.next()
        position += 1
        yield first

    while True:
        if position < len(self.members):
            current = self.members[position]
        elif self._loaded:
            return
        else:
            current = self.next()
            if not current:
                self._loaded = True
                return
        position += 1
        yield current
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002418
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr if `level' does not exceed self.debug.
    """
    wanted = level <= self.debug
    if wanted:
        print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002424
def __enter__(self):
    """Enter the runtime context: verify via _check() that the TarFile
    is usable and return the TarFile itself.
    """
    self._check()
    return self
2428
def __exit__(self, type, value, traceback):
    """Leave the runtime context.

    Without an exception the archive is closed normally. After an
    exception close() is not called; only a file object that was not
    supplied from outside is closed, and the TarFile is flagged closed.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because
    # it would try to write end-of-archive blocks and padding.
    owns_fileobj = not self._extfileobj
    if owns_fileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002438
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002439#--------------------
2440# exported functions
2441#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
    are able to handle, else return False.

    The check opens the archive with this module's open() and closes
    it again; a TarError means the answer is False.
    """
    try:
        archive = open(name)
        archive.close()
    except TarError:
        return False
    return True


# Module-level alias, so that open() in this module is TarFile.open.
open = TarFile.open
Serhiy Storchakad27b4552013-11-24 01:53:29 +02002454
2455
def main():
    """Command line interface of the module.

    Exactly one of --list, --extract, --create or --test selects the
    action; without any of them the help text is printed and the
    process exits with status 1.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command line interface for tarfile module.')
    parser.add_argument('-v', '--verbose', action='store_true',
                        default=False, help='Verbose output')
    actions = parser.add_mutually_exclusive_group()
    actions.add_argument('-l', '--list', metavar='<tarfile>',
                         help='Show listing of a tarfile')
    actions.add_argument('-e', '--extract', nargs='+',
                         metavar=('<tarfile>', '<output_dir>'),
                         help='Extract tarfile into target dir')
    actions.add_argument('-c', '--create', nargs='+',
                         metavar=('<name>', '<file>'),
                         help='Create tarfile from sources')
    actions.add_argument('-t', '--test', metavar='<tarfile>',
                         help='Test if a tarfile is valid')
    args = parser.parse_args()

    # parser.exit() terminates the process, so the code after each call
    # below only runs for a valid archive.
    not_a_tar = '{!r} is not a tar archive.\n'

    if args.test:
        src = args.test
        if not is_tarfile(src):
            parser.exit(1, not_a_tar.format(src))
        with open(src, 'r') as tar:
            tar.getmembers()
            print(tar.getmembers(), file=sys.stderr)
        if args.verbose:
            print('{!r} is a tar archive.'.format(src))

    elif args.list:
        src = args.list
        if not is_tarfile(src):
            parser.exit(1, not_a_tar.format(src))
        with TarFile.open(src, 'r:*') as tf:
            tf.list(verbose=args.verbose)

    elif args.extract:
        operands = args.extract
        if len(operands) > 2:
            parser.exit(1, parser.format_help())
        src = operands[0]
        # The output directory is optional.
        curdir = operands[1] if len(operands) == 2 else os.curdir

        if not is_tarfile(src):
            parser.exit(1, not_a_tar.format(src))
        with TarFile.open(src, 'r:*') as tf:
            tf.extractall(path=curdir)
        if args.verbose:
            if curdir == '.':
                msg = '{!r} file is extracted.'.format(src)
            else:
                msg = ('{!r} file is extracted '
                       'into {!r} directory.').format(src, curdir)
            print(msg)

    elif args.create:
        tar_name, *tar_files = args.create
        # Map the archive name's extension to a compression method.
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        ext = os.path.splitext(tar_name)[1]
        method = compressions.get(ext)
        tar_mode = 'w' if method is None else 'w:' + method

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))

    else:
        parser.exit(1, parser.format_help())


if __name__ == '__main__':
    main()