blob: 2d702dd2ec6af9feab97908788ee90e724133246 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
# Module metadata: release version, author, revision-control keyword
# strings and credits.
version = "0.9.0"
__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037
38#---------
39# Imports
40#---------
Serhiy Storchakacf4a2f22015-03-11 17:18:03 +020041from builtins import open as bltn_open
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000042import sys
43import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020044import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000045import shutil
46import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000047import time
48import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000049import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000050import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000051
52try:
Xavier de Gayef44abda2016-12-09 09:33:09 +010053 import pwd
Brett Cannoncd171c82013-07-04 17:43:24 -040054except ImportError:
Xavier de Gayef44abda2016-12-09 09:33:09 +010055 pwd = None
56try:
57 import grp
58except ImportError:
59 grp = None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000060
# Exceptions caught when creating a symbolic link fails:
#   * NotImplementedError: os.symlink on Windows prior to 6.0.
#   * OSError (winerror=1314): the caller does not hold the
#     SeCreateSymbolicLinkPrivilege privilege.
# OSError is a builtin and therefore always defined, so no NameError
# guard is needed when adding it to the tuple.
symlink_exception = (AttributeError, NotImplementedError, OSError)
69
# Names exported by "from tarfile import *".
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
           "CompressionError", "StreamError", "ExtractError", "HeaderError",
           "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
           "DEFAULT_FORMAT", "open"]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000075
76#---------------------------------------------------------
77# tar constants
78#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# Both magic strings cover the 6-byte magic field plus the 2-byte version
# field of a header, so each must be exactly 8 bytes long.  The GNU
# variant is "ustar" followed by TWO spaces and a null byte.
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000111
112#---------------------------------------------------------
113# tarfile constants
114#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, all other fields
# are treated as strings.  Each key maps to the callable used to
# convert the field's value.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
147
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000148#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000149# initialization
150#---------------------------------------------------------
# Module-wide default text encoding: always UTF-8 on Windows (os.name ==
# "nt"), otherwise whatever the interpreter reports as the filesystem
# encoding.
ENCODING = "utf-8" if os.name == "nt" else sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000155
156#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000157# Some useful functions
158#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000159
def stn(s, length, encoding, errors):
    """Encode the string s and return it as a bytes field: the encoded
       data is cut off after length bytes, or right-padded with null
       bytes when it is shorter than length.
    """
    encoded = s.encode(encoding, errors)
    # A negative repeat count yields b"", so over-long input gets no padding.
    padding = (length - len(encoded)) * NUL
    return encoded[:length] + padding
Thomas Wouters477c8d52006-05-27 19:21:47 +0000165
def nts(s, encoding, errors):
    """Decode the part of the bytes object s that precedes its first
       null byte (all of s when it contains none) and return the
       resulting string.
    """
    head, _sep, _tail = s.partition(b"\0")
    return head.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000173
def nti(s):
    """Convert a number field to a python number.

       Two encodings are understood (see itn()): a null-terminated
       string of octal digits, or a leading 0o200 / 0o377 marker byte
       followed by a big-endian base-256 number (0o377 marks a negative
       value).  Raises InvalidHeaderError when the octal form cannot be
       parsed.
    """
    marker = s[0]
    if marker not in (0o200, 0o377):
        # Octal digits; an empty or all-blank field counts as zero.
        try:
            text = nts(s, "ascii", "strict")
            return int(text.strip() or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")

    # Base-256: every byte after the marker is one big-endian digit.
    value = int.from_bytes(s[1:], "big")
    if marker == 0o377:
        value = -(256 ** (len(s) - 1) - value)
    return value
193
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

       n is converted with int() first, so float values (for example a
       float modification time) are accepted by both encodings.  digits
       is the total width of the field in bytes; format selects whether
       the GNU base-256 extension may be used.

       Returns a bytes object for the octal encoding and a bytearray for
       the base-256 encoding.  Raises ValueError("overflow in number
       field") when n fits neither encoding.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.
    n = int(n)
    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        if n >= 0:
            marker = 0o200
        else:
            marker = 0o377
            # Two's complement of n within the digits-1 payload bytes.
            n += 256 ** (digits - 1)
        s = bytearray([marker]) + n.to_bytes(digits - 1, "big")
    else:
        raise ValueError("overflow in number field")

    return s
221
def calc_chksums(buf):
    """Return the pair (unsigned_chksum, signed_chksum) for a member's
       header block.

       All bytes of the header are summed except the chksum field,
       which is treated as if it was filled with spaces.  According to
       the GNU tar sources, some tars (Sun and NeXT) calculate chksum
       with signed char, which gives a different result when bytes with
       the high bit set are present, hence the two values.
    """
    # Layout: 148 bytes before the chksum field, 8 skipped bytes for the
    # field itself, 356 bytes after it.  The constant 256 is the value of
    # the 8 skipped bytes counted as spaces (8 * 0x20).
    layouts = ("148B8x356B", "148b8x356b")      # unsigned, signed
    return tuple(256 + sum(struct.unpack_from(layout, buf))
                 for layout in layouts)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000234
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Data is moved in pieces of bufsize bytes (16 KiB when bufsize is
       None or 0).  If src runs dry before length bytes were copied,
       exception("unexpected end of data") is raised.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    def transfer(nbytes):
        # Move exactly nbytes from src to dst or fail.
        chunk = src.read(nbytes)
        if len(chunk) < nbytes:
            raise exception("unexpected end of data")
        dst.write(chunk)

    full_chunks, tail = divmod(length, bufsize)
    for _ in range(full_chunks):
        transfer(bufsize)
    if tail != 0:
        transfer(tail)
    return
259
def filemode(mode):
    """Deprecated in this location; use stat.filemode."""
    from warnings import warn
    # stacklevel=2 attributes the warning to the caller of filemode().
    warn("deprecated in favor of stat.filemode", DeprecationWarning,
         stacklevel=2)
    return stat.filemode(mode)
266
def _safe_print(s):
    """Print s followed by a single space instead of a newline.

       When sys.stdout reports an encoding, characters that the encoding
       cannot represent are replaced by backslash escapes first.
    """
    out_encoding = getattr(sys.stdout, 'encoding', None)
    if out_encoding is None:
        text = s
    else:
        text = s.encode(out_encoding, 'backslashreplace').decode(out_encoding)
    print(text, end=' ')
272
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000273
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000307
308#---------------------------
309# internal stream interface
310#---------------------------
class _LowLevelFile:
    """Minimal file object built directly on os.open()/os.read()/
       os.write().  Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        # Only "r" and "w" are known; any other mode raises KeyError.
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # O_BINARY is only defined on some platforms.
        flags |= getattr(os, "O_BINARY", 0)
        self.fd = os.open(name, flags, 0o666)

    def close(self):
        os.close(self.fd)

    def read(self, size):
        return os.read(self.fd, size)

    def write(self, s):
        os.write(self.fd, s)
333 os.write(self.fd, s)
334
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip, bzip2 or xz compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name     -- file name; opened with _LowLevelFile when fileobj
                       is None
           mode     -- "r" or "w"
           comptype -- "tar", "gz", "bz2", "xz", or "*" to detect the
                       compression from the first block that is read
           fileobj  -- an existing stream-like object, or None
           bufsize  -- number of bytes per read/write on fileobj

           Raises CompressionError if the compression type is unknown or
           its module is not available.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = b""
        self.pos = 0
        self.closed = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    # Set before reading anything so that _read() can
                    # always rely on the attribute being present.
                    self.exception = zlib.error
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except:
            # Deliberately catches everything: release a file we opened
            # ourselves, then re-raise the original exception unchanged.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" is missing when __init__ failed before setting it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write the bytes s to the stream, compressing them first
           unless the stream is a plain tar stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Append s to the internal buffer and write out every whole
           block of bufsize bytes that has accumulated.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # gzip trailer: CRC32 and size of the uncompressed
                    # data modulo 2**32, both little-endian.
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

           Consumes the gzip header.  Raises ReadError if the magic
           number is wrong and CompressionError if the compression
           method is not deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            # Extra field: part of the uncompressed header, so it has to
            # be skipped with the raw __read(); going through read()
            # would feed it to the decompressor and advance self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # Null-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # Null-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # Header CRC16.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.

           Seeking forward is done by reading and discarding data.
           Raises StreamError when pos lies before the current position.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            # The chunks are bytes objects, so join them with a bytes
            # separator.
            buf = b"".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream, decompressing as needed.
           Raises ReadError if the compressed data is invalid.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                raise ReadError("invalid compressed data")
            self.dbuf += buf
            c += len(buf)
        buf = self.dbuf[:size]
        self.dbuf = self.dbuf[size:]
        return buf

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            self.buf += buf
            c += len(buf)
        buf = self.buf[:size]
        self.buf = self.buf[size:]
        return buf
# class _Stream
578
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').

       The first BLOCKSIZE bytes of the wrapped file object are read
       ahead and kept in self.buf so that getcomptype() can inspect
       them.
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        # The first call hands back the whole look-ahead buffer
        # (regardless of size); from then on the instance attribute
        # installed here sends reads straight to the wrapped file object.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2", "xz" or "tar" depending on the magic
           bytes found at the start of the look-ahead buffer.
        """
        head = self.buf
        if head.startswith(b"\x1f\x8b\x08"):
            return "gz"
        if head[0:3] == b"BZh" and head[4:10] == b"1AY&SY":
            return "bz2"
        if head.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        return "tar"

    def close(self):
        self.fileobj.close()
# class StreamProxy
605
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000606#------------------------
607# Extraction file object
608#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.

       blockinfo is an optional list of (offset, size) pairs describing
       where stored data sits inside the logical file; the gaps between
       those pairs are read back as null bytes.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.  Each entry is
        # (is_data, logical_start, logical_stop, position_in_fileobj).
        self.map_index = 0
        self.map = []
        logical_end = 0
        physical = self.offset
        for seg_start, seg_len in blockinfo:
            if seg_start > logical_end:
                self.map.append((False, logical_end, seg_start, None))
            self.map.append((True, seg_start, seg_start + seg_len, physical))
            physical += seg_len
            logical_end = seg_start + seg_len
        if logical_end < self.size:
            self.map.append((False, logical_end, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.  The resulting position is
           clamped to the file's bounds and returned.  Raises ValueError
           for an unknown whence.
        """
        if whence == io.SEEK_SET:
            target = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            moved = self.position + position
            target = max(moved, 0) if position < 0 else min(moved, self.size)
        elif whence == io.SEEK_END:
            target = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        self.position = target
        return target

    def read(self, size=None):
        """Read data from the file.  Without size, everything up to the
           end is returned.  Raises ReadError when the underlying file
           object delivers less data than the map promises.
        """
        available = self.size - self.position
        size = available if size is None else min(size, available)

        pieces = []
        while size > 0:
            # Advance (cyclically) to the map entry holding the position.
            is_data, start, stop, real = self.map[self.map_index]
            while not (start <= self.position < stop):
                self.map_index += 1
                if self.map_index == len(self.map):
                    self.map_index = 0
                is_data, start, stop, real = self.map[self.map_index]

            length = min(size, stop - self.position)
            if not is_data:
                pieces.append(NUL * length)
            else:
                self.fileobj.seek(real + (self.position - start))
                chunk = self.fileobj.read(length)
                if len(chunk) != length:
                    raise ReadError("unexpected end of data")
                pieces.append(chunk)
            size -= length
            self.position += length
        return b"".join(pieces)

    def readinto(self, b):
        data = self.read(len(b))
        count = len(data)
        b[:count] = data
        return count

    def close(self):
        self.closed = True
#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000712
class ExFileObject(io.BufferedReader):
    """Buffered reader over the data of a single archive member."""

    def __init__(self, tarfile, tarinfo):
        # Expose only the member's own data region (and its sparse map,
        # if any) of the archive's file object.
        member_data = _FileInFile(
            tarfile.fileobj,
            tarinfo.offset_data,
            tarinfo.size,
            tarinfo.sparse,
        )
        super().__init__(member_data)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000720
721#------------------
722# Exported Classes
723#------------------
724class TarInfo(object):
725 """Informational class which holds the details about an
726 archive member given by a tar header block.
727 TarInfo objects are returned by TarFile.getmember(),
728 TarFile.getmembers() and TarFile.gettarinfo() and are
729 usually created internally.
730 """
731
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +0000732 __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
733 "chksum", "type", "linkname", "uname", "gname",
734 "devmajor", "devminor",
735 "offset", "offset_data", "pax_headers", "sparse",
736 "tarfile", "_sparse_structs", "_link_target")
737
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000738 def __init__(self, name=""):
739 """Construct a TarInfo object. name is the optional name
740 of the member.
741 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000742 self.name = name # member name
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000743 self.mode = 0o644 # file permissions
Thomas Wouters477c8d52006-05-27 19:21:47 +0000744 self.uid = 0 # user id
745 self.gid = 0 # group id
746 self.size = 0 # file size
747 self.mtime = 0 # modification time
748 self.chksum = 0 # header checksum
749 self.type = REGTYPE # member type
750 self.linkname = "" # link name
Lars Gustäbel331b8002010-10-04 15:18:47 +0000751 self.uname = "" # user name
752 self.gname = "" # group name
Thomas Wouters477c8d52006-05-27 19:21:47 +0000753 self.devmajor = 0 # device major number
754 self.devminor = 0 # device minor number
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000755
Thomas Wouters477c8d52006-05-27 19:21:47 +0000756 self.offset = 0 # the tar header starts here
757 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000758
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +0000759 self.sparse = None # sparse member information
Guido van Rossumd8faa362007-04-27 19:54:29 +0000760 self.pax_headers = {} # pax header information
761
762 # In pax headers the "name" and "linkname" field are called
763 # "path" and "linkpath".
Serhiy Storchakabdf6b912017-03-19 08:40:32 +0200764 @property
765 def path(self):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000766 return self.name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000767
Serhiy Storchakabdf6b912017-03-19 08:40:32 +0200768 @path.setter
769 def path(self, name):
770 self.name = name
771
772 @property
773 def linkpath(self):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000774 return self.linkname
Serhiy Storchakabdf6b912017-03-19 08:40:32 +0200775
776 @linkpath.setter
777 def linkpath(self, linkname):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000778 self.linkname = linkname
Guido van Rossumd8faa362007-04-27 19:54:29 +0000779
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000780 def __repr__(self):
781 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
782
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000783 def get_info(self):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000784 """Return the TarInfo's attributes as a dictionary.
785 """
786 info = {
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +0000787 "name": self.name,
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000788 "mode": self.mode & 0o7777,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000789 "uid": self.uid,
790 "gid": self.gid,
791 "size": self.size,
792 "mtime": self.mtime,
793 "chksum": self.chksum,
794 "type": self.type,
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +0000795 "linkname": self.linkname,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000796 "uname": self.uname,
797 "gname": self.gname,
798 "devmajor": self.devmajor,
799 "devminor": self.devminor
800 }
801
802 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
803 info["name"] += "/"
804
805 return info
806
Victor Stinnerde629d42010-05-05 21:43:57 +0000807 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000808 """Return a tar header as a string of 512 byte blocks.
809 """
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000810 info = self.get_info()
Guido van Rossume7ba4952007-06-06 23:52:48 +0000811
Guido van Rossumd8faa362007-04-27 19:54:29 +0000812 if format == USTAR_FORMAT:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000813 return self.create_ustar_header(info, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000814 elif format == GNU_FORMAT:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000815 return self.create_gnu_header(info, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000816 elif format == PAX_FORMAT:
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000817 return self.create_pax_header(info, encoding)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000818 else:
819 raise ValueError("invalid format")
820
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000821 def create_ustar_header(self, info, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000822 """Return the object as a ustar header block.
823 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000824 info["magic"] = POSIX_MAGIC
825
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200826 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000827 raise ValueError("linkname is too long")
828
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200829 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
830 info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000831
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000832 return self._create_header(info, USTAR_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000833
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000834 def create_gnu_header(self, info, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000835 """Return the object as a GNU header block sequence.
836 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000837 info["magic"] = GNU_MAGIC
838
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000839 buf = b""
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200840 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000841 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000842
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200843 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000844 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000845
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000846 return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000847
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000848 def create_pax_header(self, info, encoding):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000849 """Return the object as a ustar header block. If it cannot be
850 represented this way, prepend a pax extended header sequence
851 with supplement information.
852 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000853 info["magic"] = POSIX_MAGIC
854 pax_headers = self.pax_headers.copy()
855
856 # Test string fields for values that exceed the field length or cannot
857 # be represented in ASCII encoding.
858 for name, hname, length in (
859 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
860 ("uname", "uname", 32), ("gname", "gname", 32)):
861
Guido van Rossume7ba4952007-06-06 23:52:48 +0000862 if hname in pax_headers:
863 # The pax header has priority.
864 continue
865
Guido van Rossumd8faa362007-04-27 19:54:29 +0000866 # Try to encode the string as ASCII.
867 try:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000868 info[name].encode("ascii", "strict")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000869 except UnicodeEncodeError:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000870 pax_headers[hname] = info[name]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000871 continue
872
Guido van Rossume7ba4952007-06-06 23:52:48 +0000873 if len(info[name]) > length:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000874 pax_headers[hname] = info[name]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000875
876 # Test number fields for values that exceed the field limit or values
877 # that like to be stored as float.
878 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
Guido van Rossume7ba4952007-06-06 23:52:48 +0000879 if name in pax_headers:
880 # The pax header has priority. Avoid overflow.
881 info[name] = 0
882 continue
883
Guido van Rossumd8faa362007-04-27 19:54:29 +0000884 val = info[name]
885 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000886 pax_headers[name] = str(val)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000887 info[name] = 0
888
Guido van Rossume7ba4952007-06-06 23:52:48 +0000889 # Create a pax extended header if necessary.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000890 if pax_headers:
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000891 buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000892 else:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000893 buf = b""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000894
Lars Gustäbel3741eff2007-08-21 12:17:05 +0000895 return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000896
897 @classmethod
Lars Gustäbel3741eff2007-08-21 12:17:05 +0000898 def create_pax_global_header(cls, pax_headers):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000899 """Return the object as a pax global header block sequence.
900 """
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000901 return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000902
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200903 def _posix_split_name(self, name, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000904 """Split a name longer than 100 chars into a prefix
905 and a name part.
906 """
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200907 components = name.split("/")
908 for i in range(1, len(components)):
909 prefix = "/".join(components[:i])
910 name = "/".join(components[i:])
911 if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
912 len(name.encode(encoding, errors)) <= LENGTH_NAME:
913 break
914 else:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000915 raise ValueError("name is too long")
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200916
Guido van Rossumd8faa362007-04-27 19:54:29 +0000917 return prefix, name
918
919 @staticmethod
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000920 def _create_header(info, format, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000921 """Return a header block. info is a dictionary with file
922 information, format must be one of the *_FORMAT constants.
923 """
924 parts = [
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000925 stn(info.get("name", ""), 100, encoding, errors),
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000926 itn(info.get("mode", 0) & 0o7777, 8, format),
Guido van Rossumd8faa362007-04-27 19:54:29 +0000927 itn(info.get("uid", 0), 8, format),
928 itn(info.get("gid", 0), 8, format),
929 itn(info.get("size", 0), 12, format),
930 itn(info.get("mtime", 0), 12, format),
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000931 b" ", # checksum field
Guido van Rossumd8faa362007-04-27 19:54:29 +0000932 info.get("type", REGTYPE),
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000933 stn(info.get("linkname", ""), 100, encoding, errors),
934 info.get("magic", POSIX_MAGIC),
Lars Gustäbel331b8002010-10-04 15:18:47 +0000935 stn(info.get("uname", ""), 32, encoding, errors),
936 stn(info.get("gname", ""), 32, encoding, errors),
Guido van Rossumd8faa362007-04-27 19:54:29 +0000937 itn(info.get("devmajor", 0), 8, format),
938 itn(info.get("devminor", 0), 8, format),
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000939 stn(info.get("prefix", ""), 155, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000940 ]
941
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000942 buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000943 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
Lars Gustäbela280ca752007-08-28 07:34:33 +0000944 buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000945 return buf
946
947 @staticmethod
948 def _create_payload(payload):
949 """Return the string payload filled with zero bytes
950 up to the next 512 byte border.
951 """
952 blocks, remainder = divmod(len(payload), BLOCKSIZE)
953 if remainder > 0:
954 payload += (BLOCKSIZE - remainder) * NUL
955 return payload
956
957 @classmethod
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000958 def _create_gnu_long_header(cls, name, type, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000959 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
960 for name.
961 """
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000962 name = name.encode(encoding, errors) + NUL
Guido van Rossumd8faa362007-04-27 19:54:29 +0000963
964 info = {}
965 info["name"] = "././@LongLink"
966 info["type"] = type
967 info["size"] = len(name)
968 info["magic"] = GNU_MAGIC
969
970 # create extended header + name blocks.
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000971 return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
Guido van Rossumd8faa362007-04-27 19:54:29 +0000972 cls._create_payload(name)
973
974 @classmethod
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000975 def _create_pax_generic_header(cls, pax_headers, type, encoding):
976 """Return a POSIX.1-2008 extended or global header sequence
Guido van Rossumd8faa362007-04-27 19:54:29 +0000977 that contains a list of keyword, value pairs. The values
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000978 must be strings.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000979 """
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000980 # Check if one of the fields contains surrogate characters and thereby
981 # forces hdrcharset=BINARY, see _proc_pax() for more information.
982 binary = False
983 for keyword, value in pax_headers.items():
984 try:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000985 value.encode("utf-8", "strict")
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000986 except UnicodeEncodeError:
987 binary = True
988 break
989
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000990 records = b""
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000991 if binary:
992 # Put the hdrcharset field at the beginning of the header.
993 records += b"21 hdrcharset=BINARY\n"
994
Guido van Rossumd8faa362007-04-27 19:54:29 +0000995 for keyword, value in pax_headers.items():
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000996 keyword = keyword.encode("utf-8")
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000997 if binary:
998 # Try to restore the original byte representation of `value'.
999 # Needless to say, that the encoding must match the string.
1000 value = value.encode(encoding, "surrogateescape")
1001 else:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001002 value = value.encode("utf-8")
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001003
Guido van Rossumd8faa362007-04-27 19:54:29 +00001004 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
1005 n = p = 0
1006 while True:
1007 n = l + len(str(p))
1008 if n == p:
1009 break
1010 p = n
Lars Gustäbela280ca752007-08-28 07:34:33 +00001011 records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
Guido van Rossumd8faa362007-04-27 19:54:29 +00001012
1013 # We use a hardcoded "././@PaxHeader" name like star does
1014 # instead of the one that POSIX recommends.
1015 info = {}
1016 info["name"] = "././@PaxHeader"
1017 info["type"] = type
1018 info["size"] = len(records)
1019 info["magic"] = POSIX_MAGIC
1020
1021 # Create pax header + record blocks.
Lars Gustäbel3741eff2007-08-21 12:17:05 +00001022 return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
Guido van Rossumd8faa362007-04-27 19:54:29 +00001023 cls._create_payload(records)
1024
Guido van Rossum75b64e62005-01-16 00:16:11 +00001025 @classmethod
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001026 def frombuf(cls, buf, encoding, errors):
1027 """Construct a TarInfo object from a 512 byte bytes object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001028 """
Lars Gustäbel9520a432009-11-22 18:48:49 +00001029 if len(buf) == 0:
1030 raise EmptyHeaderError("empty header")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001031 if len(buf) != BLOCKSIZE:
Lars Gustäbel9520a432009-11-22 18:48:49 +00001032 raise TruncatedHeaderError("truncated header")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001033 if buf.count(NUL) == BLOCKSIZE:
Lars Gustäbel9520a432009-11-22 18:48:49 +00001034 raise EOFHeaderError("end of file header")
Thomas Wouters902d6eb2007-01-09 23:18:33 +00001035
1036 chksum = nti(buf[148:156])
1037 if chksum not in calc_chksums(buf):
Lars Gustäbel9520a432009-11-22 18:48:49 +00001038 raise InvalidHeaderError("bad checksum")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001039
Guido van Rossumd8faa362007-04-27 19:54:29 +00001040 obj = cls()
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001041 obj.name = nts(buf[0:100], encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001042 obj.mode = nti(buf[100:108])
1043 obj.uid = nti(buf[108:116])
1044 obj.gid = nti(buf[116:124])
1045 obj.size = nti(buf[124:136])
1046 obj.mtime = nti(buf[136:148])
1047 obj.chksum = chksum
1048 obj.type = buf[156:157]
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001049 obj.linkname = nts(buf[157:257], encoding, errors)
1050 obj.uname = nts(buf[265:297], encoding, errors)
1051 obj.gname = nts(buf[297:329], encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001052 obj.devmajor = nti(buf[329:337])
1053 obj.devminor = nti(buf[337:345])
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001054 prefix = nts(buf[345:500], encoding, errors)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001055
Guido van Rossumd8faa362007-04-27 19:54:29 +00001056 # Old V7 tar format represents a directory as a regular
1057 # file with a trailing slash.
1058 if obj.type == AREGTYPE and obj.name.endswith("/"):
1059 obj.type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001060
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +00001061 # The old GNU sparse format occupies some of the unused
1062 # space in the buffer for up to 4 sparse structures.
1063 # Save the them for later processing in _proc_sparse().
1064 if obj.type == GNUTYPE_SPARSE:
1065 pos = 386
1066 structs = []
1067 for i in range(4):
1068 try:
1069 offset = nti(buf[pos:pos + 12])
1070 numbytes = nti(buf[pos + 12:pos + 24])
1071 except ValueError:
1072 break
1073 structs.append((offset, numbytes))
1074 pos += 24
1075 isextended = bool(buf[482])
1076 origsize = nti(buf[483:495])
1077 obj._sparse_structs = (structs, isextended, origsize)
1078
Guido van Rossumd8faa362007-04-27 19:54:29 +00001079 # Remove redundant slashes from directories.
1080 if obj.isdir():
1081 obj.name = obj.name.rstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001082
Guido van Rossumd8faa362007-04-27 19:54:29 +00001083 # Reconstruct a ustar longname.
1084 if prefix and obj.type not in GNU_TYPES:
1085 obj.name = prefix + "/" + obj.name
1086 return obj
1087
1088 @classmethod
1089 def fromtarfile(cls, tarfile):
1090 """Return the next TarInfo object from TarFile object
1091 tarfile.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001092 """
Guido van Rossumd8faa362007-04-27 19:54:29 +00001093 buf = tarfile.fileobj.read(BLOCKSIZE)
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001094 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001095 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1096 return obj._proc_member(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001097
Guido van Rossumd8faa362007-04-27 19:54:29 +00001098 #--------------------------------------------------------------------------
1099 # The following are methods that are called depending on the type of a
1100 # member. The entry point is _proc_member() which can be overridden in a
1101 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1102 # implement the following
1103 # operations:
1104 # 1. Set self.offset_data to the position where the data blocks begin,
1105 # if there is data that follows.
1106 # 2. Set tarfile.offset to the position where the next member's header will
1107 # begin.
1108 # 3. Return self or another valid TarInfo object.
1109 def _proc_member(self, tarfile):
1110 """Choose the right processing method depending on
1111 the type and call it.
Thomas Wouters89f507f2006-12-13 04:49:30 +00001112 """
Guido van Rossumd8faa362007-04-27 19:54:29 +00001113 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1114 return self._proc_gnulong(tarfile)
1115 elif self.type == GNUTYPE_SPARSE:
1116 return self._proc_sparse(tarfile)
1117 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1118 return self._proc_pax(tarfile)
1119 else:
1120 return self._proc_builtin(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001121
Guido van Rossumd8faa362007-04-27 19:54:29 +00001122 def _proc_builtin(self, tarfile):
1123 """Process a builtin type or an unknown type which
1124 will be treated as a regular file.
1125 """
1126 self.offset_data = tarfile.fileobj.tell()
1127 offset = self.offset_data
1128 if self.isreg() or self.type not in SUPPORTED_TYPES:
1129 # Skip the following data blocks.
1130 offset += self._block(self.size)
1131 tarfile.offset = offset
Thomas Wouters89f507f2006-12-13 04:49:30 +00001132
Guido van Rossume7ba4952007-06-06 23:52:48 +00001133 # Patch the TarInfo object with saved global
Guido van Rossumd8faa362007-04-27 19:54:29 +00001134 # header information.
Guido van Rossume7ba4952007-06-06 23:52:48 +00001135 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001136
1137 return self
1138
1139 def _proc_gnulong(self, tarfile):
1140 """Process the blocks that hold a GNU longname
1141 or longlink member.
1142 """
1143 buf = tarfile.fileobj.read(self._block(self.size))
1144
1145 # Fetch the next header and process it.
Lars Gustäbel9520a432009-11-22 18:48:49 +00001146 try:
1147 next = self.fromtarfile(tarfile)
1148 except HeaderError:
1149 raise SubsequentHeaderError("missing or bad subsequent header")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001150
1151 # Patch the TarInfo object from the next header with
1152 # the longname information.
1153 next.offset = self.offset
1154 if self.type == GNUTYPE_LONGNAME:
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001155 next.name = nts(buf, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001156 elif self.type == GNUTYPE_LONGLINK:
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001157 next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001158
1159 return next
1160
1161 def _proc_sparse(self, tarfile):
1162 """Process a GNU sparse header plus extra headers.
1163 """
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +00001164 # We already collected some sparse structures in frombuf().
1165 structs, isextended, origsize = self._sparse_structs
1166 del self._sparse_structs
Guido van Rossumd8faa362007-04-27 19:54:29 +00001167
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +00001168 # Collect sparse structures from extended header blocks.
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001169 while isextended:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001170 buf = tarfile.fileobj.read(BLOCKSIZE)
1171 pos = 0
Guido van Rossum805365e2007-05-07 22:24:25 +00001172 for i in range(21):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001173 try:
1174 offset = nti(buf[pos:pos + 12])
1175 numbytes = nti(buf[pos + 12:pos + 24])
1176 except ValueError:
1177 break
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001178 if offset and numbytes:
1179 structs.append((offset, numbytes))
Guido van Rossumd8faa362007-04-27 19:54:29 +00001180 pos += 24
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001181 isextended = bool(buf[504])
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001182 self.sparse = structs
Guido van Rossumd8faa362007-04-27 19:54:29 +00001183
1184 self.offset_data = tarfile.fileobj.tell()
1185 tarfile.offset = self.offset_data + self._block(self.size)
1186 self.size = origsize
Guido van Rossumd8faa362007-04-27 19:54:29 +00001187 return self
1188
1189 def _proc_pax(self, tarfile):
1190 """Process an extended or global header as described in
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001191 POSIX.1-2008.
Guido van Rossumd8faa362007-04-27 19:54:29 +00001192 """
1193 # Read the header information.
1194 buf = tarfile.fileobj.read(self._block(self.size))
1195
1196 # A pax header stores supplemental information for either
1197 # the following file (extended) or all following files
1198 # (global).
1199 if self.type == XGLTYPE:
1200 pax_headers = tarfile.pax_headers
1201 else:
1202 pax_headers = tarfile.pax_headers.copy()
1203
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001204 # Check if the pax header contains a hdrcharset field. This tells us
1205 # the encoding of the path, linkpath, uname and gname fields. Normally,
1206 # these fields are UTF-8 encoded but since POSIX.1-2008 tar
1207 # implementations are allowed to store them as raw binary strings if
1208 # the translation to UTF-8 fails.
1209 match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
1210 if match is not None:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001211 pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001212
1213 # For the time being, we don't care about anything other than "BINARY".
1214 # The only other value that is currently allowed by the standard is
1215 # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
1216 hdrcharset = pax_headers.get("hdrcharset")
1217 if hdrcharset == "BINARY":
1218 encoding = tarfile.encoding
1219 else:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001220 encoding = "utf-8"
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001221
Guido van Rossumd8faa362007-04-27 19:54:29 +00001222 # Parse pax header information. A record looks like that:
1223 # "%d %s=%s\n" % (length, keyword, value). length is the size
1224 # of the complete record including the length field itself and
Guido van Rossume7ba4952007-06-06 23:52:48 +00001225 # the newline. keyword and value are both UTF-8 encoded strings.
Antoine Pitroufd036452008-08-19 17:56:33 +00001226 regex = re.compile(br"(\d+) ([^=]+)=")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001227 pos = 0
1228 while True:
1229 match = regex.match(buf, pos)
1230 if not match:
1231 break
1232
1233 length, keyword = match.groups()
1234 length = int(length)
1235 value = buf[match.end(2) + 1:match.start(1) + length - 1]
1236
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001237 # Normally, we could just use "utf-8" as the encoding and "strict"
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001238 # as the error handler, but we better not take the risk. For
1239 # example, GNU tar <= 1.23 is known to store filenames it cannot
1240 # translate to UTF-8 as raw strings (unfortunately without a
1241 # hdrcharset=BINARY header).
1242 # We first try the strict standard encoding, and if that fails we
1243 # fall back on the user's encoding and error handler.
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001244 keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001245 tarfile.errors)
1246 if keyword in PAX_NAME_FIELDS:
1247 value = self._decode_pax_field(value, encoding, tarfile.encoding,
1248 tarfile.errors)
1249 else:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001250 value = self._decode_pax_field(value, "utf-8", "utf-8",
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001251 tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001252
1253 pax_headers[keyword] = value
1254 pos += length
1255
Guido van Rossume7ba4952007-06-06 23:52:48 +00001256 # Fetch the next header.
Lars Gustäbel9520a432009-11-22 18:48:49 +00001257 try:
1258 next = self.fromtarfile(tarfile)
1259 except HeaderError:
1260 raise SubsequentHeaderError("missing or bad subsequent header")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001261
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001262 # Process GNU sparse information.
1263 if "GNU.sparse.map" in pax_headers:
1264 # GNU extended sparse format version 0.1.
1265 self._proc_gnusparse_01(next, pax_headers)
1266
1267 elif "GNU.sparse.size" in pax_headers:
1268 # GNU extended sparse format version 0.0.
1269 self._proc_gnusparse_00(next, pax_headers, buf)
1270
1271 elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
1272 # GNU extended sparse format version 1.0.
1273 self._proc_gnusparse_10(next, pax_headers, tarfile)
1274
Guido van Rossume7ba4952007-06-06 23:52:48 +00001275 if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
Guido van Rossume7ba4952007-06-06 23:52:48 +00001276 # Patch the TarInfo object with the extended header info.
1277 next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1278 next.offset = self.offset
1279
1280 if "size" in pax_headers:
1281 # If the extended header replaces the size field,
1282 # we need to recalculate the offset where the next
1283 # header starts.
1284 offset = next.offset_data
1285 if next.isreg() or next.type not in SUPPORTED_TYPES:
1286 offset += next._block(next.size)
1287 tarfile.offset = offset
1288
1289 return next
1290
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001291 def _proc_gnusparse_00(self, next, pax_headers, buf):
1292 """Process a GNU tar extended sparse header, version 0.0.
1293 """
1294 offsets = []
1295 for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1296 offsets.append(int(match.group(1)))
1297 numbytes = []
1298 for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1299 numbytes.append(int(match.group(1)))
1300 next.sparse = list(zip(offsets, numbytes))
1301
1302 def _proc_gnusparse_01(self, next, pax_headers):
1303 """Process a GNU tar extended sparse header, version 0.1.
1304 """
1305 sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1306 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1307
1308 def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1309 """Process a GNU tar extended sparse header, version 1.0.
1310 """
1311 fields = None
1312 sparse = []
1313 buf = tarfile.fileobj.read(BLOCKSIZE)
1314 fields, buf = buf.split(b"\n", 1)
1315 fields = int(fields)
1316 while len(sparse) < fields * 2:
1317 if b"\n" not in buf:
1318 buf += tarfile.fileobj.read(BLOCKSIZE)
1319 number, buf = buf.split(b"\n", 1)
1320 sparse.append(int(number))
1321 next.offset_data = tarfile.fileobj.tell()
1322 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1323
Guido van Rossume7ba4952007-06-06 23:52:48 +00001324 def _apply_pax_info(self, pax_headers, encoding, errors):
1325 """Replace fields with supplemental information from a previous
1326 pax extended or global header.
1327 """
1328 for keyword, value in pax_headers.items():
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001329 if keyword == "GNU.sparse.name":
1330 setattr(self, "path", value)
1331 elif keyword == "GNU.sparse.size":
1332 setattr(self, "size", int(value))
1333 elif keyword == "GNU.sparse.realsize":
1334 setattr(self, "size", int(value))
1335 elif keyword in PAX_FIELDS:
1336 if keyword in PAX_NUMBER_FIELDS:
1337 try:
1338 value = PAX_NUMBER_FIELDS[keyword](value)
1339 except ValueError:
1340 value = 0
1341 if keyword == "path":
1342 value = value.rstrip("/")
1343 setattr(self, keyword, value)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001344
1345 self.pax_headers = pax_headers.copy()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001346
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001347 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1348 """Decode a single field from a pax record.
1349 """
1350 try:
1351 return value.decode(encoding, "strict")
1352 except UnicodeDecodeError:
1353 return value.decode(fallback_encoding, fallback_errors)
1354
Guido van Rossumd8faa362007-04-27 19:54:29 +00001355 def _block(self, count):
1356 """Round up a byte count by BLOCKSIZE and return it,
1357 e.g. _block(834) => 1024.
1358 """
1359 blocks, remainder = divmod(count, BLOCKSIZE)
1360 if remainder:
1361 blocks += 1
1362 return blocks * BLOCKSIZE
Thomas Wouters89f507f2006-12-13 04:49:30 +00001363
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001364 def isreg(self):
1365 return self.type in REGULAR_TYPES
1366 def isfile(self):
1367 return self.isreg()
1368 def isdir(self):
1369 return self.type == DIRTYPE
1370 def issym(self):
1371 return self.type == SYMTYPE
def islnk(self):
    """Return True if this member's type is LNKTYPE."""
    return self.type == LNKTYPE
def ischr(self):
    """Return True if this member's type is CHRTYPE."""
    return self.type == CHRTYPE
def isblk(self):
    """Return True if this member's type is BLKTYPE."""
    return self.type == BLKTYPE
def isfifo(self):
    """Return True if this member's type is FIFOTYPE."""
    return self.type == FIFOTYPE
def issparse(self):
    """Return True if the sparse attribute of this member is set
       (i.e. is not None).
    """
    return self.sparse is not None
def isdev(self):
    """Return True if this member's type is CHRTYPE, BLKTYPE or
       FIFOTYPE.
    """
    return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1384# class TarInfo
1385
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.

       The class attributes below are defaults; most of them are
       replaced per instance by the matching __init__() argument when
       that argument is not None.
    """

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.
                                # __init__() always assigns self.errors, so
                                # instances do not see this class default.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The file-object for extractfile().
Guido van Rossumd8faa362007-04-27 19:54:29 +00001411
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None,
        errorlevel=None, copybufsize=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file, 'w' to create a new file overwriting an existing one, or
       'x' to create a new file exclusively. `mode' defaults to 'r';
       any other value raises ValueError.

       If `fileobj' is given, it is used for reading or writing data and
       is not closed when the TarFile is closed. If `fileobj' has a
       `mode' attribute, the internal file mode is taken from it.

       `format', `tarinfo', `dereference', `ignore_zeros', `encoding',
       `debug' and `errorlevel' override the class attributes of the
       same name when they are not None. `errors' is always stored.
       `pax_headers' is only used when the resulting format is
       PAX_FORMAT. `copybufsize' is stored and later handed to
       copyfileobj() by addfile().

       If anything fails after the file has been opened, a file object
       opened here is closed again and the exception is re-raised.
    """
    # Maps the public mode to the mode used for opening the file.
    modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
    if mode not in modes:
        raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
    self.mode = mode
    self._mode = modes[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        # We opened the file ourselves, so close() has to close it.
        self._extfileobj = False
    else:
        # Fall back to the file object's name, but only if it is a
        # real path-like string (str or bytes).
        if (name is None and hasattr(fileobj, "name") and
            isinstance(fileobj.name, (str, bytes))):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        # The caller owns the file object; close() leaves it open.
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding
    self.errors = errors

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.copybufsize = copybufsize
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            # firstmember must exist as an attribute before next()
            # is called for the first time.
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    tarinfo = self.tarinfo.fromtarfile(self)
                    self.members.append(tarinfo)
                except EOFHeaderError:
                    # Rewind to the start of the header that could
                    # not be read, so new data is written from there.
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in ("a", "w", "x"):
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Do not leak a file we opened ourselves; mark the object
        # closed and let the original exception propagate.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001511
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001512 #--------------------------------------------------------------------------
1513 # Below are the classmethods which act as alternate constructors to the
1514 # TarFile class. The open() method is the only one that is needed for
1515 # public use; it is the "super"-constructor and is able to select an
1516 # adequate "sub"-constructor for a particular compression using the mapping
1517 # from OPEN_METH.
1518 #
1519 # This concept allows one to subclass TarFile without losing the comfort of
1520 # the super-constructor. A sub-constructor is registered and made available
1521 # by adding it to the mapping in OPEN_METH.
1522
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'r:xz'       open for reading with lzma compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'w:xz'       open for writing with lzma compression

       'x' or 'x:'  create a tarfile exclusively without compression, raise
                    an exception if the file is already created
       'x:gz'       create a gzip compressed tarfile, raise an exception
                    if the file is already created
       'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                    if the file is already created
       'x:xz'       create an lzma compressed tarfile, raise an exception
                    if the file is already created

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'r|xz'       open an lzma compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing
       'w|xz'       open an lzma compressed stream for writing
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Try every registered opener in turn. The sort key is False
        # for the compressed openers and True for 'taropen', so the
        # uncompressed opener is tried last.
        candidates = sorted(
            cls.OPEN_METH,
            key=lambda kind: cls.OPEN_METH[kind] == 'taropen')
        for kind in candidates:
            opener = getattr(cls, cls.OPEN_METH[kind])
            if fileobj is not None:
                start = fileobj.tell()
            try:
                return opener(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Undo whatever the failed attempt consumed.
                if fileobj is not None:
                    fileobj.seek(start)
        raise ReadError("file could not be opened successfully")

    if ":" in mode:
        filemode, _, comptype = mode.partition(":")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Pick the *open() method registered for this compression.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        opener = getattr(cls, cls.OPEN_METH[comptype])
        return opener(name, filemode, fileobj, **kwargs)

    if "|" in mode:
        filemode, _, comptype = mode.partition("|")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            archive = cls(name, filemode, stream, **kwargs)
        except:
            stream.close()
            raise
        # The stream was created here, so the TarFile has to close it.
        archive._extfileobj = False
        return archive

    if mode in ("a", "w", "x"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001613
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open the uncompressed tar archive `name' for reading or writing
       and return the new instance. `mode' must be 'r', 'a', 'w' or 'x'.
    """
    if mode in ("r", "a", "w", "x"):
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001621
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       `mode' must be 'r', 'w' or 'x', otherwise ValueError is raised.
       CompressionError is raised if the gzip module is not usable.
       In read mode an OSError from the gzip layer is reported as
       ReadError("not a gzip file"), with the original error attached
       as its cause. The GzipFile created here is closed again if the
       archive cannot be opened.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        # The import failure itself carries no extra information for
        # the caller, so it is deliberately not chained.
        raise CompressionError("gzip module is not available") from None

    try:
        fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
    except OSError as e:
        if fileobj is not None and mode == 'r':
            raise ReadError("not a gzip file") from e
        raise

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except OSError as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not a gzip file") from e
        raise
    except:
        fileobj.close()
        raise
    # The GzipFile wrapper belongs to the TarFile and is closed with it.
    t._extfileobj = False
    return t
1655
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       `mode' must be 'r', 'w' or 'x', otherwise ValueError is raised.
       CompressionError is raised if the bz2 module cannot be imported.
       In read mode an OSError or EOFError from the bz2 layer is
       reported as ReadError("not a bzip2 file"), with the original
       error attached as its cause. The BZ2File created here is closed
       again if the archive cannot be opened.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import bz2
    except ImportError:
        # The import failure itself carries no extra information for
        # the caller, so it is deliberately not chained.
        raise CompressionError("bz2 module is not available") from None

    fileobj = bz2.BZ2File(fileobj or name, mode,
                          compresslevel=compresslevel)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (OSError, EOFError) as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not a bzip2 file") from e
        raise
    except:
        fileobj.close()
        raise
    # The BZ2File wrapper belongs to the TarFile and is closed with it.
    t._extfileobj = False
    return t
1684
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open lzma compressed tar archive name for reading or writing.
       Appending is not allowed.

       `mode' must be 'r', 'w' or 'x', otherwise ValueError is raised.
       CompressionError is raised if the lzma module cannot be imported.
       In read mode an LZMAError or EOFError from the lzma layer is
       reported as ReadError("not an lzma file"), with the original
       error attached as its cause. The LZMAFile created here is closed
       again if the archive cannot be opened.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import lzma
    except ImportError:
        # The import failure itself carries no extra information for
        # the caller, so it is deliberately not chained.
        raise CompressionError("lzma module is not available") from None

    fileobj = lzma.LZMAFile(fileobj or name, mode, preset=preset)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (lzma.LZMAError, EOFError) as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not an lzma file") from e
        raise
    except:
        fileobj.close()
        raise
    # The LZMAFile wrapper belongs to the TarFile and is closed with it.
    t._extfileobj = False
    return t
1712
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001713 # All *open() methods are registered here.
# Maps a compression type, as it appears after ':' in the mode string
# given to open(), to the name of the classmethod that opens it.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open",   # bzip2 compressed tar
    "xz":  "xzopen"     # lzma compressed tar
}
1720
1721 #--------------------------------------------------------------------------
1722 # The public methods which TarFile provides:
1723
def close(self):
    """Close the TarFile. In the writing modes ('a', 'w', 'x') two
       zero blocks are appended first and the archive is padded with
       zeros up to a multiple of RECORDSIZE. Closing an already closed
       TarFile does nothing.
    """
    if self.closed:
        return

    self.closed = True
    try:
        if self.mode in ("a", "w", "x"):
            trailer = NUL * (BLOCKSIZE * 2)
            self.fileobj.write(trailer)
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            leftover = self.offset % RECORDSIZE
            if leftover > 0:
                self.fileobj.write(NUL * (RECORDSIZE - leftover))
    finally:
        # A file object handed in by the caller stays open.
        if not self._extfileobj:
            self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001744
def getmember(self, name):
    """Return the TarInfo object for member `name' as found by
       _getmember(). KeyError is raised if there is no such member.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001755
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects.
       The archive is scanned completely first if that has not been
       done yet.
    """
    self._check()
    if self._loaded:
        return self.members
    # Not everything has been read yet: scan the rest of the archive.
    self._load()
    return self.members
1765
def getnames(self):
    """Return the names of all members, in the same order as the list
       returned by getmembers().
    """
    members = self.getmembers()
    return [member.name for member in members]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001771
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object from the result of os.stat or equivalent
       on an existing file. The file is either named by `name', or
       specified as a file object `fileobj' with a file descriptor. If
       given, `arcname' specifies an alternative name for the file in the
       archive, otherwise, the name is taken from the 'name' attribute of
       'fileobj', or the 'name' argument. The name should be a text
       string.

       Returns None if the file is of a type that is not one of:
       regular file, directory, fifo, symbolic link, character device
       or block device. Regular files are remembered by inode in
       self.inodes, so a later call for another name of the same inode
       yields a hard link member instead of a second copy.
    """
    self._check("awx")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    drv, arcname = os.path.splitdrive(arcname)
    arcname = arcname.replace(os.sep, "/")
    arcname = arcname.lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self  # Not needed

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is None:
        if hasattr(os, "lstat") and not self.dereference:
            statres = os.lstat(name)
        else:
            statres = os.stat(name)
    else:
        # Stat the open descriptor rather than the path.
        statres = os.fstat(fileobj.fileno())
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        if not self.dereference and statres.st_nlink > 1 and \
                inode in self.inodes and arcname != self.inodes[inode]:
            # Is it a hardlink to an already
            # archived file?
            type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        type = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        type = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        type = CHRTYPE
    elif stat.S_ISBLK(stmd):
        type = BLKTYPE
    else:
        # Any other file type cannot be represented.
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry data; every other type (including
    # hard links) is stored with size 0.
    if type == REGTYPE:
        tarinfo.size = statres.st_size
    else:
        tarinfo.size = 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = type
    tarinfo.linkname = linkname
    # pwd and grp may be falsy (presumably when the modules are not
    # available on the platform -- confirm at the import site); a
    # uid/gid without an entry simply leaves the name unset.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if type in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1871
def list(self, verbose=True, *, members=None):
    """Print a table of contents, one line per member. With `verbose'
       set to False only the member names are printed; with True an
       `ls -l'-like line (mode, owner, size or device numbers, time,
       name, link target) is produced. `members' is optional and must
       be a subset of the list returned by getmembers(); by default all
       members of the archive are listed.
    """
    self._check()

    entries = self if members is None else members
    for entry in entries:
        if verbose:
            _safe_print(stat.filemode(entry.mode))
            owner = "%s/%s" % (entry.uname or entry.uid,
                               entry.gname or entry.gid)
            _safe_print(owner)
            # Devices show "major,minor" in place of a size.
            if entry.ischr() or entry.isblk():
                numbers = "%d,%d" % (entry.devmajor, entry.devminor)
                _safe_print("%10s" % numbers)
            else:
                _safe_print("%10d" % entry.size)
            stamp = time.localtime(entry.mtime)[:6]
            _safe_print("%d-%02d-%02d %02d:%02d:%02d" % stamp)

        label = entry.name
        if entry.isdir():
            label = label + "/"
        _safe_print(label)

        if verbose:
            if entry.issym():
                _safe_print("-> " + entry.linkname)
            if entry.islnk():
                _safe_print("link to " + entry.linkname)
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001903
def add(self, name, arcname=None, recursive=True, *, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. Directory entries are added in
       sorted order, so the member order does not depend on the order in
       which the operating system happens to list them. `filter' is a
       function that expects a TarInfo object argument and returns the
       changed TarInfo object, if it returns None the TarInfo object will
       be excluded from the archive.
    """
    self._check("awx")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as f:
            self.addfile(tarinfo, f)

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            # os.listdir() returns entries in arbitrary order; sort
            # them so the resulting archive is reproducible.
            for f in sorted(os.listdir(name)):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                        recursive, filter=filter)

    else:
        self.addfile(tarinfo)
1954
def addfile(self, tarinfo, fileobj=None):
    """Append a copy of the TarInfo object `tarinfo' to the archive.
       If `fileobj' is given, it should be a binary file; tarinfo.size
       bytes are read from it and written after the header, padded with
       zeros to a multiple of BLOCKSIZE. TarInfo objects can be created
       directly or with gettarinfo().
    """
    self._check("awx")

    # Work on a copy so the caller's object is not the one stored.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size,
                    bufsize=self.copybufsize)
        whole, partial = divmod(member.size, BLOCKSIZE)
        if partial > 0:
            # Pad the last, partially filled block with zeros.
            self.fileobj.write(NUL * (BLOCKSIZE - partial))
            whole += 1
        self.offset += whole * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001979
def extractall(self, path=".", members=None, *, numeric_owner=False):
    """Extract all members from the archive to the current working
       directory, or to `path' if given. Directories are created with
       mode 0o700 first; their owner, modification time and permissions
       are set afterwards. `members' is optional and must be a subset of
       the list returned by getmembers(). If `numeric_owner` is True,
       only the numbers for user/group names are used and not the names.
    """
    if members is None:
        members = self

    deferred = []
    for member in members:
        if member.isdir():
            # Extract directories with a safe mode; the original
            # object is kept for fixing the attributes later.
            deferred.append(member)
            member = copy.copy(member)
            member.mode = 0o700
        # Do not set_attrs directories, as we will do that further down
        self.extract(member, path, set_attrs=not member.isdir(),
                     numeric_owner=numeric_owner)

    # Walk the directories in reverse name order.
    for dirinfo in reversed(sorted(deferred, key=lambda info: info.name)):
        target = os.path.join(path, dirinfo.name)
        # Set correct owner, mtime and filemode on directories.
        try:
            self.chown(dirinfo, target, numeric_owner=numeric_owner)
            self.utime(dirinfo, target)
            self.chmod(dirinfo, target)
        except ExtractError as exc:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % exc)
2019
def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
    """Extract one member below `path' (default: the current working
       directory) under its full archive name. `member' is either a
       member name or a TarInfo object. Owner, mtime and mode are applied
       unless `set_attrs' is false. With `numeric_owner` set, uid/gid
       numbers are used instead of user/group names.

       An OSError is re-raised only if self.errorlevel > 0 and an
       ExtractError only if self.errorlevel > 1; otherwise the problem
       is reported through _dbg().
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # makelink() needs to know where a hard link's target lives on disk.
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    destination = os.path.join(path, tarinfo.name)
    try:
        self._extract_member(tarinfo, destination,
                             set_attrs=set_attrs,
                             numeric_owner=numeric_owner)
    except OSError as exc:
        if self.errorlevel > 0:
            raise
        if exc.filename is None:
            self._dbg(1, "tarfile: %s" % exc.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (exc.strerror, exc.filename))
    except ExtractError as exc:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % exc)
2057
def extractfile(self, member):
    """Return a file object for reading a member's data. `member' is
       either a member name or a TarInfo object. Regular files, members
       of unknown type and links yield the object built by
       self.fileobject; every other kind of member yields None.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Members with unknown types are treated as regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # Directories, devices and the like carry no data at all.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # Resolving a link means looking the target up elsewhere in the
    # archive, which a stream of tar blocks does not allow.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2088
def _extract_member(self, tarinfo, targetpath, set_attrs=True,
                    numeric_owner=False):
    """Create the filesystem object described by tarinfo at targetpath,
       making missing parent directories first. Unless set_attrs is
       false the owner is applied afterwards, followed by mode and mtime
       for everything that is not a symbolic link.
    """
    # Drop trailing slashes and switch to the platform's separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Parent directories that are not part of the archive are created
    # with default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    is_link = tarinfo.islnk() or tarinfo.issym()
    if is_link:
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Pick the make*() method matching the member's type.
    if tarinfo.isreg():
        build = self.makefile
    elif tarinfo.isdir():
        build = self.makedir
    elif tarinfo.isfifo():
        build = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        build = self.makedev
    elif is_link:
        build = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        build = self.makeunknown
    else:
        build = self.makefile
    build(tarinfo, targetpath)

    if not set_attrs:
        return

    self.chown(tarinfo, targetpath, numeric_owner)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002132
2133 #--------------------------------------------------------------------------
2134 # Below are the different file methods. They are called via
2135 # _extract_member() when extract() is called. They can be replaced in a
2136 # subclass to implement other functionality.
2137
def makedir(self, tarinfo, targetpath):
    """Create the directory targetpath; an already existing path is
       silently accepted.
    """
    # Start out with a safe mode, the real mode is set later in
    # _extract_member().
    safe_mode = 0o700
    try:
        os.mkdir(targetpath, safe_mode)
    except FileExistsError:
        return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002147
def makefile(self, tarinfo, targetpath):
    """Write the member's data from the archive into a new file
       targetpath.
    """
    archive = self.fileobj
    archive.seek(tarinfo.offset_data)
    chunk_size = self.copybufsize

    with bltn_open(targetpath, "wb") as out:
        if tarinfo.sparse is None:
            # Ordinary member: the data is one contiguous run.
            copyfileobj(archive, out, tarinfo.size, ReadError, chunk_size)
            return

        # Sparse member: place every stored segment at its offset, then
        # cut or extend the file to the recorded size.
        for position, length in tarinfo.sparse:
            out.seek(position)
            copyfileobj(archive, out, length, ReadError, chunk_size)
        out.seek(tarinfo.size)
        out.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002163
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as if it were a
       regular file and leave a level 1 debug note about it.
    """
    self.makefile(tarinfo, targetpath)
    note = "tarfile: Unknown file type %r, extracted as regular file."
    self._dbg(1, note % tarinfo.type)
2171
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at targetpath. Raise ExtractError if the os module
       offers no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002179
def makedev(self, tarinfo, targetpath):
    """Create the character or block device node targetpath. Raise
       ExtractError if the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Anything that is not a block device is made a character device.
    node_type = stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | node_type, device)
2194
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link targetpath. Where a link cannot
       be made (platform limitation, or a hard link whose target is not
       on disk), the referenced archive member is extracted to
       targetpath instead.
    """
    try:
        # For systems that support symbolic and hard links.
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        elif os.path.exists(tarinfo._link_target):
            # _link_target was prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            referenced = self._find_link_target(tarinfo)
            self._extract_member(referenced, targetpath)
    except symlink_exception:
        try:
            referenced = self._find_link_target(tarinfo)
            self._extract_member(referenced, targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002217
def chown(self, tarinfo, targetpath, numeric_owner):
    """Give targetpath the owner recorded in tarinfo. Nothing happens
       unless the process runs with effective uid 0. With numeric_owner
       true the stored .uid/.gid are used as they are; otherwise
       .uname/.gname are looked up first and .uid/.gid only serve as
       fallback when a lookup fails. Raise ExtractError if the owner
       cannot be changed.
    """
    # We have to be root to do so.
    if not (hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    group_id = tarinfo.gid
    user_id = tarinfo.uid
    if not numeric_owner:
        try:
            if grp:
                group_id = grp.getgrnam(tarinfo.gname)[2]
        except KeyError:
            pass
        try:
            if pwd:
                user_id = pwd.getpwnam(tarinfo.uname)[2]
        except KeyError:
            pass

    try:
        # Change a symlink itself rather than what it points to, where
        # the platform allows it.
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, user_id, group_id)
        else:
            os.chown(targetpath, user_id, group_id)
    except OSError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002246
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits in tarinfo.mode to targetpath. Raise
       ExtractError if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except OSError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002255
def utime(self, tarinfo, targetpath):
    """Set both access and modification time of targetpath to
       tarinfo.mtime. Raise ExtractError if the times cannot be changed.
    """
    if hasattr(os, 'utime'):
        try:
            stamp = tarinfo.mtime
            os.utime(targetpath, (stamp, stamp))
        except OSError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002265
2266 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Raise ReadError if the data ends before self.offset, if the very
       first header (offset 0) is empty, truncated or invalid, or if a
       subsequent header is malformed.
    """
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Advance the file pointer.
    if self.offset != self.fileobj.tell():
        if self.offset == 0:
            # There is no byte in front of offset 0 that could be probed;
            # seeking to offset - 1 would request position -1 and fail.
            self.fileobj.seek(0)
        else:
            # Read the byte just before the offset to make sure the
            # data really reaches that far.
            self.fileobj.seek(self.offset - 1)
            if not self.fileobj.read(1):
                raise ReadError("unexpected end of data")

    # Read the next block.
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as e:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as e:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                raise ReadError(str(e))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as e:
            if self.offset == 0:
                raise ReadError(str(e))
        except SubsequentHeaderError as e:
            raise ReadError(str(e))
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        # Nothing more could be read: the member list is complete.
        self._loaded = True

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002317
2318 #--------------------------------------------------------------------------
2319 # Little helper methods:
2320
def _getmember(self, name, tarinfo=None, normalize=False):
    """Return the last member called name, or None if there is none.
       If tarinfo is given, only members in front of it are considered.
       With normalize set, names are compared after os.path.normpath().
    """
    # getmembers() ensures that all members have been loaded.
    candidates = self.getmembers()

    # Limit the member search list up to tarinfo.
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    if normalize:
        canonical = os.path.normpath
    else:
        canonical = lambda text: text

    wanted = canonical(name)
    # Walk from bottom to top so that the most recent entry is found.
    for candidate in candidates[::-1]:
        if wanted == canonical(candidate.name):
            return candidate
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002343
def _load(self):
    """Call next() until it returns None, then mark the archive as
       completely loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2353
def _check(self, mode=None):
    """Raise OSError if the TarFile is closed or, when mode is given,
       if the TarFile's own mode does not occur in it.
    """
    if self.closed:
        owner = self.__class__.__name__
        raise OSError("%s is closed" % owner)
    if mode is None:
        return
    if self.mode not in mode:
        raise OSError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002362
def _find_link_target(self, tarinfo):
    """Return the archive member that the symlink or hardlink member
       tarinfo refers to. Raise KeyError if no such member is found.
    """
    if tarinfo.issym():
        # A symlink target is taken relative to the link's directory
        # and may sit anywhere, so always search the entire archive.
        pieces = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join([piece for piece in pieces if piece])
        limit = None
    else:
        # Search the archive before the link, because a hard link is
        # just a reference to an already archived file.
        linkname = tarinfo.linkname
        limit = tarinfo

    found = self._getmember(linkname, tarinfo=limit, normalize=True)
    if found is not None:
        return found
    raise KeyError("linkname %r not found" % linkname)
2381
def __iter__(self):
    """Iterate over the members, reading further ones from the archive
       with next() as long as it is not completely loaded.
    """
    if self._loaded:
        yield from self.members
        return

    # Yield items using TarFile's next() method.
    # When all members have been read, set TarFile as _loaded.
    position = 0

    # Fix for SF #1100429: Under rare circumstances it can
    # happen that getmembers() is called during iteration,
    # which will have already exhausted the next() method.
    if self.firstmember is not None:
        yield self.next()
        position = 1

    while True:
        if position < len(self.members):
            current = self.members[position]
        elif self._loaded:
            return
        else:
            current = self.next()
            if not current:
                self._loaded = True
                return
        position += 1
        yield current
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002412
def _dbg(self, level, msg):
    """Print msg to sys.stderr unless level exceeds self.debug.
    """
    wanted = level <= self.debug
    if wanted:
        print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002418
def __enter__(self):
    """Enter a with block: verify via _check() that the TarFile is
       usable and hand back the TarFile itself.
    """
    self._check()
    return self
2422
def __exit__(self, type, value, traceback):
    """Leave a with block. After a clean run the TarFile is closed
       normally; after an exception only the underlying file object is
       closed (unless self._extfileobj is set) and the TarFile is marked
       closed.
    """
    if type is not None:
        # An exception occurred. We must not call close() because
        # it would try to write end-of-archive blocks and padding.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        return
    self.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002432
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002433#--------------------
2434# exported functions
2435#--------------------
def is_tarfile(name):
    """Return True if name can be opened (and closed again) as a tar
       archive without a TarError, else return False.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2446
# Module-level shortcut: from here on the name `open` in this module
# refers to TarFile.open.
open = TarFile.open
Serhiy Storchakad27b4552013-11-24 01:53:29 +02002448
2449
def main():
    """Command line interface: parse the arguments and test, list,
       extract or create a tar archive accordingly. Without any action
       the help text is printed and the exit status is 1.
    """
    import argparse

    description = 'A simple command line interface for tarfile module.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    actions = parser.add_mutually_exclusive_group()
    actions.add_argument('-l', '--list', metavar='<tarfile>',
                         help='Show listing of a tarfile')
    actions.add_argument('-e', '--extract', nargs='+',
                         metavar=('<tarfile>', '<output_dir>'),
                         help='Extract tarfile into target dir')
    actions.add_argument('-c', '--create', nargs='+',
                         metavar=('<name>', '<file>'),
                         help='Create tarfile from sources')
    actions.add_argument('-t', '--test', metavar='<tarfile>',
                         help='Test if a tarfile is valid')
    args = parser.parse_args()

    def reject(archive):
        # Report a file that cannot be read as a tar archive and exit.
        parser.exit(1, '{!r} is not a tar archive.\n'.format(archive))

    if args.test:
        src = args.test
        if is_tarfile(src):
            with open(src, 'r') as tar:
                tar.getmembers()
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))
        else:
            reject(src)

    elif args.list:
        src = args.list
        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)
        else:
            reject(src)

    elif args.extract:
        given = len(args.extract)
        if given == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif given == 2:
            src, curdir = args.extract
        else:
            parser.exit(1, parser.format_help())

        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} file is extracted.'.format(src)
                else:
                    msg = ('{!r} file is extracted '
                           'into {!r} directory.').format(src, curdir)
                print(msg)
        else:
            reject(src)

    elif args.create:
        tar_name = args.create.pop(0)
        ext = os.path.splitext(tar_name)[1]
        # File name extension -> compression suffix of the open mode.
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        codec = compressions.get(ext)
        tar_mode = 'w' if codec is None else 'w:' + codec
        tar_files = args.create

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))

    else:
        parser.exit(1, parser.format_help())
2539
# Run the command line interface when this file is executed as a script.
if __name__ == '__main__':
    main()