blob: 0b8d31f85cf3eccca35e733072d2c4931fc02bf5 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
Guido van Rossumd8faa362007-04-27 19:54:29 +000032version = "0.9.0"
Guido van Rossum98297ee2007-11-06 21:34:58 +000033__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
Guido van Rossum98297ee2007-11-06 21:34:58 +000034__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000035
36#---------
37# Imports
38#---------
Serhiy Storchakacf4a2f22015-03-11 17:18:03 +020039from builtins import open as bltn_open
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000040import sys
41import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020042import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000043import shutil
44import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000045import time
46import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000047import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000048import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000049
50try:
Xavier de Gayef44abda2016-12-09 09:33:09 +010051 import pwd
Brett Cannoncd171c82013-07-04 17:43:24 -040052except ImportError:
Xavier de Gayef44abda2016-12-09 09:33:09 +010053 pwd = None
54try:
55 import grp
56except ImportError:
57 grp = None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000058
# Exceptions that signal "symlinks cannot be created here":
#   * AttributeError / NotImplementedError -- os.symlink is missing or
#     unsupported (e.g. Windows prior to 6.0).
#   * OSError (winerror=1314) -- the caller does not hold the
#     SeCreateSymbolicLinkPrivilege privilege.
symlink_exception = (AttributeError, NotImplementedError, OSError)
67
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000068# from tarfile import *
# Names exported by ``from tarfile import *``.
__all__ = [
    "TarFile",
    "TarInfo",
    "is_tarfile",
    "TarError",
    "ReadError",
    "CompressionError",
    "StreamError",
    "ExtractError",
    "HeaderError",
    "ENCODING",
    "USTAR_FORMAT",
    "GNU_FORMAT",
    "PAX_FORMAT",
    "DEFAULT_FORMAT",
    "open",
]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000073
74#---------------------------------------------------------
75# tar constants
76#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# Both magic strings fill the 8-byte magic+version area of the header.
# The GNU variant is "ustar" followed by TWO spaces and a NUL.
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000109
110#---------------------------------------------------------
111# tarfile constants
112#---------------------------------------------------------
# Member types this module knows how to handle.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Member types that are handled like a regular file.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)

# Member types that belong to the GNU tar format.
GNU_TYPES = (
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Pax header fields that override the TarInfo attribute of the same name.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Pax header fields whose decoding is affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Pax header fields that hold numbers, mapped to the converter to apply;
# every field not listed here is treated as a string.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int,
}
145
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000146#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000147# initialization
148#---------------------------------------------------------
# Default encoding: always UTF-8 on Windows ("nt"), otherwise whatever
# the interpreter reports as the filesystem encoding.
ENCODING = "utf-8" if os.name == "nt" else sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000153
154#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000155# Some useful functions
156#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000157
def stn(s, length, encoding, errors):
    """Encode the string s and fit it into a field of length bytes.

    The encoded value is truncated to length bytes if it is longer and
    padded with NUL bytes if it is shorter.
    """
    encoded = s.encode(encoding, errors)
    # A non-positive pad count multiplies out to an empty bytes object.
    padding = NUL * (length - len(encoded))
    return encoded[:length] + padding
Thomas Wouters477c8d52006-05-27 19:21:47 +0000163
def nts(s, encoding, errors):
    """Decode a bytes field into a string, stopping at the first NUL.

    Everything from the first null byte onwards is discarded; if there
    is no null byte the whole field is decoded.
    """
    payload, _sep, _rest = s.partition(b"\0")
    return payload.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000171
def nti(s):
    """Convert a number field to a python number.

    Two encodings are recognised (see itn()):
      * a leading 0o200 or 0o377 byte followed by a big-endian base-256
        value (0o377 marks a negative number);
      * an ASCII octal string, optionally NUL-terminated and surrounded
        by whitespace; an empty field counts as 0.

    Raises InvalidHeaderError if the octal form cannot be parsed.
    """
    marker = s[0]
    if marker in (0o200, 0o377):
        width = len(s) - 1
        n = int.from_bytes(s[1:], "big")
        if marker == 0o377:
            # Negative numbers are stored offset by 256 ** width.
            n = -(256 ** width - n)
        return n

    try:
        text = nts(s, "ascii", "strict")
        return int(text.strip() or "0", 8)
    except ValueError:
        raise InvalidHeaderError("invalid header")
191
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

    n       -- the number to encode; non-integers (e.g. a float mtime)
               are truncated with int() first.
    digits  -- total width of the field in bytes.
    format  -- archive format; the base-256 fallback is only used for
               GNU_FORMAT.

    Returns a bytes object (octal form) or a bytearray (base-256 form)
    of exactly digits bytes.  Raises ValueError if n does not fit.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.

    # Coerce once, up front: the base-256 branch uses bitwise operators,
    # which raise TypeError on floats.
    n = int(n)
    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        if n >= 0:
            s = bytearray([0o200])
        else:
            s = bytearray([0o377])
            n = 256 ** digits + n

        # Insert least-significant byte first; each insert at index 1
        # pushes earlier bytes right, yielding big-endian order.
        for i in range(digits - 1):
            s.insert(1, n & 0o377)
            n >>= 8
    else:
        raise ValueError("overflow in number field")

    return s
219
def calc_chksums(buf):
    """Return (unsigned_chksum, signed_chksum) for a 512-byte header.

    All bytes are summed except the 8-byte chksum field at offset 148,
    which is counted as eight spaces (8 * 0x20 == 256).  According to
    the GNU tar sources, some tars (Sun and NeXT) sum the bytes as
    signed chars, which differs when bytes have the high bit set, so
    both variants are computed.
    """
    as_unsigned = struct.unpack_from("148B8x356B", buf)
    as_signed = struct.unpack_from("148b8x356b", buf)
    return 256 + sum(as_unsigned), 256 + sum(as_signed)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000232
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, everything up to EOF is copied.  Otherwise
    exactly length bytes are copied in pieces of at most bufsize bytes
    (default 16 KiB), and exception("unexpected end of data") is raised
    as soon as src returns fewer bytes than requested.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    def transfer(count):
        # Move exactly count bytes or fail; a short read means src ended
        # before the announced length was reached.
        chunk = src.read(count)
        if len(chunk) < count:
            raise exception("unexpected end of data")
        dst.write(chunk)

    full_chunks, tail = divmod(length, bufsize)
    for _ in range(full_chunks):
        transfer(bufsize)
    if tail != 0:
        transfer(tail)
    return
257
def filemode(mode):
    """Deprecated in this location; use stat.filemode.

    Emits a DeprecationWarning attributed to the caller and returns
    stat.filemode(mode).
    """
    import warnings
    warnings.warn("deprecated in favor of stat.filemode",
                  category=DeprecationWarning, stacklevel=2)
    return stat.filemode(mode)
264
def _safe_print(s):
    """Print s followed by a space (no newline) without encoding errors.

    If sys.stdout reports an encoding, characters it cannot represent
    are replaced by backslash escapes before printing.
    """
    stdout_encoding = getattr(sys.stdout, 'encoding', None)
    text = s
    if stdout_encoding is not None:
        raw = s.encode(stdout_encoding, 'backslashreplace')
        text = raw.decode(stdout_encoding)
    print(text, end=' ')
270
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000271
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000305
306#---------------------------
307# internal stream interface
308#---------------------------
class _LowLevelFile:
    """Minimal file object built directly on os.open().

    Supports reading and writing only; it is used instead of a regular
    file object for streaming access.
    """

    def __init__(self, name, mode):
        """Open name for reading ("r") or for writing ("w").

        "w" creates the file if needed and truncates it.  Any other
        mode raises KeyError.
        """
        flags_by_mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }
        flags = flags_by_mode[mode]
        # O_BINARY only exists on some platforms; OR-ing 0 is a no-op.
        flags |= getattr(os, "O_BINARY", 0)
        self.fd = os.open(name, flags, 0o666)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return up to size bytes read from the descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write the bytes s to the descriptor."""
        os.write(self.fd, s)
332
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip, bzip2 or xz compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

        name     -- file name; opened via _LowLevelFile when fileobj is
                    None, and written as the gzip FNAME field in "gz"
                    write mode.
        mode     -- "r" for reading, anything else is treated as writing
                    by the compression setup ("w" is what close() flushes).
        comptype -- "tar" (no compression), "gz", "bz2", "xz", or "*" to
                    detect the type from the first block of fileobj.
        fileobj  -- an already open stream, or None.
        bufsize  -- size of the blocks exchanged with fileobj.

        Raises CompressionError if the compression module is missing or
        comptype is unknown.  If construction fails, a file opened here
        is closed again before the exception propagates.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = b""
        self.pos      = 0
        self.closed   = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    # Set before parsing the header so that _read() can
                    # always evaluate its except clause.
                    self.exception = zlib.error
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except:
            # Deliberately catches everything: this is cleanup only and
            # the exception is re-raised unchanged.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" is missing if __init__ failed before setting it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits selects a raw deflate stream; the gzip header
        # and trailer are written by hand.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write the bytes s to the stream, compressing them if needed.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        # pos counts uncompressed bytes.
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Buffer s and write out every complete block of bufsize
           bytes to the underlying file object.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # gzip trailer: CRC32 and length modulo 2**32.
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            # Only close files this object opened itself.
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

        Parses and skips the gzip header.  Raises ReadError if the magic
        number is wrong and CompressionError if the method is not deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            # FEXTRA: the extra field is part of the uncompressed header,
            # so it must be skipped with the raw reader -- not read(),
            # which would push it through the decompressor and move pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # FNAME: NUL-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: NUL-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: 2-byte header checksum.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.

           Seeking forward is done by reading and discarding data.
           Raises StreamError if pos is before the current position.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            # The chunks are bytes, so the separator must be bytes too.
            buf = b"".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream (fewer at EOF),
           decompressing them if needed.

           Raises ReadError if the compressed data is invalid.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                raise ReadError("invalid compressed data")
            self.dbuf += buf
            c += len(buf)
        buf = self.dbuf[:size]
        self.dbuf = self.dbuf[size:]
        return buf

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            self.buf += buf
            c += len(buf)
        buf = self.buf[:size]
        self.buf = self.buf[size:]
        return buf
# class _Stream
576
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').

       The first block is read up front so that it can be inspected
       and is then handed out again by the first read() call.
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the pre-read first block, ignoring size.

        From then on the instance's read is the wrapped file object's
        own read method.
        """
        first_block = self.buf
        self.read = self.fileobj.read
        return first_block

    def getcomptype(self):
        """Guess the compression type from the leading bytes.

        Returns "gz", "bz2", "xz" or, when nothing matches, "tar".
        """
        head = self.buf
        if head.startswith(b"\x1f\x8b\x08"):
            return "gz"
        if head[0:3] == b"BZh" and head[4:10] == b"1AY&SY":
            return "bz2"
        if head.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        return "tar"

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class StreamProxy
603
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000604#------------------------
605# Extraction file object
606#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        """Expose size bytes of fileobj as a file of their own.

        fileobj   -- the underlying file object.
        offset    -- position in fileobj where the stored data starts.
        size      -- logical size of the exposed file.
        blockinfo -- optional sequence of (offset, size) pairs giving
                     the logical position and length of each stored
                     data block; gaps between them read as NUL bytes.
                     Defaults to a single block covering the whole file.
        """
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.  Each entry is
        # (is_data, logical_start, logical_stop, position_in_fileobj).
        self.map_index = 0
        self.map = []
        logical_end = 0
        stored_at = self.offset
        for block_start, block_len in blockinfo:
            block_stop = block_start + block_len
            if block_start > logical_end:
                self.map.append((False, logical_end, block_start, None))
            self.map.append((True, block_start, block_stop, stored_at))
            stored_at += block_len
            logical_end = block_stop
        if logical_end < self.size:
            self.map.append((False, logical_end, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.

        The resulting position is clamped to the file's bounds and
        returned.  Raises ValueError for an unknown whence.
        """
        if whence == io.SEEK_SET:
            new_position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            moved = self.position + position
            if position < 0:
                new_position = max(moved, 0)
            else:
                new_position = min(moved, self.size)
        elif whence == io.SEEK_END:
            new_position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        self.position = new_position
        return new_position

    def read(self, size=None):
        """Read data from the file.

        Returns at most size bytes (everything up to the end if size is
        None).  Raises ReadError if the underlying file ends early.
        """
        available = self.size - self.position
        size = available if size is None else min(size, available)

        pieces = []
        while size > 0:
            # Find the map entry containing the current position,
            # resuming from the last one used and wrapping around.
            while True:
                is_data, start, stop, stored_at = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                self.map_index += 1
                if self.map_index == len(self.map):
                    self.map_index = 0
            length = min(size, stop - self.position)
            if is_data:
                self.fileobj.seek(stored_at + (self.position - start))
                chunk = self.fileobj.read(length)
                if len(chunk) != length:
                    raise ReadError("unexpected end of data")
                pieces.append(chunk)
            else:
                # Holes are not stored; they read back as NUL bytes.
                pieces.append(NUL * length)
            size -= length
            self.position += length
        return b"".join(pieces)

    def readinto(self, b):
        """Read into the writable buffer b; return the number of bytes."""
        data = self.read(len(b))
        count = len(data)
        b[:count] = data
        return count

    def close(self):
        self.closed = True
#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000710
class ExFileObject(io.BufferedReader):
    """Buffered reader over the data of a single archive member."""

    def __init__(self, tarfile, tarinfo):
        """Wrap the region of tarfile.fileobj described by tarinfo."""
        raw = _FileInFile(
            tarfile.fileobj,
            tarinfo.offset_data,
            tarinfo.size,
            tarinfo.sparse,
        )
        super().__init__(raw)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000718
719#------------------
720# Exported Classes
721#------------------
722class TarInfo(object):
723 """Informational class which holds the details about an
724 archive member given by a tar header block.
725 TarInfo objects are returned by TarFile.getmember(),
726 TarFile.getmembers() and TarFile.gettarinfo() and are
727 usually created internally.
728 """
729
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +0000730 __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
731 "chksum", "type", "linkname", "uname", "gname",
732 "devmajor", "devminor",
733 "offset", "offset_data", "pax_headers", "sparse",
734 "tarfile", "_sparse_structs", "_link_target")
735
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000736 def __init__(self, name=""):
737 """Construct a TarInfo object. name is the optional name
738 of the member.
739 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000740 self.name = name # member name
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000741 self.mode = 0o644 # file permissions
Thomas Wouters477c8d52006-05-27 19:21:47 +0000742 self.uid = 0 # user id
743 self.gid = 0 # group id
744 self.size = 0 # file size
745 self.mtime = 0 # modification time
746 self.chksum = 0 # header checksum
747 self.type = REGTYPE # member type
748 self.linkname = "" # link name
Lars Gustäbel331b8002010-10-04 15:18:47 +0000749 self.uname = "" # user name
750 self.gname = "" # group name
Thomas Wouters477c8d52006-05-27 19:21:47 +0000751 self.devmajor = 0 # device major number
752 self.devminor = 0 # device minor number
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000753
Thomas Wouters477c8d52006-05-27 19:21:47 +0000754 self.offset = 0 # the tar header starts here
755 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000756
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +0000757 self.sparse = None # sparse member information
Guido van Rossumd8faa362007-04-27 19:54:29 +0000758 self.pax_headers = {} # pax header information
759
760 # In pax headers the "name" and "linkname" field are called
761 # "path" and "linkpath".
Serhiy Storchakabdf6b912017-03-19 08:40:32 +0200762 @property
763 def path(self):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000764 return self.name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000765
Serhiy Storchakabdf6b912017-03-19 08:40:32 +0200766 @path.setter
767 def path(self, name):
768 self.name = name
769
770 @property
771 def linkpath(self):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000772 return self.linkname
Serhiy Storchakabdf6b912017-03-19 08:40:32 +0200773
774 @linkpath.setter
775 def linkpath(self, linkname):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000776 self.linkname = linkname
Guido van Rossumd8faa362007-04-27 19:54:29 +0000777
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000778 def __repr__(self):
779 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
780
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000781 def get_info(self):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000782 """Return the TarInfo's attributes as a dictionary.
783 """
784 info = {
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +0000785 "name": self.name,
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000786 "mode": self.mode & 0o7777,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000787 "uid": self.uid,
788 "gid": self.gid,
789 "size": self.size,
790 "mtime": self.mtime,
791 "chksum": self.chksum,
792 "type": self.type,
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +0000793 "linkname": self.linkname,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000794 "uname": self.uname,
795 "gname": self.gname,
796 "devmajor": self.devmajor,
797 "devminor": self.devminor
798 }
799
800 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
801 info["name"] += "/"
802
803 return info
804
Victor Stinnerde629d42010-05-05 21:43:57 +0000805 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000806 """Return a tar header as a string of 512 byte blocks.
807 """
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000808 info = self.get_info()
Guido van Rossume7ba4952007-06-06 23:52:48 +0000809
Guido van Rossumd8faa362007-04-27 19:54:29 +0000810 if format == USTAR_FORMAT:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000811 return self.create_ustar_header(info, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000812 elif format == GNU_FORMAT:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000813 return self.create_gnu_header(info, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000814 elif format == PAX_FORMAT:
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000815 return self.create_pax_header(info, encoding)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000816 else:
817 raise ValueError("invalid format")
818
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000819 def create_ustar_header(self, info, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000820 """Return the object as a ustar header block.
821 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000822 info["magic"] = POSIX_MAGIC
823
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200824 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000825 raise ValueError("linkname is too long")
826
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200827 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
828 info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000829
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000830 return self._create_header(info, USTAR_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000831
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000832 def create_gnu_header(self, info, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000833 """Return the object as a GNU header block sequence.
834 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000835 info["magic"] = GNU_MAGIC
836
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000837 buf = b""
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200838 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000839 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000840
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200841 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000842 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000843
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000844 return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000845
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000846 def create_pax_header(self, info, encoding):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000847 """Return the object as a ustar header block. If it cannot be
848 represented this way, prepend a pax extended header sequence
849 with supplement information.
850 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000851 info["magic"] = POSIX_MAGIC
852 pax_headers = self.pax_headers.copy()
853
854 # Test string fields for values that exceed the field length or cannot
855 # be represented in ASCII encoding.
856 for name, hname, length in (
857 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
858 ("uname", "uname", 32), ("gname", "gname", 32)):
859
Guido van Rossume7ba4952007-06-06 23:52:48 +0000860 if hname in pax_headers:
861 # The pax header has priority.
862 continue
863
Guido van Rossumd8faa362007-04-27 19:54:29 +0000864 # Try to encode the string as ASCII.
865 try:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000866 info[name].encode("ascii", "strict")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000867 except UnicodeEncodeError:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000868 pax_headers[hname] = info[name]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000869 continue
870
Guido van Rossume7ba4952007-06-06 23:52:48 +0000871 if len(info[name]) > length:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000872 pax_headers[hname] = info[name]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000873
874 # Test number fields for values that exceed the field limit or values
875 # that like to be stored as float.
876 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
Guido van Rossume7ba4952007-06-06 23:52:48 +0000877 if name in pax_headers:
878 # The pax header has priority. Avoid overflow.
879 info[name] = 0
880 continue
881
Guido van Rossumd8faa362007-04-27 19:54:29 +0000882 val = info[name]
883 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000884 pax_headers[name] = str(val)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000885 info[name] = 0
886
Guido van Rossume7ba4952007-06-06 23:52:48 +0000887 # Create a pax extended header if necessary.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000888 if pax_headers:
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000889 buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000890 else:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000891 buf = b""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000892
Lars Gustäbel3741eff2007-08-21 12:17:05 +0000893 return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000894
895 @classmethod
Lars Gustäbel3741eff2007-08-21 12:17:05 +0000896 def create_pax_global_header(cls, pax_headers):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000897 """Return the object as a pax global header block sequence.
898 """
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000899 return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000900
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200901 def _posix_split_name(self, name, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000902 """Split a name longer than 100 chars into a prefix
903 and a name part.
904 """
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200905 components = name.split("/")
906 for i in range(1, len(components)):
907 prefix = "/".join(components[:i])
908 name = "/".join(components[i:])
909 if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
910 len(name.encode(encoding, errors)) <= LENGTH_NAME:
911 break
912 else:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000913 raise ValueError("name is too long")
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200914
Guido van Rossumd8faa362007-04-27 19:54:29 +0000915 return prefix, name
916
917 @staticmethod
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000918 def _create_header(info, format, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000919 """Return a header block. info is a dictionary with file
920 information, format must be one of the *_FORMAT constants.
921 """
922 parts = [
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000923 stn(info.get("name", ""), 100, encoding, errors),
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000924 itn(info.get("mode", 0) & 0o7777, 8, format),
Guido van Rossumd8faa362007-04-27 19:54:29 +0000925 itn(info.get("uid", 0), 8, format),
926 itn(info.get("gid", 0), 8, format),
927 itn(info.get("size", 0), 12, format),
928 itn(info.get("mtime", 0), 12, format),
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000929 b" ", # checksum field
Guido van Rossumd8faa362007-04-27 19:54:29 +0000930 info.get("type", REGTYPE),
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000931 stn(info.get("linkname", ""), 100, encoding, errors),
932 info.get("magic", POSIX_MAGIC),
Lars Gustäbel331b8002010-10-04 15:18:47 +0000933 stn(info.get("uname", ""), 32, encoding, errors),
934 stn(info.get("gname", ""), 32, encoding, errors),
Guido van Rossumd8faa362007-04-27 19:54:29 +0000935 itn(info.get("devmajor", 0), 8, format),
936 itn(info.get("devminor", 0), 8, format),
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000937 stn(info.get("prefix", ""), 155, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000938 ]
939
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000940 buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000941 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
Lars Gustäbela280ca752007-08-28 07:34:33 +0000942 buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000943 return buf
944
945 @staticmethod
946 def _create_payload(payload):
947 """Return the string payload filled with zero bytes
948 up to the next 512 byte border.
949 """
950 blocks, remainder = divmod(len(payload), BLOCKSIZE)
951 if remainder > 0:
952 payload += (BLOCKSIZE - remainder) * NUL
953 return payload
954
955 @classmethod
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000956 def _create_gnu_long_header(cls, name, type, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000957 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
958 for name.
959 """
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000960 name = name.encode(encoding, errors) + NUL
Guido van Rossumd8faa362007-04-27 19:54:29 +0000961
962 info = {}
963 info["name"] = "././@LongLink"
964 info["type"] = type
965 info["size"] = len(name)
966 info["magic"] = GNU_MAGIC
967
968 # create extended header + name blocks.
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000969 return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
Guido van Rossumd8faa362007-04-27 19:54:29 +0000970 cls._create_payload(name)
971
972 @classmethod
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000973 def _create_pax_generic_header(cls, pax_headers, type, encoding):
974 """Return a POSIX.1-2008 extended or global header sequence
Guido van Rossumd8faa362007-04-27 19:54:29 +0000975 that contains a list of keyword, value pairs. The values
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000976 must be strings.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000977 """
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000978 # Check if one of the fields contains surrogate characters and thereby
979 # forces hdrcharset=BINARY, see _proc_pax() for more information.
980 binary = False
981 for keyword, value in pax_headers.items():
982 try:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000983 value.encode("utf-8", "strict")
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000984 except UnicodeEncodeError:
985 binary = True
986 break
987
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000988 records = b""
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000989 if binary:
990 # Put the hdrcharset field at the beginning of the header.
991 records += b"21 hdrcharset=BINARY\n"
992
Guido van Rossumd8faa362007-04-27 19:54:29 +0000993 for keyword, value in pax_headers.items():
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000994 keyword = keyword.encode("utf-8")
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000995 if binary:
996 # Try to restore the original byte representation of `value'.
997 # Needless to say, that the encoding must match the string.
998 value = value.encode(encoding, "surrogateescape")
999 else:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001000 value = value.encode("utf-8")
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001001
Guido van Rossumd8faa362007-04-27 19:54:29 +00001002 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
1003 n = p = 0
1004 while True:
1005 n = l + len(str(p))
1006 if n == p:
1007 break
1008 p = n
Lars Gustäbela280ca752007-08-28 07:34:33 +00001009 records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
Guido van Rossumd8faa362007-04-27 19:54:29 +00001010
1011 # We use a hardcoded "././@PaxHeader" name like star does
1012 # instead of the one that POSIX recommends.
1013 info = {}
1014 info["name"] = "././@PaxHeader"
1015 info["type"] = type
1016 info["size"] = len(records)
1017 info["magic"] = POSIX_MAGIC
1018
1019 # Create pax header + record blocks.
Lars Gustäbel3741eff2007-08-21 12:17:05 +00001020 return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
Guido van Rossumd8faa362007-04-27 19:54:29 +00001021 cls._create_payload(records)
1022
Guido van Rossum75b64e62005-01-16 00:16:11 +00001023 @classmethod
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001024 def frombuf(cls, buf, encoding, errors):
1025 """Construct a TarInfo object from a 512 byte bytes object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001026 """
Lars Gustäbel9520a432009-11-22 18:48:49 +00001027 if len(buf) == 0:
1028 raise EmptyHeaderError("empty header")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001029 if len(buf) != BLOCKSIZE:
Lars Gustäbel9520a432009-11-22 18:48:49 +00001030 raise TruncatedHeaderError("truncated header")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001031 if buf.count(NUL) == BLOCKSIZE:
Lars Gustäbel9520a432009-11-22 18:48:49 +00001032 raise EOFHeaderError("end of file header")
Thomas Wouters902d6eb2007-01-09 23:18:33 +00001033
1034 chksum = nti(buf[148:156])
1035 if chksum not in calc_chksums(buf):
Lars Gustäbel9520a432009-11-22 18:48:49 +00001036 raise InvalidHeaderError("bad checksum")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001037
Guido van Rossumd8faa362007-04-27 19:54:29 +00001038 obj = cls()
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001039 obj.name = nts(buf[0:100], encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001040 obj.mode = nti(buf[100:108])
1041 obj.uid = nti(buf[108:116])
1042 obj.gid = nti(buf[116:124])
1043 obj.size = nti(buf[124:136])
1044 obj.mtime = nti(buf[136:148])
1045 obj.chksum = chksum
1046 obj.type = buf[156:157]
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001047 obj.linkname = nts(buf[157:257], encoding, errors)
1048 obj.uname = nts(buf[265:297], encoding, errors)
1049 obj.gname = nts(buf[297:329], encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001050 obj.devmajor = nti(buf[329:337])
1051 obj.devminor = nti(buf[337:345])
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001052 prefix = nts(buf[345:500], encoding, errors)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001053
Guido van Rossumd8faa362007-04-27 19:54:29 +00001054 # Old V7 tar format represents a directory as a regular
1055 # file with a trailing slash.
1056 if obj.type == AREGTYPE and obj.name.endswith("/"):
1057 obj.type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001058
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +00001059 # The old GNU sparse format occupies some of the unused
1060 # space in the buffer for up to 4 sparse structures.
Mike53f7a7c2017-12-14 14:04:53 +03001061 # Save them for later processing in _proc_sparse().
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +00001062 if obj.type == GNUTYPE_SPARSE:
1063 pos = 386
1064 structs = []
1065 for i in range(4):
1066 try:
1067 offset = nti(buf[pos:pos + 12])
1068 numbytes = nti(buf[pos + 12:pos + 24])
1069 except ValueError:
1070 break
1071 structs.append((offset, numbytes))
1072 pos += 24
1073 isextended = bool(buf[482])
1074 origsize = nti(buf[483:495])
1075 obj._sparse_structs = (structs, isextended, origsize)
1076
Guido van Rossumd8faa362007-04-27 19:54:29 +00001077 # Remove redundant slashes from directories.
1078 if obj.isdir():
1079 obj.name = obj.name.rstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001080
Guido van Rossumd8faa362007-04-27 19:54:29 +00001081 # Reconstruct a ustar longname.
1082 if prefix and obj.type not in GNU_TYPES:
1083 obj.name = prefix + "/" + obj.name
1084 return obj
1085
1086 @classmethod
1087 def fromtarfile(cls, tarfile):
1088 """Return the next TarInfo object from TarFile object
1089 tarfile.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001090 """
Guido van Rossumd8faa362007-04-27 19:54:29 +00001091 buf = tarfile.fileobj.read(BLOCKSIZE)
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001092 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001093 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1094 return obj._proc_member(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001095
Guido van Rossumd8faa362007-04-27 19:54:29 +00001096 #--------------------------------------------------------------------------
1097 # The following are methods that are called depending on the type of a
1098 # member. The entry point is _proc_member() which can be overridden in a
1099 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1100 # implement the following
1101 # operations:
1102 # 1. Set self.offset_data to the position where the data blocks begin,
1103 # if there is data that follows.
1104 # 2. Set tarfile.offset to the position where the next member's header will
1105 # begin.
1106 # 3. Return self or another valid TarInfo object.
1107 def _proc_member(self, tarfile):
1108 """Choose the right processing method depending on
1109 the type and call it.
Thomas Wouters89f507f2006-12-13 04:49:30 +00001110 """
Guido van Rossumd8faa362007-04-27 19:54:29 +00001111 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1112 return self._proc_gnulong(tarfile)
1113 elif self.type == GNUTYPE_SPARSE:
1114 return self._proc_sparse(tarfile)
1115 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1116 return self._proc_pax(tarfile)
1117 else:
1118 return self._proc_builtin(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001119
Guido van Rossumd8faa362007-04-27 19:54:29 +00001120 def _proc_builtin(self, tarfile):
1121 """Process a builtin type or an unknown type which
1122 will be treated as a regular file.
1123 """
1124 self.offset_data = tarfile.fileobj.tell()
1125 offset = self.offset_data
1126 if self.isreg() or self.type not in SUPPORTED_TYPES:
1127 # Skip the following data blocks.
1128 offset += self._block(self.size)
1129 tarfile.offset = offset
Thomas Wouters89f507f2006-12-13 04:49:30 +00001130
Guido van Rossume7ba4952007-06-06 23:52:48 +00001131 # Patch the TarInfo object with saved global
Guido van Rossumd8faa362007-04-27 19:54:29 +00001132 # header information.
Guido van Rossume7ba4952007-06-06 23:52:48 +00001133 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001134
1135 return self
1136
1137 def _proc_gnulong(self, tarfile):
1138 """Process the blocks that hold a GNU longname
1139 or longlink member.
1140 """
1141 buf = tarfile.fileobj.read(self._block(self.size))
1142
1143 # Fetch the next header and process it.
Lars Gustäbel9520a432009-11-22 18:48:49 +00001144 try:
1145 next = self.fromtarfile(tarfile)
1146 except HeaderError:
1147 raise SubsequentHeaderError("missing or bad subsequent header")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001148
1149 # Patch the TarInfo object from the next header with
1150 # the longname information.
1151 next.offset = self.offset
1152 if self.type == GNUTYPE_LONGNAME:
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001153 next.name = nts(buf, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001154 elif self.type == GNUTYPE_LONGLINK:
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001155 next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001156
1157 return next
1158
1159 def _proc_sparse(self, tarfile):
1160 """Process a GNU sparse header plus extra headers.
1161 """
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +00001162 # We already collected some sparse structures in frombuf().
1163 structs, isextended, origsize = self._sparse_structs
1164 del self._sparse_structs
Guido van Rossumd8faa362007-04-27 19:54:29 +00001165
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +00001166 # Collect sparse structures from extended header blocks.
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001167 while isextended:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001168 buf = tarfile.fileobj.read(BLOCKSIZE)
1169 pos = 0
Guido van Rossum805365e2007-05-07 22:24:25 +00001170 for i in range(21):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001171 try:
1172 offset = nti(buf[pos:pos + 12])
1173 numbytes = nti(buf[pos + 12:pos + 24])
1174 except ValueError:
1175 break
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001176 if offset and numbytes:
1177 structs.append((offset, numbytes))
Guido van Rossumd8faa362007-04-27 19:54:29 +00001178 pos += 24
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001179 isextended = bool(buf[504])
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001180 self.sparse = structs
Guido van Rossumd8faa362007-04-27 19:54:29 +00001181
1182 self.offset_data = tarfile.fileobj.tell()
1183 tarfile.offset = self.offset_data + self._block(self.size)
1184 self.size = origsize
Guido van Rossumd8faa362007-04-27 19:54:29 +00001185 return self
1186
1187 def _proc_pax(self, tarfile):
1188 """Process an extended or global header as described in
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001189 POSIX.1-2008.
Guido van Rossumd8faa362007-04-27 19:54:29 +00001190 """
1191 # Read the header information.
1192 buf = tarfile.fileobj.read(self._block(self.size))
1193
1194 # A pax header stores supplemental information for either
1195 # the following file (extended) or all following files
1196 # (global).
1197 if self.type == XGLTYPE:
1198 pax_headers = tarfile.pax_headers
1199 else:
1200 pax_headers = tarfile.pax_headers.copy()
1201
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001202 # Check if the pax header contains a hdrcharset field. This tells us
1203 # the encoding of the path, linkpath, uname and gname fields. Normally,
1204 # these fields are UTF-8 encoded but since POSIX.1-2008 tar
1205 # implementations are allowed to store them as raw binary strings if
1206 # the translation to UTF-8 fails.
1207 match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
1208 if match is not None:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001209 pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001210
1211 # For the time being, we don't care about anything other than "BINARY".
1212 # The only other value that is currently allowed by the standard is
1213 # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
1214 hdrcharset = pax_headers.get("hdrcharset")
1215 if hdrcharset == "BINARY":
1216 encoding = tarfile.encoding
1217 else:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001218 encoding = "utf-8"
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001219
Guido van Rossumd8faa362007-04-27 19:54:29 +00001220 # Parse pax header information. A record looks like that:
1221 # "%d %s=%s\n" % (length, keyword, value). length is the size
1222 # of the complete record including the length field itself and
Guido van Rossume7ba4952007-06-06 23:52:48 +00001223 # the newline. keyword and value are both UTF-8 encoded strings.
Antoine Pitroufd036452008-08-19 17:56:33 +00001224 regex = re.compile(br"(\d+) ([^=]+)=")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001225 pos = 0
1226 while True:
1227 match = regex.match(buf, pos)
1228 if not match:
1229 break
1230
1231 length, keyword = match.groups()
1232 length = int(length)
1233 value = buf[match.end(2) + 1:match.start(1) + length - 1]
1234
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001235 # Normally, we could just use "utf-8" as the encoding and "strict"
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001236 # as the error handler, but we better not take the risk. For
1237 # example, GNU tar <= 1.23 is known to store filenames it cannot
1238 # translate to UTF-8 as raw strings (unfortunately without a
1239 # hdrcharset=BINARY header).
1240 # We first try the strict standard encoding, and if that fails we
1241 # fall back on the user's encoding and error handler.
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001242 keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001243 tarfile.errors)
1244 if keyword in PAX_NAME_FIELDS:
1245 value = self._decode_pax_field(value, encoding, tarfile.encoding,
1246 tarfile.errors)
1247 else:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001248 value = self._decode_pax_field(value, "utf-8", "utf-8",
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001249 tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001250
1251 pax_headers[keyword] = value
1252 pos += length
1253
Guido van Rossume7ba4952007-06-06 23:52:48 +00001254 # Fetch the next header.
Lars Gustäbel9520a432009-11-22 18:48:49 +00001255 try:
1256 next = self.fromtarfile(tarfile)
1257 except HeaderError:
1258 raise SubsequentHeaderError("missing or bad subsequent header")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001259
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001260 # Process GNU sparse information.
1261 if "GNU.sparse.map" in pax_headers:
1262 # GNU extended sparse format version 0.1.
1263 self._proc_gnusparse_01(next, pax_headers)
1264
1265 elif "GNU.sparse.size" in pax_headers:
1266 # GNU extended sparse format version 0.0.
1267 self._proc_gnusparse_00(next, pax_headers, buf)
1268
1269 elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
1270 # GNU extended sparse format version 1.0.
1271 self._proc_gnusparse_10(next, pax_headers, tarfile)
1272
Guido van Rossume7ba4952007-06-06 23:52:48 +00001273 if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
Guido van Rossume7ba4952007-06-06 23:52:48 +00001274 # Patch the TarInfo object with the extended header info.
1275 next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1276 next.offset = self.offset
1277
1278 if "size" in pax_headers:
1279 # If the extended header replaces the size field,
1280 # we need to recalculate the offset where the next
1281 # header starts.
1282 offset = next.offset_data
1283 if next.isreg() or next.type not in SUPPORTED_TYPES:
1284 offset += next._block(next.size)
1285 tarfile.offset = offset
1286
1287 return next
1288
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001289 def _proc_gnusparse_00(self, next, pax_headers, buf):
1290 """Process a GNU tar extended sparse header, version 0.0.
1291 """
1292 offsets = []
1293 for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1294 offsets.append(int(match.group(1)))
1295 numbytes = []
1296 for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1297 numbytes.append(int(match.group(1)))
1298 next.sparse = list(zip(offsets, numbytes))
1299
1300 def _proc_gnusparse_01(self, next, pax_headers):
1301 """Process a GNU tar extended sparse header, version 0.1.
1302 """
1303 sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1304 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1305
1306 def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1307 """Process a GNU tar extended sparse header, version 1.0.
1308 """
1309 fields = None
1310 sparse = []
1311 buf = tarfile.fileobj.read(BLOCKSIZE)
1312 fields, buf = buf.split(b"\n", 1)
1313 fields = int(fields)
1314 while len(sparse) < fields * 2:
1315 if b"\n" not in buf:
1316 buf += tarfile.fileobj.read(BLOCKSIZE)
1317 number, buf = buf.split(b"\n", 1)
1318 sparse.append(int(number))
1319 next.offset_data = tarfile.fileobj.tell()
1320 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1321
Guido van Rossume7ba4952007-06-06 23:52:48 +00001322 def _apply_pax_info(self, pax_headers, encoding, errors):
1323 """Replace fields with supplemental information from a previous
1324 pax extended or global header.
1325 """
1326 for keyword, value in pax_headers.items():
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001327 if keyword == "GNU.sparse.name":
1328 setattr(self, "path", value)
1329 elif keyword == "GNU.sparse.size":
1330 setattr(self, "size", int(value))
1331 elif keyword == "GNU.sparse.realsize":
1332 setattr(self, "size", int(value))
1333 elif keyword in PAX_FIELDS:
1334 if keyword in PAX_NUMBER_FIELDS:
1335 try:
1336 value = PAX_NUMBER_FIELDS[keyword](value)
1337 except ValueError:
1338 value = 0
1339 if keyword == "path":
1340 value = value.rstrip("/")
1341 setattr(self, keyword, value)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001342
1343 self.pax_headers = pax_headers.copy()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001344
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001345 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1346 """Decode a single field from a pax record.
1347 """
1348 try:
1349 return value.decode(encoding, "strict")
1350 except UnicodeDecodeError:
1351 return value.decode(fallback_encoding, fallback_errors)
1352
Guido van Rossumd8faa362007-04-27 19:54:29 +00001353 def _block(self, count):
1354 """Round up a byte count by BLOCKSIZE and return it,
1355 e.g. _block(834) => 1024.
1356 """
1357 blocks, remainder = divmod(count, BLOCKSIZE)
1358 if remainder:
1359 blocks += 1
1360 return blocks * BLOCKSIZE
Thomas Wouters89f507f2006-12-13 04:49:30 +00001361
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001362 def isreg(self):
1363 return self.type in REGULAR_TYPES
1364 def isfile(self):
1365 return self.isreg()
1366 def isdir(self):
1367 return self.type == DIRTYPE
1368 def issym(self):
1369 return self.type == SYMTYPE
1370 def islnk(self):
1371 return self.type == LNKTYPE
1372 def ischr(self):
1373 return self.type == CHRTYPE
def isblk(self):
    """Return True if the member's type is BLKTYPE (a block device)."""
    kind = self.type
    return kind == BLKTYPE
def isfifo(self):
    """Return True if the member's type is FIFOTYPE (a FIFO)."""
    kind = self.type
    return kind == FIFOTYPE
def issparse(self):
    """Return True if the `sparse` attribute is set (i.e. not None)."""
    return not (self.sparse is None)
def isdev(self):
    """Return True if the member's type is CHRTYPE, BLKTYPE or
       FIFOTYPE.
    """
    device_types = (CHRTYPE, BLKTYPE, FIFOTYPE)
    return self.type in device_types
1382# class TarInfo
1383
1384class TarFile(object):
1385 """The TarFile Class provides an interface to tar archives.
1386 """
1387
1388 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1389
1390 dereference = False # If true, add content of linked file to the
1391 # tar file, else the link.
1392
1393 ignore_zeros = False # If true, skips empty or invalid blocks and
1394 # continues processing.
1395
Lars Gustäbel365aff32009-12-13 11:42:29 +00001396 errorlevel = 1 # If 0, fatal errors only appear in debug
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001397 # messages (if debug >= 0). If > 0, errors
1398 # are passed to the caller as exceptions.
1399
Guido van Rossumd8faa362007-04-27 19:54:29 +00001400 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001401
Guido van Rossume7ba4952007-06-06 23:52:48 +00001402 encoding = ENCODING # Encoding for 8-bit character strings.
1403
1404 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001405
Guido van Rossumd8faa362007-04-27 19:54:29 +00001406 tarinfo = TarInfo # The default TarInfo class to use.
1407
Lars Gustäbelb062a2f2012-05-14 13:18:16 +02001408 fileobject = ExFileObject # The file-object for extractfile().
Guido van Rossumd8faa362007-04-27 19:54:29 +00001409
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None,
        errorlevel=None, copybufsize=None):
    """Open an (uncompressed) tar archive `name`.

       `mode` is 'r' to read from an existing archive, 'a' to append
       data to an existing file (the file is created if it does not
       exist), 'w' to create a new file overwriting an existing one,
       or 'x' to open the file with the exclusive-creation mode "xb".
       `mode` defaults to 'r'.

       If `fileobj` is given, it is used for reading or writing data.
       If it has a `mode` attribute, that value replaces the low-level
       file mode. `fileobj` is not closed when the TarFile is closed.

       The remaining arguments override the class-level defaults of
       the same name when they are not None; `errors` is always
       stored. `pax_headers` is only kept when the format is
       PAX_FORMAT.

       Raises ValueError for an unknown `mode` and ReadError if an
       archive opened for appending contains an invalid header.
    """
    raw_modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
    if mode not in raw_modes:
        raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
    self.mode, self._mode = mode, raw_modes[mode]

    if fileobj:
        # Caller-supplied file object: borrow its name and mode.
        if name is None and hasattr(fileobj, "name"):
            if isinstance(fileobj.name, (str, bytes)):
                name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    else:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode, self._mode = "w", "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes: explicit arguments shadow the class defaults.
    overrides = (
        ("format", format),
        ("tarinfo", tarinfo),
        ("dereference", dereference),
        ("ignore_zeros", ignore_zeros),
        ("encoding", encoding),
    )
    for attr, given in overrides:
        if given is not None:
            setattr(self, attr, given)
    self.errors = errors

    use_pax_headers = pax_headers is not None and self.format == PAX_FORMAT
    self.pax_headers = pax_headers if use_pax_headers else {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.copybufsize = copybufsize
    self.closed = False
    self.members = []                   # list of members as TarInfo objects
    self._loaded = False                # flag if all members have been read
    self.offset = self.fileobj.tell()   # current position in the archive file
    self.inodes = {}                    # dictionary caching the inodes of
                                        # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    member = self.tarinfo.fromtarfile(self)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))
                self.members.append(member)

        if self.mode in ("a", "w", "x"):
            self._loaded = True

            if self.pax_headers:
                header = self.tarinfo.create_pax_global_header(
                    self.pax_headers.copy())
                self.fileobj.write(header)
                self.offset += len(header)
    except BaseException:
        # Do not leak a file we opened ourselves, whatever went wrong.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001509
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001510 #--------------------------------------------------------------------------
1511 # Below are the classmethods which act as alternate constructors to the
1512 # TarFile class. The open() method is the only one that is needed for
1513 # public use; it is the "super"-constructor and is able to select an
1514 # adequate "sub"-constructor for a particular compression using the mapping
1515 # from OPEN_METH.
1516 #
1517 # This concept allows one to subclass TarFile without losing the comfort of
1518 # the super-constructor. A sub-constructor is registered and made available
1519 # by adding it to the mapping in OPEN_METH.
1520
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending and return
       an appropriate TarFile object.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'r:xz'       open for reading with lzma compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'w:xz'       open for writing with lzma compression

       'x' or 'x:'  create a tarfile exclusively without compression, raise
                    an exception if the file is already created
       'x:gz'       create a gzip compressed tarfile, raise an exception
                    if the file is already created
       'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                    if the file is already created
       'x:xz'       create an lzma compressed tarfile, raise an exception
                    if the file is already created

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'r|xz'       open an lzma compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing
       'w|xz'       open an lzma compressed stream for writing

       Raises ValueError if neither `name` nor `fileobj` is given or
       the mode cannot be interpreted, CompressionError for an unknown
       compression type, and ReadError if no opener accepts the file.
    """
    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        # The key is False for compressed openers, so the stable sort
        # puts them first and tries 'taropen' last.
        candidates = sorted(
            cls.OPEN_METH,
            key=lambda kind: cls.OPEN_METH[kind] == 'taropen')
        for comptype in candidates:
            opener = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return opener(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so the next opener sees the same data.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
        raise ReadError("file could not be opened successfully")

    if ":" in mode:
        filemode, _, comptype = mode.partition(":")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        opener = getattr(cls, cls.OPEN_METH[comptype])
        return opener(name, filemode, fileobj, **kwargs)

    if "|" in mode:
        filemode, _, comptype = mode.partition("|")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            archive = cls(name, filemode, stream, **kwargs)
        except BaseException:
            stream.close()
            raise
        # The stream wrapper belongs to the archive and is closed with it.
        archive._extfileobj = False
        return archive

    if mode in ("a", "w", "x"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001611
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open the uncompressed tar archive `name` for reading or writing.

       `mode` must be 'r', 'a', 'w' or 'x', otherwise ValueError is
       raised. All arguments are passed on to the class constructor.
    """
    allowed = ("r", "a", "w", "x")
    if mode in allowed:
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001619
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the gzip compressed tar archive `name` for reading or
       writing. Appending is not allowed.

       Raises ValueError for an unsupported `mode`, CompressionError
       if the gzip module cannot be used, and ReadError if an OSError
       occurs while opening in mode 'r'.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    reading = mode == 'r'

    try:
        fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
    except OSError:
        if reading and fileobj is not None:
            raise ReadError("not a gzip file")
        raise

    try:
        archive = cls.taropen(name, mode, fileobj, **kwargs)
    except BaseException as exc:
        # Always release the GzipFile; OSError while reading is
        # reported as ReadError, everything else is re-raised as is.
        fileobj.close()
        if reading and isinstance(exc, OSError):
            raise ReadError("not a gzip file")
        raise
    archive._extfileobj = False
    return archive
1653
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the bzip2 compressed tar archive `name` for reading or
       writing. Appending is not allowed.

       Raises ValueError for an unsupported `mode`, CompressionError
       if the bz2 module cannot be imported, and ReadError if OSError
       or EOFError occurs while opening in mode 'r'.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    fileobj = bz2.BZ2File(fileobj or name, mode,
                          compresslevel=compresslevel)

    try:
        archive = cls.taropen(name, mode, fileobj, **kwargs)
    except BaseException as exc:
        # Always release the BZ2File before propagating the error.
        fileobj.close()
        if mode == 'r' and isinstance(exc, (OSError, EOFError)):
            raise ReadError("not a bzip2 file")
        raise
    archive._extfileobj = False
    return archive
1682
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open the lzma compressed tar archive `name` for reading or
       writing. Appending is not allowed.

       Raises ValueError for an unsupported `mode`, CompressionError
       if the lzma module cannot be imported, and ReadError if
       lzma.LZMAError or EOFError occurs while opening in mode 'r'.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import lzma
    except ImportError:
        raise CompressionError("lzma module is not available")

    fileobj = lzma.LZMAFile(fileobj or name, mode, preset=preset)

    try:
        archive = cls.taropen(name, mode, fileobj, **kwargs)
    except BaseException as exc:
        # Always release the LZMAFile before propagating the error.
        fileobj.close()
        if mode == 'r' and isinstance(exc, (lzma.LZMAError, EOFError)):
            raise ReadError("not an lzma file")
        raise
    archive._extfileobj = False
    return archive
1710
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001711 # All *open() methods are registered here.
# Maps a compression type name to the name of the classmethod that
# opens it. Insertion order is kept as tar, gz, bz2, xz.
OPEN_METH = dict(
    tar="taropen",   # uncompressed tar
    gz="gzopen",     # gzip compressed tar
    bz2="bz2open",   # bzip2 compressed tar
    xz="xzopen",     # lzma compressed tar
)
1718
1719 #--------------------------------------------------------------------------
1720 # The public methods which TarFile provides:
1721
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive and the archive is padded with zeros
       up to a multiple of RECORDSIZE. Closing an already closed
       TarFile does nothing.
    """
    if self.closed:
        return

    self.closed = True
    try:
        if self.mode in ("a", "w", "x"):
            trailer = BLOCKSIZE * 2
            self.fileobj.write(NUL * trailer)
            self.offset += trailer
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            shortfall = self.offset % RECORDSIZE
            if shortfall > 0:
                self.fileobj.write(NUL * (RECORDSIZE - shortfall))
    finally:
        # Only close file objects that were not handed in by the caller.
        if not self._extfileobj:
            self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001742
def getmember(self, name):
    """Return the TarInfo object for member `name`, as found by
       _getmember(). If `name` cannot be found in the archive,
       KeyError is raised. If a member occurs more than once in the
       archive, its last occurrence is assumed to be the most
       up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001753
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects.
       The list has the same order as the members in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # To obtain a list of all members, the whole archive has to be
    # scanned first.
    self._load()
    return self.members
1763
def getnames(self):
    """Return the names of the archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001769
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object from the result of os.stat or equivalent
       on an existing file. The file is either named by `name`, or
       specified as a file object `fileobj` with a file descriptor. If
       given, `arcname` specifies an alternative name for the file in
       the archive, otherwise, the name is taken from the 'name'
       attribute of 'fileobj', or the 'name' argument. The name should
       be a text string.

       Returns None if the file is of a type that is not handled here.
    """
    self._check("awx")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self  # Not needed

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)

    target = ""
    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        # Is it a hardlink to an already archived file?
        is_hardlink = (not self.dereference and statres.st_nlink > 1 and
                       inode in self.inodes and
                       arcname != self.inodes[inode])
        if is_hardlink:
            kind = LNKTYPE
            target = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            kind = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    else:
        special_kinds = (
            (stat.S_ISDIR, DIRTYPE),
            (stat.S_ISFIFO, FIFOTYPE),
            (stat.S_ISLNK, SYMTYPE),
            (stat.S_ISCHR, CHRTYPE),
            (stat.S_ISBLK, BLKTYPE),
        )
        for matches, candidate in special_kinds:
            if matches(stmd):
                kind = candidate
                break
        else:
            # None of the supported file types.
            return None
        if kind == SYMTYPE:
            target = os.readlink(name)

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry data in the archive.
    tarinfo.size = statres.st_size if kind == REGTYPE else 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = kind
    tarinfo.linkname = target
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if (kind in (CHRTYPE, BLKTYPE) and
            hasattr(os, "major") and hasattr(os, "minor")):
        tarinfo.devmajor = os.major(statres.st_rdev)
        tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1869
def list(self, verbose=True, *, members=None):
    """Print a table of contents to sys.stdout. If `verbose` is False,
       only the names of the members are printed. If it is True, an
       `ls -l`-like output is produced. `members` is optional and must
       be a subset of the list returned by getmembers().
    """
    self._check()

    if members is None:
        members = self
    for entry in members:
        if verbose:
            _safe_print(stat.filemode(entry.mode))
            owner = "%s/%s" % (entry.uname or entry.uid,
                               entry.gname or entry.gid)
            _safe_print(owner)
            if entry.ischr() or entry.isblk():
                # Devices show "major,minor" in place of a size.
                device = "%d,%d" % (entry.devmajor, entry.devminor)
                _safe_print("%10s" % device)
            else:
                _safe_print("%10d" % entry.size)
            stamp = time.localtime(entry.mtime)[:6]
            _safe_print("%d-%02d-%02d %02d:%02d:%02d" % stamp)

        suffix = "/" if entry.isdir() else ""
        _safe_print(entry.name + suffix)

        if verbose:
            if entry.issym():
                _safe_print("-> " + entry.linkname)
            if entry.islnk():
                _safe_print("link to " + entry.linkname)
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001901
def add(self, name, arcname=None, recursive=True, *, filter=None):
    """Add the file `name` to the archive. `name` may be any type of
       file (directory, fifo, symbolic link, etc.). If given, `arcname`
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be
       avoided by setting `recursive` to False. Directory entries are
       added in sorted order, so the result does not depend on the
       order in which the file system lists them. `filter` is a
       function that expects a TarInfo object argument and returns the
       changed TarInfo object, if it returns None the TarInfo object
       will be excluded from the archive.
    """
    self._check("awx")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as f:
            self.addfile(tarinfo, f)

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            # os.listdir() returns entries in arbitrary order; sort
            # them so that archives are built deterministically.
            for entry in sorted(os.listdir(name)):
                self.add(os.path.join(name, entry),
                         os.path.join(arcname, entry),
                         recursive, filter=filter)

    else:
        self.addfile(tarinfo)
1952
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo` to the archive. If `fileobj` is
       given, it should be a binary file, and tarinfo.size bytes are
       read from it and added to the archive. You can create TarInfo
       objects directly, or by using gettarinfo().

       A shallow copy of `tarinfo` is what gets written and appended
       to `self.members`.
    """
    self._check("awx")

    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)
    chunk_size = self.copybufsize
    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size, bufsize=chunk_size)
        full_blocks, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            # Pad the last, partially filled block with NULs.
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001977
def extractall(self, path=".", members=None, *, numeric_owner=False):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path` specifies a different directory
       to extract to. `members` is optional and must be a subset of the
       list returned by getmembers(). If `numeric_owner` is True, only
       the numbers for user/group names are used and not the names.
    """
    pending_dirs = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directories with a safe mode; the real attributes
            # are applied to the remembered original further down.
            pending_dirs.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 0o700
        # Do not set_attrs directories, as we will do that further down
        self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
                     numeric_owner=numeric_owner)

    # Reverse sort directories.
    ordered = sorted(pending_dirs, key=lambda d: d.name)[::-1]

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in ordered:
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2017
def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
    """Extract a single member below `path' (the current working
       directory by default) under its full archive name. `member' may
       be a filename or a TarInfo object. Owner, modification time and
       mode are applied unless `set_attrs' is False. If `numeric_owner'
       is True, only the numbers for user/group names are used and not
       the names. Depending on self.errorlevel, OSError and ExtractError
       are either re-raised or only reported through _dbg().
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    if tarinfo.islnk():
        # makelink() looks at this attribute to find the hard link target.
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
                             set_attrs=set_attrs,
                             numeric_owner=numeric_owner)
    except OSError as exc:
        if self.errorlevel > 0:
            raise
        if exc.filename is None:
            self._dbg(1, "tarfile: %s" % exc.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (exc.strerror, exc.filename))
    except ExtractError as exc:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % exc)
2055
def extractfile(self, member):
    """Return a file object for a member of the archive. `member' may be
       a filename or a TarInfo object. Regular files and members of an
       unknown type yield self.fileobject(self, tarinfo); links yield the
       file object of their target; every other member type yields None.
    """
    self._check("r")

    if isinstance(member, str):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # Members with unknown types are treated as regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # No data is associated with the member (directory, chrdev, blkdev,
    # etc.), so there is nothing to hand out.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # A small but ugly workaround for the case that someone tries to
    # extract a (sym)link as a file-object from a non-seekable stream of
    # tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2086
Eric V. Smith7a803892015-04-15 10:27:58 -04002087 def _extract_member(self, tarinfo, targetpath, set_attrs=True,
2088 numeric_owner=False):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002089 """Extract the TarInfo object tarinfo to a physical
2090 file called targetpath.
2091 """
2092 # Fetch the TarInfo object for the given name
2093 # and build the destination pathname, replacing
2094 # forward slashes to platform specific separators.
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +00002095 targetpath = targetpath.rstrip("/")
2096 targetpath = targetpath.replace("/", os.sep)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002097
2098 # Create all upper directories.
2099 upperdirs = os.path.dirname(targetpath)
2100 if upperdirs and not os.path.exists(upperdirs):
Christian Heimes2202f872008-02-06 14:31:34 +00002101 # Create directories that are not part of the archive with
2102 # default permissions.
Thomas Woutersb2137042007-02-01 18:02:27 +00002103 os.makedirs(upperdirs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002104
2105 if tarinfo.islnk() or tarinfo.issym():
2106 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2107 else:
2108 self._dbg(1, tarinfo.name)
2109
2110 if tarinfo.isreg():
2111 self.makefile(tarinfo, targetpath)
2112 elif tarinfo.isdir():
2113 self.makedir(tarinfo, targetpath)
2114 elif tarinfo.isfifo():
2115 self.makefifo(tarinfo, targetpath)
2116 elif tarinfo.ischr() or tarinfo.isblk():
2117 self.makedev(tarinfo, targetpath)
2118 elif tarinfo.islnk() or tarinfo.issym():
2119 self.makelink(tarinfo, targetpath)
2120 elif tarinfo.type not in SUPPORTED_TYPES:
2121 self.makeunknown(tarinfo, targetpath)
2122 else:
2123 self.makefile(tarinfo, targetpath)
2124
Martin v. Löwis16f344d2010-11-01 21:39:13 +00002125 if set_attrs:
Eric V. Smith7a803892015-04-15 10:27:58 -04002126 self.chown(tarinfo, targetpath, numeric_owner)
Martin v. Löwis16f344d2010-11-01 21:39:13 +00002127 if not tarinfo.issym():
2128 self.chmod(tarinfo, targetpath)
2129 self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002130
2131 #--------------------------------------------------------------------------
2132 # Below are the different file methods. They are called via
2133 # _extract_member() when extract() is called. They can be replaced in a
2134 # subclass to implement other functionality.
2135
def makedir(self, tarinfo, targetpath):
    """Create the directory targetpath; an already existing one is
       silently accepted.
    """
    # The directory starts out with a safe mode, the real mode is set
    # later in _extract_member().
    safe_mode = 0o700
    try:
        os.mkdir(targetpath, safe_mode)
    except FileExistsError:
        return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002145
def makefile(self, tarinfo, targetpath):
    """Write the data of tarinfo to a file called targetpath. For a
       sparse member only the stored (offset, size) segments are copied
       and the file is then truncated to tarinfo.size.
    """
    archive = self.fileobj
    archive.seek(tarinfo.offset_data)
    chunk_size = self.copybufsize
    with bltn_open(targetpath, "wb") as out:
        if tarinfo.sparse is None:
            copyfileobj(archive, out, tarinfo.size, ReadError, chunk_size)
            return

        # Place every stored segment at its own offset in the output.
        for offset, size in tarinfo.sparse:
            out.seek(offset)
            copyfileobj(archive, out, size, ReadError, chunk_size)
        out.seek(tarinfo.size)
        out.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002161
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of an unknown type to targetpath the same way
       as a regular file, and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    notice = ("tarfile: Unknown file type %r, "
              "extracted as regular file." % tarinfo.type)
    self._dbg(1, notice)
2169
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called targetpath. Raise ExtractError if the os
       module offers no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002177
def makedev(self, tarinfo, targetpath):
    """Create a character or block device called targetpath. Raise
       ExtractError if the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the matching device type flag.
    node_type = stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | node_type, device)
2192
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link. Raise ExtractError if that fallback cannot find
       the referenced member inside the archive.
    """
    try:
        # For systems that support symbolic and hard links.
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        else:
            # tarinfo._link_target is prepared by extract().
            if os.path.exists(tarinfo._link_target):
                os.link(tarinfo._link_target, targetpath)
            else:
                # The hard link target is not on disk: extract the
                # referenced member under the link's name instead.
                # NOTE(review): a KeyError from _find_link_target() is
                # not converted to ExtractError on this path - confirm
                # whether callers rely on that.
                self._extract_member(self._find_link_target(tarinfo),
                                     targetpath)
    except symlink_exception:
        try:
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
        except KeyError as exc:
            # Keep the lookup failure (it names the missing link target)
            # attached as the explicit cause.
            raise ExtractError(
                "unable to resolve link inside archive") from exc
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002215
def chown(self, tarinfo, targetpath, numeric_owner):
    """Set owner of targetpath according to tarinfo. If numeric_owner
       is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
       is False, fall back to .gid/.uid when the search based on name
       fails. Nothing is done unless os.geteuid() exists and returns 0.
       Raise ExtractError (with the OSError as its cause) if the owner
       cannot be changed.
    """
    if not (hasattr(os, "geteuid") and os.geteuid() == 0):
        # We have to be root to change ownership.
        return

    g = tarinfo.gid
    u = tarinfo.uid
    if not numeric_owner:
        # Prefer the ids that the local databases assign to the stored
        # names; an unknown name keeps the numeric id from the header.
        try:
            if grp:
                g = grp.getgrnam(tarinfo.gname).gr_gid
        except KeyError:
            pass
        try:
            if pwd:
                u = pwd.getpwnam(tarinfo.uname).pw_uid
        except KeyError:
            pass

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself rather than whatever it points to.
            os.lchown(targetpath, u, g)
        else:
            os.chown(targetpath, u, g)
    except OSError as exc:
        raise ExtractError("could not change owner") from exc
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002244
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo. Nothing
       is done if the os module has no chmod(). Raise ExtractError (with
       the OSError as its cause) if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except OSError as exc:
        raise ExtractError("could not change mode") from exc
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002253
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo (the
       access time is set to the same value). Nothing is done if the os
       module has no utime(). Raise ExtractError (with the OSError as
       its cause) if the time cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except OSError as exc:
        raise ExtractError("could not change modification time") from exc
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002263
2264 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.
    """
    self._check("ra")

    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Advance the file pointer. Reading the byte just in front of the
    # expected offset reveals whether the data really reaches that far.
    if self.offset != self.fileobj.tell():
        self.fileobj.seek(self.offset - 1)
        if not self.fileobj.read(1):
            raise ReadError("unexpected end of data")

    # Read the next block.
    header = None
    while True:
        try:
            header = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as exc:
            if not self.ignore_zeros:
                break
            # Skip this block and try the following one.
            self._dbg(2, "0x%X: %s" % (self.offset, exc))
            self.offset += BLOCKSIZE
        except InvalidHeaderError as exc:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
            if self.offset == 0:
                raise ReadError(str(exc))
            break
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
            break
        except TruncatedHeaderError as exc:
            if self.offset == 0:
                raise ReadError(str(exc))
            break
        except SubsequentHeaderError as exc:
            raise ReadError(str(exc))
        else:
            break

    if header is None:
        # Nothing more could be read: the member list is complete.
        self._loaded = True
    else:
        self.members.append(header)

    return header
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002315
2316 #--------------------------------------------------------------------------
2317 # Little helper methods:
2318
Lars Gustäbel1b512722010-06-03 12:45:16 +00002319 def _getmember(self, name, tarinfo=None, normalize=False):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002320 """Find an archive member by name from bottom to top.
2321 If tarinfo is given, it is used as the starting point.
2322 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002323 # Ensure that all members have been loaded.
2324 members = self.getmembers()
2325
Lars Gustäbel1b512722010-06-03 12:45:16 +00002326 # Limit the member search list up to tarinfo.
2327 if tarinfo is not None:
2328 members = members[:members.index(tarinfo)]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002329
Lars Gustäbel1b512722010-06-03 12:45:16 +00002330 if normalize:
2331 name = os.path.normpath(name)
2332
2333 for member in reversed(members):
2334 if normalize:
2335 member_name = os.path.normpath(member.name)
2336 else:
2337 member_name = member.name
2338
2339 if name == member_name:
2340 return member
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002341
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002342 def _load(self):
2343 """Read through the entire archive file and look for readable
2344 members.
2345 """
2346 while True:
2347 tarinfo = self.next()
2348 if tarinfo is None:
2349 break
2350 self._loaded = True
2351
2352 def _check(self, mode=None):
2353 """Check if TarFile is still open, and if the operation's mode
2354 corresponds to TarFile's mode.
2355 """
2356 if self.closed:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02002357 raise OSError("%s is closed" % self.__class__.__name__)
Guido van Rossumd8faa362007-04-27 19:54:29 +00002358 if mode is not None and self.mode not in mode:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02002359 raise OSError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002360
Lars Gustäbel1b512722010-06-03 12:45:16 +00002361 def _find_link_target(self, tarinfo):
2362 """Find the target member of a symlink or hardlink member in the
2363 archive.
2364 """
2365 if tarinfo.issym():
2366 # Always search the entire archive.
Lars Gustäbel1ef9eda2012-04-24 21:04:40 +02002367 linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
Lars Gustäbel1b512722010-06-03 12:45:16 +00002368 limit = None
2369 else:
2370 # Search the archive before the link, because a hard link is
2371 # just a reference to an already archived file.
2372 linkname = tarinfo.linkname
2373 limit = tarinfo
2374
2375 member = self._getmember(linkname, tarinfo=limit, normalize=True)
2376 if member is None:
2377 raise KeyError("linkname %r not found" % linkname)
2378 return member
2379
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002380 def __iter__(self):
2381 """Provide an iterator object.
2382 """
2383 if self._loaded:
Serhiy Storchakaa2549212015-12-19 09:43:14 +02002384 yield from self.members
2385 return
2386
2387 # Yield items using TarFile's next() method.
2388 # When all members have been read, set TarFile as _loaded.
2389 index = 0
2390 # Fix for SF #1100429: Under rare circumstances it can
2391 # happen that getmembers() is called during iteration,
2392 # which will have already exhausted the next() method.
2393 if self.firstmember is not None:
2394 tarinfo = self.next()
2395 index += 1
2396 yield tarinfo
2397
2398 while True:
2399 if index < len(self.members):
2400 tarinfo = self.members[index]
2401 elif not self._loaded:
2402 tarinfo = self.next()
2403 if not tarinfo:
2404 self._loaded = True
2405 return
2406 else:
2407 return
2408 index += 1
2409 yield tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002410
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002411 def _dbg(self, level, msg):
2412 """Write debugging output to sys.stderr.
2413 """
2414 if level <= self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002415 print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002416
2417 def __enter__(self):
2418 self._check()
2419 return self
2420
2421 def __exit__(self, type, value, traceback):
2422 if type is None:
2423 self.close()
2424 else:
2425 # An exception occurred. We must not call close() because
2426 # it would try to write end-of-archive blocks and padding.
2427 if not self._extfileobj:
2428 self.fileobj.close()
2429 self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002430
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002431#--------------------
2432# exported functions
2433#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       'name' should be a path to the file. As a generalization, an
       object with a read() method is treated as an already opened
       binary file object: it is probed through open(fileobj=...) and
       its position is put back afterwards.
    """
    if hasattr(name, "read"):
        # Remember where the caller's file object stood, so that the
        # probe does not disturb it, whatever the outcome.
        pos = name.tell()
        try:
            t = open(fileobj=name)
        except TarError:
            return False
        finally:
            name.seek(pos)
        t.close()
        return True

    try:
        t = open(name)
        t.close()
        return True
    except TarError:
        return False
2444
# Export TarFile.open under the module-level name `open'. Within this
# module the global name `open' therefore refers to TarFile.open rather
# than to the builtin.
open = TarFile.open
Serhiy Storchakad27b4552013-11-24 01:53:29 +02002446
2447
def main():
    """Command-line interface of the module.

       Exactly one of the following actions has to be given:
       -l/--list <tarfile>                 show a listing of the archive
       -e/--extract <tarfile> [<dir>]      extract it (into <dir>)
       -c/--create <name> [<file> ...]     create an archive from files
       -t/--test <tarfile>                 check that it is a tar archive
       -v/--verbose asks for more output. If the given file is not a
       tar archive the process exits with status 1.
    """
    import argparse

    description = 'A simple command-line interface for tarfile module.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<tarfile>',
                       help='Show listing of a tarfile')
    group.add_argument('-e', '--extract', nargs='+',
                       metavar=('<tarfile>', '<output_dir>'),
                       help='Extract tarfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create tarfile from sources')
    group.add_argument('-t', '--test', metavar='<tarfile>',
                       help='Test if a tarfile is valid')
    args = parser.parse_args()

    if args.test is not None:
        src = args.test
        if is_tarfile(src):
            with TarFile.open(src, 'r') as tar:
                # Reading the member list walks through the whole
                # archive; the list itself goes to stderr.
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.list is not None:
        src = args.list
        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.extract is not None:
        if len(args.extract) == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif len(args.extract) == 2:
            src, curdir = args.extract
        else:
            # More than <tarfile> and <output_dir>: show the help text
            # and leave (parser.exit() does not return).
            parser.exit(1, parser.format_help())

        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} file is extracted.'.format(src)
                else:
                    msg = ('{!r} file is extracted '
                           'into {!r} directory.').format(src, curdir)
                print(msg)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.create is not None:
        # The first value names the archive, the rest are its sources.
        tar_name = args.create.pop(0)
        _, ext = os.path.splitext(tar_name)
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        # An unknown extension means an uncompressed archive.
        compression = compressions.get(ext)
        tar_mode = 'w:' + compression if compression else 'w'
        tar_files = args.create

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))
2534
# Run the command-line interface when the module is executed as a script.
if __name__ == '__main__':
    main()