blob: 7b4732d47197a8667a562e11399af31265529f2f [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
# Module metadata: version string of this tarfile implementation, its
# author and the list of contributors.
version = "0.9.0"
__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000035
36#---------
37# Imports
38#---------
Serhiy Storchakacf4a2f22015-03-11 17:18:03 +020039from builtins import open as bltn_open
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000040import sys
41import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020042import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000043import shutil
44import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000045import time
46import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000047import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000048import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000049
50try:
Xavier de Gayef44abda2016-12-09 09:33:09 +010051 import pwd
Brett Cannoncd171c82013-07-04 17:43:24 -040052except ImportError:
Xavier de Gayef44abda2016-12-09 09:33:09 +010053 pwd = None
54try:
55 import grp
56except ImportError:
57 grp = None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000058
# Exceptions that signal "symbolic links cannot be created here":
#   * AttributeError / NotImplementedError -- os.symlink is missing or
#     unsupported (e.g. Windows prior to 6.0).
#   * OSError (winerror=1314) -- the caller does not hold the
#     SeCreateSymbolicLinkPrivilege privilege.
# OSError is a builtin, so no NameError guard is needed around it.
symlink_exception = (AttributeError, NotImplementedError, OSError)
67
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000068# from tarfile import *
# Names exported by "from tarfile import *".
__all__ = [
    "TarFile",
    "TarInfo",
    "is_tarfile",
    "TarError",
    "ReadError",
    "CompressionError",
    "StreamError",
    "ExtractError",
    "HeaderError",
    "ENCODING",
    "USTAR_FORMAT",
    "GNU_FORMAT",
    "PAX_FORMAT",
    "DEFAULT_FORMAT",
    "open",
]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000073
74#---------------------------------------------------------
75# tar constants
76#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# The magic field is 8 bytes wide in both flavours: GNU tar writes
# "ustar" followed by two spaces and a NUL, POSIX writes "ustar", a NUL
# and the two-character version "00".
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000109
110#---------------------------------------------------------
111# tarfile constants
112#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# File types that will be treated as a regular file.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)

# File types that are part of the GNU tar format.
GNU_TYPES = (
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, all other fields
# are treated as strings.  Maps the field name to its converter.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int,
}
145
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000146#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000147# initialization
148#---------------------------------------------------------
# Default encoding: UTF-8 on Windows ("nt"), otherwise whatever the
# interpreter reports as the file system encoding.
ENCODING = "utf-8" if os.name == "nt" else sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000153
154#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000155# Some useful functions
156#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000157
def stn(s, length, encoding, errors):
    """Encode the string s into a bytes field of exactly length bytes.

    The encoded value is cut off after length bytes when it is too
    long and right-padded with NUL bytes when it is too short.
    """
    encoded = s.encode(encoding, errors)
    # A negative repeat count yields b"", so over-long values get no padding.
    padding = (length - len(encoded)) * NUL
    return encoded[:length] + padding
Thomas Wouters477c8d52006-05-27 19:21:47 +0000163
def nts(s, encoding, errors):
    """Decode a NUL-terminated bytes field into a string.

    Everything from the first NUL byte onwards is dropped before
    decoding; a field without any NUL byte is decoded as a whole.
    """
    head, _sep, _tail = s.partition(b"\0")
    return head.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000171
def nti(s):
    """Convert a number field to a python number.

    Two encodings are understood (see itn()): a leading 0o200 or 0o377
    byte followed by a big-endian base-256 value (0o377 marks a negative
    number), or a NUL-terminated string of octal digits.  Raises
    InvalidHeaderError if the octal form cannot be parsed.
    """
    marker = s[0]
    if marker in (0o200, 0o377):
        # Base-256: every byte after the marker is one big-endian digit.
        n = int.from_bytes(s[1:], "big")
        if marker == 0o377:
            n -= 256 ** (len(s) - 1)
        return n

    try:
        text = nts(s, "ascii", "strict")
        # An empty (all blanks / NUL) field counts as zero.
        return int(text.strip() or "0", 8)
    except ValueError:
        raise InvalidHeaderError("invalid header")
191
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field of digits bytes.

    Returns bytes for the octal form and a bytearray for the GNU
    base-256 form.  Raises ValueError if n fits neither encoding.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.
    n = int(n)
    width = digits - 1

    if 0 <= n < 8 ** width:
        return bytes("%0*o" % (width, n), "ascii") + NUL

    if format == GNU_FORMAT and -256 ** width <= n < 256 ** width:
        marker = 0o200 if n >= 0 else 0o377
        # Reducing modulo 256**width gives the low `width` bytes, which is
        # the two's-complement style payload for negative numbers.
        payload = (n % 256 ** width).to_bytes(width, "big")
        return bytearray([marker]) + payload

    raise ValueError("overflow in number field")
220
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

    All bytes of the header are summed up except for the 8-byte chksum
    field, which is treated as if it was filled with spaces. According
    to the GNU tar sources, some tars (Sun and NeXT) calculate chksum
    with signed char, which gives a different result if there are bytes
    in the buffer with the high bit set. Hence both variants are
    calculated.
    """
    # 148 bytes before the chksum field, 8 skipped bytes, 356 bytes after.
    unsigned_bytes = struct.unpack_from("148B8x356B", buf)
    signed_bytes = struct.unpack_from("148b8x356b", buf)
    # 256 accounts for the eight spaces (8 * 0x20) standing in for chksum.
    return 256 + sum(unsigned_bytes), 256 + sum(signed_bytes)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000233
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, copy the entire content.  Data is moved in
    pieces of bufsize bytes (16 KiB when bufsize is not given).  If src
    delivers fewer bytes than requested, exception is raised with the
    message "unexpected end of data".
    """
    if not bufsize:
        bufsize = 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    def transfer(count):
        # Move exactly count bytes or fail.
        data = src.read(count)
        if len(data) < count:
            raise exception("unexpected end of data")
        dst.write(data)

    full_chunks, tail = divmod(length, bufsize)
    for _ in range(full_chunks):
        transfer(bufsize)
    if tail != 0:
        transfer(tail)
    return
258
def _safe_print(s):
    """Print s followed by a space instead of a newline.

    When sys.stdout reports an encoding, characters that cannot be
    represented in it are replaced by backslash escapes first.
    """
    stdout_encoding = getattr(sys.stdout, 'encoding', None)
    if stdout_encoding is None:
        text = s
    else:
        raw = s.encode(stdout_encoding, 'backslashreplace')
        text = raw.decode(stdout_encoding)
    print(text, end=' ')
264
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000265
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000299
300#---------------------------
301# internal stream interface
302#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.  All operations go straight to an os-level file
       descriptor.
    """

    def __init__(self, name, mode):
        """Open name for reading (mode "r") or for writing (mode "w",
        creating or truncating the file).  Any other mode raises KeyError.
        """
        flags_by_mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }
        flags = flags_by_mode[mode]
        # O_BINARY only exists on some platforms; add it where available.
        flags |= getattr(os, "O_BINARY", 0)
        self.fd = os.open(name, flags, 0o666)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return what os.read() delivers for a request of size bytes."""
        return os.read(self.fd, size)

    def write(self, s):
        """Hand the bytes s to os.write()."""
        os.write(self.fd, s)
326
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip, bzip2 or xz compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

        name     -- file name; opened via _LowLevelFile if fileobj is None
        mode     -- "r" for reading, anything else is treated as writing
        comptype -- "tar", "gz", "bz2", "xz", or "*" to detect the
                    compression from the first block of fileobj
        fileobj  -- stream-like object, or None to open name
        bufsize  -- number of bytes moved per read/write on fileobj

        Raises CompressionError if the compression module is missing or
        comptype is unknown.  If setup fails, a file opened here is
        closed again before the exception propagates.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = b""
        self.pos = 0
        self.closed = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    # Set before parsing the header so that _read() can
                    # always rely on the attribute being present.
                    self.exception = zlib.error
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except BaseException:
            # Clean up whatever we opened ourselves, then re-raise.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" is missing if __init__ failed before setting it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: raw deflate data, the gzip framing is written
        # by hand here and in close().
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write the bytes s to the stream, compressing them if needed.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        # pos counts uncompressed bytes.
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Buffer the bytes s and write them to the underlying file
           object whenever more than bufsize bytes have piled up.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # gzip trailer: CRC32 and size modulo 2**32.
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            # Only close file objects that were opened by __init__.
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

        Parses and skips the gzip header.  Raises ReadError if the magic
        number is wrong and CompressionError for a compression method
        other than deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            # FEXTRA: a little-endian length followed by that many bytes.
            # They are part of the uncompressed header, so skip them with
            # the raw reader; read() would feed them to the decompressor
            # and advance self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # FNAME: NUL-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: NUL-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: two-byte header checksum.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden and raises StreamError.
        """
        if pos - self.pos >= 0:
            # Seeking forward is done by reading and discarding data.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            chunks = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                chunks.append(buf)
            # The chunks are bytes objects, so join them with b"".
            buf = b"".join(chunks)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return up to size (decompressed) bytes from the stream.
           Raises ReadError if the compressed data is invalid.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                raise ReadError("invalid compressed data")
            self.dbuf += buf
            c += len(buf)
        buf = self.dbuf[:size]
        self.dbuf = self.dbuf[size:]
        return buf

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            self.buf += buf
            c += len(buf)
        buf = self.buf[:size]
        self.buf = self.buf[size:]
        return buf
# class _Stream
570
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        """Wrap fileobj and read its first BLOCKSIZE bytes for inspection."""
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block consumed by __init__, ignoring size.

        Afterwards the instance's read is rebound so that all further
        calls go directly to the wrapped file object.
        """
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Guess the compression type from the first block.

        Returns "gz", "bz2" or "xz" when a known signature is found and
        "tar" otherwise.
        """
        head = self.buf
        if head.startswith(b"\x1f\x8b\x08"):
            return "gz"
        if head[0:3] == b"BZh" and head[4:10] == b"1AY&SY":
            return "bz2"
        if head.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        return "tar"

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class StreamProxy
597
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000598#------------------------
599# Extraction file object
600#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        """Expose size bytes of fileobj as a file of their own.

        offset is where the stored data starts inside fileobj.
        blockinfo is an optional list of (offset, size) pairs naming the
        regions of the exposed file that are actually stored; everything
        in between reads as NUL bytes.  Without blockinfo the whole file
        is one stored region.
        """
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.  Each entry is
        # (is_data, start, stop, position_in_fileobj_or_None).
        self.map_index = 0
        self.map = []
        covered = 0
        stored_at = self.offset
        for block_start, block_len in blockinfo:
            if block_start > covered:
                self.map.append((False, covered, block_start, None))
            block_stop = block_start + block_len
            self.map.append((True, block_start, block_stop, stored_at))
            stored_at += block_len
            covered = block_stop
        if covered < self.size:
            self.map.append((False, covered, self.size, None))

    def flush(self):
        """Do nothing."""
        pass

    def readable(self):
        """Always True."""
        return True

    def writable(self):
        """Always False."""
        return False

    def seekable(self):
        """Report whether the wrapped file object is seekable."""
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file and return the new position.

        The result is kept within the bounds of the file.  An unknown
        whence raises ValueError.
        """
        if whence == io.SEEK_SET:
            target = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                target = max(self.position + position, 0)
            else:
                target = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            target = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        self.position = target
        return self.position

    def read(self, size=None):
        """Read up to size bytes (everything that is left if size is None).

        Raises ReadError if the wrapped file object delivers less data
        than a stored region requires.
        """
        remaining = self.size - self.position
        size = remaining if size is None else min(size, remaining)

        pieces = []
        while size > 0:
            # Find the map entry covering the current position, starting
            # at the entry used last and wrapping around at the end.
            while True:
                is_data, start, stop, stored_at = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                self.map_index += 1
                if self.map_index == len(self.map):
                    self.map_index = 0
            length = min(size, stop - self.position)
            if is_data:
                self.fileobj.seek(stored_at + (self.position - start))
                chunk = self.fileobj.read(length)
                if len(chunk) != length:
                    raise ReadError("unexpected end of data")
                pieces.append(chunk)
            else:
                pieces.append(NUL * length)
            size -= length
            self.position += length
        return b"".join(pieces)

    def readinto(self, b):
        """Fill the writable buffer b and return the number of bytes read."""
        data = self.read(len(b))
        count = len(data)
        b[:count] = data
        return count

    def close(self):
        """Mark this object as closed; the wrapped file object stays open."""
        self.closed = True
#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000704
class ExFileObject(io.BufferedReader):
    """Buffered reader over the data of a single archive member."""

    def __init__(self, tarfile, tarinfo):
        """Wrap the region of tarfile.fileobj described by tarinfo
        (data offset, size and sparse information) in a _FileInFile and
        buffer it.
        """
        member_data = _FileInFile(
            tarfile.fileobj,
            tarinfo.offset_data,
            tarinfo.size,
            tarinfo.sparse,
        )
        super().__init__(member_data)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000712
713#------------------
714# Exported Classes
715#------------------
716class TarInfo(object):
717 """Informational class which holds the details about an
718 archive member given by a tar header block.
719 TarInfo objects are returned by TarFile.getmember(),
720 TarFile.getmembers() and TarFile.gettarinfo() and are
721 usually created internally.
722 """
723
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +0000724 __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
725 "chksum", "type", "linkname", "uname", "gname",
726 "devmajor", "devminor",
727 "offset", "offset_data", "pax_headers", "sparse",
728 "tarfile", "_sparse_structs", "_link_target")
729
def __init__(self, name=""):
    """Create a TarInfo object with default field values.
       `name' is the optional name of the member.
    """
    # Values that are stored in the header block.
    self.name = name        # member name
    self.mode = 0o644       # file permissions
    self.uid = 0            # user id
    self.gid = 0            # group id
    self.size = 0           # file size
    self.mtime = 0          # modification time
    self.chksum = 0         # header checksum
    self.type = REGTYPE     # member type
    self.linkname = ""      # link name
    self.uname = ""         # user name
    self.gname = ""         # group name
    self.devmajor = 0       # device major number
    self.devminor = 0       # device minor number

    # Positions inside the archive.
    self.offset = 0         # the tar header starts here
    self.offset_data = 0    # the file's data starts here

    # Supplemental information.
    self.sparse = None      # sparse member information
    self.pax_headers = {}   # pax header information
753
754 # In pax headers the "name" and "linkname" field are called
755 # "path" and "linkpath".
@property
def path(self):
    """Alias for the `name' attribute (pax headers call it "path").
    """
    return self.name

@path.setter
def path(self, name):
    # Writing to `path' simply stores the value in `name'.
    self.name = name
763
@property
def linkpath(self):
    """Alias for the `linkname' attribute (pax headers call it
       "linkpath").
    """
    return self.linkname

@linkpath.setter
def linkpath(self, linkname):
    # Writing to `linkpath' simply stores the value in `linkname'.
    self.linkname = linkname
Guido van Rossumd8faa362007-04-27 19:54:29 +0000771
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000772 def __repr__(self):
773 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
774
def get_info(self):
    """Collect the header fields of this TarInfo in a new dictionary.
       The mode is masked with 0o7777 and the name of a directory
       member is given a trailing slash.
    """
    field_names = (
        "name", "mode", "uid", "gid", "size", "mtime", "chksum",
        "type", "linkname", "uname", "gname", "devmajor", "devminor",
    )
    info = {field: getattr(self, field) for field in field_names}

    # Only the permission bits go into the header.
    info["mode"] = info["mode"] & 0o7777

    is_directory = info["type"] == DIRTYPE
    if is_directory and not info["name"].endswith("/"):
        info["name"] += "/"

    return info
798
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
    """Build the header for this member in the requested `format'
       (USTAR_FORMAT, GNU_FORMAT or PAX_FORMAT) and return it.
       `errors' is not passed on when the pax format is used.
       Any other format raises ValueError.
    """
    info = self.get_info()

    if format == USTAR_FORMAT:
        return self.create_ustar_header(info, encoding, errors)
    if format == GNU_FORMAT:
        return self.create_gnu_header(info, encoding, errors)
    if format == PAX_FORMAT:
        return self.create_pax_header(info, encoding)

    raise ValueError("invalid format")
812
def create_ustar_header(self, info, encoding, errors):
    """Build a ustar header block from `info'.
       A linkname whose encoded form exceeds LENGTH_LINK raises
       ValueError; a name whose encoded form exceeds LENGTH_NAME is
       split into a prefix and a name part.
    """
    info["magic"] = POSIX_MAGIC

    encoded_linkname = info["linkname"].encode(encoding, errors)
    if len(encoded_linkname) > LENGTH_LINK:
        raise ValueError("linkname is too long")

    encoded_name = info["name"].encode(encoding, errors)
    if len(encoded_name) > LENGTH_NAME:
        prefix, name = self._posix_split_name(info["name"], encoding, errors)
        info["prefix"] = prefix
        info["name"] = name

    return self._create_header(info, USTAR_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000825
def create_gnu_header(self, info, encoding, errors):
    """Build a GNU header block sequence from `info'.
       A long-link and/or long-name sequence is put in front of the
       regular header when the encoded linkname or name does not fit
       into LENGTH_LINK or LENGTH_NAME bytes.
    """
    info["magic"] = GNU_MAGIC

    # The long link sequence comes first, then the long name sequence.
    long_fields = (
        ("linkname", LENGTH_LINK, GNUTYPE_LONGLINK),
        ("name", LENGTH_NAME, GNUTYPE_LONGNAME),
    )

    blocks = []
    for field, limit, long_type in long_fields:
        if len(info[field].encode(encoding, errors)) > limit:
            blocks.append(self._create_gnu_long_header(info[field], long_type, encoding, errors))

    blocks.append(self._create_header(info, GNU_FORMAT, encoding, errors))
    return b"".join(blocks)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000839
def create_pax_header(self, info, encoding):
    """Build a ustar header block from `info'. Fields that cannot be
       stored in it are moved into a pax extended header sequence that
       is put in front of the ustar block.
    """
    info["magic"] = POSIX_MAGIC
    pax_headers = self.pax_headers.copy()

    # String fields go into the pax header if they are not pure ASCII
    # or if they are longer than the header field.
    string_fields = (
        ("name", "path", LENGTH_NAME),
        ("linkname", "linkpath", LENGTH_LINK),
        ("uname", "uname", 32),
        ("gname", "gname", 32),
    )
    for field, keyword, limit in string_fields:
        if keyword in pax_headers:
            # A value that is already in the pax header has priority.
            continue

        text = info[field]
        try:
            text.encode("ascii", "strict")
        except UnicodeEncodeError:
            pax_headers[keyword] = text
        else:
            if len(text) > limit:
                pax_headers[keyword] = text

    # Number fields go into the pax header if they are floats or do
    # not fit into the octal header field.
    number_fields = (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12))
    for field, digits in number_fields:
        if field in pax_headers:
            # A value that is already in the pax header has priority.
            # Store 0 in the ustar field to avoid an overflow.
            info[field] = 0
            continue

        number = info[field]
        in_range = 0 <= number < 8 ** (digits - 1)
        if not in_range or isinstance(number, float):
            pax_headers[field] = str(number)
            info[field] = 0

    # The extended header is only written if there is something to
    # put into it.
    extended = b""
    if pax_headers:
        extended = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)

    return extended + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000888
@classmethod
def create_pax_global_header(cls, pax_headers):
    """Build a pax global header block sequence (type XGLTYPE) from
       the `pax_headers' dictionary, using the "utf-8" encoding.
    """
    header = cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
    return header
Guido van Rossumd8faa362007-04-27 19:54:29 +0000894
def _posix_split_name(self, name, encoding, errors):
    """Cut `name' at a slash into a (prefix, name) pair whose encoded
       parts fit into LENGTH_PREFIX and LENGTH_NAME bytes. The first
       slash that works is used. Raise ValueError if there is none.
    """
    parts = name.split("/")
    for cut in range(1, len(parts)):
        head = "/".join(parts[:cut])
        tail = "/".join(parts[cut:])
        if len(head.encode(encoding, errors)) > LENGTH_PREFIX:
            continue
        if len(tail.encode(encoding, errors)) > LENGTH_NAME:
            continue
        return head, tail

    raise ValueError("name is too long")
910
@staticmethod
def _create_header(info, format, encoding, errors):
    """Return a header block. info is a dictionary with file
       information, format must be one of the *_FORMAT constants.

       Missing keys in info fall back to empty strings, zero,
       REGTYPE (for "type") and POSIX_MAGIC (for "magic"). String
       fields are converted with stn() using encoding and errors,
       number fields with itn() using format.
    """
    parts = [
        stn(info.get("name", ""), 100, encoding, errors),
        itn(info.get("mode", 0) & 0o7777, 8, format),
        itn(info.get("uid", 0), 8, format),
        itn(info.get("gid", 0), 8, format),
        itn(info.get("size", 0), 12, format),
        itn(info.get("mtime", 0), 12, format),
        # The checksum field is 8 bytes wide (offsets 148-155) and is
        # filled with spaces until the checksum is known. It must have
        # exactly this width, otherwise all of the following fields
        # are shifted and the splice below hits the wrong bytes.
        b" " * 8,
        info.get("type", REGTYPE),
        stn(info.get("linkname", ""), 100, encoding, errors),
        info.get("magic", POSIX_MAGIC),
        stn(info.get("uname", ""), 32, encoding, errors),
        stn(info.get("gname", ""), 32, encoding, errors),
        itn(info.get("devmajor", 0), 8, format),
        itn(info.get("devminor", 0), 8, format),
        stn(info.get("prefix", ""), 155, encoding, errors)
    ]

    # struct.pack() pads the joined fields with NUL bytes up to
    # BLOCKSIZE.
    buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
    chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
    # Overwrite the first 7 bytes of the checksum field (offsets
    # 148-154) with six octal digits and a NUL; the eighth byte stays
    # a space.
    buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
    return buf
938
@staticmethod
def _create_payload(payload):
    """Append NUL bytes to `payload' until its length is a multiple
       of BLOCKSIZE and return the result.
    """
    remainder = len(payload) % BLOCKSIZE
    if remainder > 0:
        padding = (BLOCKSIZE - remainder) * NUL
        payload += padding
    return payload
948
@classmethod
def _create_gnu_long_header(cls, name, type, encoding, errors):
    """Build the header block and the payload blocks of a
       GNUTYPE_LONGNAME or GNUTYPE_LONGLINK member that carries
       `name'.
    """
    # The payload is the encoded name followed by a NUL.
    name = name.encode(encoding, errors) + NUL

    info = {
        "name": "././@LongLink",
        "type": type,
        "size": len(name),
        "magic": GNU_MAGIC,
    }

    # Extended header first, name blocks second.
    header = cls._create_header(info, USTAR_FORMAT, encoding, errors)
    return header + cls._create_payload(name)
965
@classmethod
def _create_pax_generic_header(cls, pax_headers, type, encoding):
    """Build a pax extended or global header sequence (depending on
       `type') from the keyword, value pairs in `pax_headers'. The
       values must be strings.
    """
    # A value that cannot be encoded as strict UTF-8 (i.e. one that
    # contains surrogate characters) forces hdrcharset=BINARY, see
    # _proc_pax() for more information.
    binary = False
    for value in pax_headers.values():
        try:
            value.encode("utf-8", "strict")
        except UnicodeEncodeError:
            binary = True
            break

    chunks = []
    if binary:
        # The hdrcharset record is the first one in the header.
        chunks.append(b"21 hdrcharset=BINARY\n")

    for keyword, value in pax_headers.items():
        key_bytes = keyword.encode("utf-8")
        if binary:
            # Try to get back the original bytes of `value'. This only
            # works if `encoding' matches the string.
            value_bytes = value.encode(encoding, "surrogateescape")
        else:
            value_bytes = value.encode("utf-8")

        # The length field counts its own digits, so repeat the
        # calculation until the number does not change any more.
        base = len(key_bytes) + len(value_bytes) + 3   # ' ' + '=' + '\n'
        total = 0
        while True:
            candidate = base + len(str(total))
            if candidate == total:
                break
            total = candidate

        chunks.append(bytes(str(total), "ascii") + b" " + key_bytes + b"=" + value_bytes + b"\n")

    records = b"".join(chunks)

    # The name is a hardcoded "././@PaxHeader" like star uses it,
    # not the one that POSIX recommends.
    info = {
        "name": "././@PaxHeader",
        "type": type,
        "size": len(records),
        "magic": POSIX_MAGIC,
    }

    # Pax header block first, record blocks second.
    header = cls._create_header(info, USTAR_FORMAT, "ascii", "replace")
    return header + cls._create_payload(records)
1016
@classmethod
def frombuf(cls, buf, encoding, errors):
    """Parse the BLOCKSIZE bytes of the header block `buf' and return
       a new TarInfo object.
       Raises EmptyHeaderError, TruncatedHeaderError, EOFHeaderError
       or InvalidHeaderError if `buf' is empty, has the wrong length,
       consists of NUL bytes only or has a bad checksum.
    """
    if len(buf) == 0:
        raise EmptyHeaderError("empty header")
    if len(buf) != BLOCKSIZE:
        raise TruncatedHeaderError("truncated header")
    if buf.count(NUL) == BLOCKSIZE:
        raise EOFHeaderError("end of file header")

    chksum = nti(buf[148:156])
    if chksum not in calc_chksums(buf):
        raise InvalidHeaderError("bad checksum")

    def text(start, stop):
        # Decode the string field buf[start:stop].
        return nts(buf[start:stop], encoding, errors)

    def number(start, stop):
        # Convert the number field buf[start:stop].
        return nti(buf[start:stop])

    member = cls()
    member.name = text(0, 100)
    member.mode = number(100, 108)
    member.uid = number(108, 116)
    member.gid = number(116, 124)
    member.size = number(124, 136)
    member.mtime = number(136, 148)
    member.chksum = chksum
    member.type = buf[156:157]
    member.linkname = text(157, 257)
    member.uname = text(265, 297)
    member.gname = text(297, 329)
    member.devmajor = number(329, 337)
    member.devminor = number(337, 345)
    prefix = text(345, 500)

    # In the old V7 tar format a directory is a regular file with a
    # trailing slash.
    if member.type == AREGTYPE and member.name.endswith("/"):
        member.type = DIRTYPE

    # The old GNU sparse format puts up to 4 sparse structures of 24
    # bytes each into otherwise unused space of the header. They are
    # kept for later processing in _proc_sparse().
    if member.type == GNUTYPE_SPARSE:
        structs = []
        for pos in range(386, 386 + 4 * 24, 24):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            structs.append((offset, numbytes))
        isextended = bool(buf[482])
        origsize = nti(buf[483:495])
        member._sparse_structs = (structs, isextended, origsize)

    # Directories are stored without redundant trailing slashes.
    if member.isdir():
        member.name = member.name.rstrip("/")

    # Put a ustar longname back together.
    if prefix and member.type not in GNU_TYPES:
        member.name = prefix + "/" + member.name
    return member
1079
@classmethod
def fromtarfile(cls, tarfile):
    """Read one header block from the file object of `tarfile',
       turn it into a TarInfo object and return the result of its
       _proc_member() method.
    """
    header = tarfile.fileobj.read(BLOCKSIZE)
    member = cls.frombuf(header, tarfile.encoding, tarfile.errors)
    # The header that was just read starts one block before the
    # current position.
    member.offset = tarfile.fileobj.tell() - BLOCKSIZE
    return member._proc_member(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001089
Guido van Rossumd8faa362007-04-27 19:54:29 +00001090 #--------------------------------------------------------------------------
1091 # The following are methods that are called depending on the type of a
1092 # member. The entry point is _proc_member() which can be overridden in a
1093 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1094 # implement the following
1095 # operations:
1096 # 1. Set self.offset_data to the position where the data blocks begin,
1097 # if there is data that follows.
1098 # 2. Set tarfile.offset to the position where the next member's header will
1099 # begin.
1100 # 3. Return self or another valid TarInfo object.
def _proc_member(self, tarfile):
    """Select the _proc_*() method that belongs to the type of this
       member, call it with `tarfile' and return its result.
    """
    member_type = self.type
    if member_type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self._proc_gnulong
    elif member_type == GNUTYPE_SPARSE:
        handler = self._proc_sparse
    elif member_type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
        handler = self._proc_pax
    else:
        handler = self._proc_builtin
    return handler(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001113
def _proc_builtin(self, tarfile):
    """Process a member of a builtin type. A member of an unknown
       type is treated like a regular file.
    """
    data_start = tarfile.fileobj.tell()
    self.offset_data = data_start

    next_header = data_start
    has_data = self.isreg() or self.type not in SUPPORTED_TYPES
    if has_data:
        # The next header comes after the data blocks.
        next_header += self._block(self.size)
    tarfile.offset = next_header

    # Apply the global pax header information that was saved in
    # the TarFile object.
    self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

    return self
1130
def _proc_gnulong(self, tarfile):
    """Process the blocks that hold a GNU longname
       or longlink member.

       The data blocks of this member contain the long name. The
       header that follows is read and processed, its name (for
       GNUTYPE_LONGNAME) or linkname (for GNUTYPE_LONGLINK) is
       replaced with the long name and the resulting TarInfo object
       is returned. Raises SubsequentHeaderError if the following
       header is missing or bad.
    """
    buf = tarfile.fileobj.read(self._block(self.size))

    # Fetch the next header and process it.
    try:
        next = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # Patch the TarInfo object from the next header with
    # the longname information.
    next.offset = self.offset
    if self.type == GNUTYPE_LONGNAME:
        next.name = nts(buf, tarfile.encoding, tarfile.errors)
        # Remove redundant slashes from directories. frombuf() has
        # done that for the short name already, but the long name
        # replaces it, so it has to be done again to be consistent.
        if next.isdir():
            next.name = next.name.rstrip("/")
    elif self.type == GNUTYPE_LONGLINK:
        next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

    return next
1152
def _proc_sparse(self, tarfile):
    """Process an old GNU sparse header together with the extended
       header blocks that may follow it.
    """
    # frombuf() has stored the sparse structures from the header.
    structs, isextended, origsize = self._sparse_structs
    del self._sparse_structs

    # Each extended header block has room for 21 structures of 24
    # bytes, followed by a flag at offset 504 that tells whether
    # another block follows.
    while isextended:
        block = tarfile.fileobj.read(BLOCKSIZE)
        for pos in range(0, 21 * 24, 24):
            try:
                offset = nti(block[pos:pos + 12])
                numbytes = nti(block[pos + 12:pos + 24])
            except ValueError:
                break
            if offset and numbytes:
                structs.append((offset, numbytes))
        isextended = bool(block[504])
    self.sparse = structs

    data_start = tarfile.fileobj.tell()
    self.offset_data = data_start
    # self.size is still the size that is stored in the archive here.
    tarfile.offset = data_start + self._block(self.size)
    self.size = origsize
    return self
1180
def _proc_pax(self, tarfile):
    """Process an extended or global header as described in
       POSIX.1-2008.

       The records of the header are parsed into a dictionary, the
       following header is read and processed and the resulting
       TarInfo object is returned. For an extended header (XHDTYPE,
       SOLARIS_XHDTYPE) the records are applied to that object.

       Raises InvalidHeaderError if a record has a length of zero and
       SubsequentHeaderError if the following header is missing or
       bad.
    """
    # Read the header information.
    buf = tarfile.fileobj.read(self._block(self.size))

    # A pax header stores supplemental information for either
    # the following file (extended) or all following files
    # (global).
    if self.type == XGLTYPE:
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # Check if the pax header contains a hdrcharset field. This tells us
    # the encoding of the path, linkpath, uname and gname fields. Normally,
    # these fields are UTF-8 encoded but since POSIX.1-2008 tar
    # implementations are allowed to store them as raw binary strings if
    # the translation to UTF-8 fails.
    match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
    if match is not None:
        pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

    # For the time being, we don't care about anything other than "BINARY".
    # The only other value that is currently allowed by the standard is
    # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
    hdrcharset = pax_headers.get("hdrcharset")
    if hdrcharset == "BINARY":
        encoding = tarfile.encoding
    else:
        encoding = "utf-8"

    # Parse pax header information. A record looks like that:
    # "%d %s=%s\n" % (length, keyword, value). length is the size
    # of the complete record including the length field itself and
    # the newline. keyword and value are both UTF-8 encoded strings.
    regex = re.compile(br"(\d+) ([^=]+)=")
    pos = 0
    while True:
        match = regex.match(buf, pos)
        if not match:
            break

        length, keyword = match.groups()
        length = int(length)
        if length == 0:
            # A record with a length of zero would never advance pos
            # and keep this loop running forever.
            raise InvalidHeaderError("invalid header")
        value = buf[match.end(2) + 1:match.start(1) + length - 1]

        # Normally, we could just use "utf-8" as the encoding and "strict"
        # as the error handler, but we better not take the risk. For
        # example, GNU tar <= 1.23 is known to store filenames it cannot
        # translate to UTF-8 as raw strings (unfortunately without a
        # hdrcharset=BINARY header).
        # We first try the strict standard encoding, and if that fails we
        # fall back on the user's encoding and error handler.
        keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                tarfile.errors)
        if keyword in PAX_NAME_FIELDS:
            value = self._decode_pax_field(value, encoding, tarfile.encoding,
                    tarfile.errors)
        else:
            value = self._decode_pax_field(value, "utf-8", "utf-8",
                    tarfile.errors)

        pax_headers[keyword] = value
        pos += length

    # Fetch the next header.
    try:
        next = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # Process GNU sparse information.
    if "GNU.sparse.map" in pax_headers:
        # GNU extended sparse format version 0.1.
        self._proc_gnusparse_01(next, pax_headers)

    elif "GNU.sparse.size" in pax_headers:
        # GNU extended sparse format version 0.0.
        self._proc_gnusparse_00(next, pax_headers, buf)

    elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
        # GNU extended sparse format version 1.0.
        self._proc_gnusparse_10(next, pax_headers, tarfile)

    if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
        # Patch the TarInfo object with the extended header info.
        next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
        next.offset = self.offset

        if "size" in pax_headers:
            # If the extended header replaces the size field,
            # we need to recalculate the offset where the next
            # header starts.
            offset = next.offset_data
            if next.isreg() or next.type not in SUPPORTED_TYPES:
                offset += next._block(next.size)
            tarfile.offset = offset

    return next
1282
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001283 def _proc_gnusparse_00(self, next, pax_headers, buf):
1284 """Process a GNU tar extended sparse header, version 0.0.
1285 """
1286 offsets = []
1287 for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1288 offsets.append(int(match.group(1)))
1289 numbytes = []
1290 for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1291 numbytes.append(int(match.group(1)))
1292 next.sparse = list(zip(offsets, numbytes))
1293
1294 def _proc_gnusparse_01(self, next, pax_headers):
1295 """Process a GNU tar extended sparse header, version 0.1.
1296 """
1297 sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1298 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1299
def _proc_gnusparse_10(self, next, pax_headers, tarfile):
    """Process a GNU tar extended sparse header, version 1.0.
       The sparse map is read from the file object of `tarfile' as
       newline-terminated decimal numbers: the number of pairs first,
       then the numbers themselves. next.offset_data is set to the
       position of the file object after reading.
    """
    numbers = []
    buf = tarfile.fileobj.read(BLOCKSIZE)
    count, buf = buf.split(b"\n", 1)
    wanted = int(count) * 2
    while len(numbers) < wanted:
        if b"\n" not in buf:
            # The rest of the buffer holds no complete number.
            buf += tarfile.fileobj.read(BLOCKSIZE)
        number, buf = buf.split(b"\n", 1)
        numbers.append(int(number))
    next.offset_data = tarfile.fileobj.tell()
    next.sparse = list(zip(numbers[::2], numbers[1::2]))
1315
def _apply_pax_info(self, pax_headers, encoding, errors):
    """Overwrite attributes of this object with the values from the
       `pax_headers' dictionary and store a copy of the dictionary
       in self.pax_headers.
    """
    for keyword, value in pax_headers.items():
        if keyword == "GNU.sparse.name":
            self.path = value
        elif keyword in ("GNU.sparse.size", "GNU.sparse.realsize"):
            self.size = int(value)
        elif keyword in PAX_FIELDS:
            if keyword in PAX_NUMBER_FIELDS:
                # A number that cannot be converted becomes 0.
                try:
                    value = PAX_NUMBER_FIELDS[keyword](value)
                except ValueError:
                    value = 0
            if keyword == "path":
                value = value.rstrip("/")
            setattr(self, keyword, value)

    self.pax_headers = pax_headers.copy()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001338
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001339 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1340 """Decode a single field from a pax record.
1341 """
1342 try:
1343 return value.decode(encoding, "strict")
1344 except UnicodeDecodeError:
1345 return value.decode(fallback_encoding, fallback_errors)
1346
def _block(self, count):
    """Return `count' rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    # Floor division of the negated count rounds the number of
    # blocks up instead of down.
    blocks = -(-count // BLOCKSIZE)
    return blocks * BLOCKSIZE
Thomas Wouters89f507f2006-12-13 04:49:30 +00001355
def isreg(self):
    """Return True if the type of the member is in REGULAR_TYPES.
    """
    return self.type in REGULAR_TYPES

def isfile(self):
    """Return the result of isreg().
    """
    return self.isreg()

def isdir(self):
    """Return True if the type of the member is DIRTYPE.
    """
    return self.type == DIRTYPE

def issym(self):
    """Return True if the type of the member is SYMTYPE.
    """
    return self.type == SYMTYPE

def islnk(self):
    """Return True if the type of the member is LNKTYPE.
    """
    return self.type == LNKTYPE

def ischr(self):
    """Return True if the type of the member is CHRTYPE.
    """
    return self.type == CHRTYPE

def isblk(self):
    """Return True if the type of the member is BLKTYPE.
    """
    return self.type == BLKTYPE

def isfifo(self):
    """Return True if the type of the member is FIFOTYPE.
    """
    return self.type == FIFOTYPE

def issparse(self):
    """Return True if sparse information is set for the member.
    """
    return self.sparse is not None

def isdev(self):
    """Return True if the type of the member is CHRTYPE, BLKTYPE
       or FIFOTYPE.
    """
    return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1376# class TarInfo
1377
1378class TarFile(object):
1379 """The TarFile Class provides an interface to tar archives.
1380 """
1381
    # Class-level defaults. __init__() overrides most of them per instance
    # when the corresponding argument is not None.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.
                                # __init__() always replaces this with its
                                # `errors' argument.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The file-object for extractfile().
Guido van Rossumd8faa362007-04-27 19:54:29 +00001403
1404 def __init__(self, name=None, mode="r", fileobj=None, format=None,
1405 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
Łukasz Langa04bedfa2016-09-09 19:48:14 -07001406 errors="surrogateescape", pax_headers=None, debug=None,
1407 errorlevel=None, copybufsize=None):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001408 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1409 read from an existing archive, 'a' to append data to an existing
1410 file or 'w' to create a new file overwriting an existing one. `mode'
1411 defaults to 'r'.
1412 If `fileobj' is given, it is used for reading or writing data. If it
1413 can be determined, `mode' is overridden by `fileobj's mode.
1414 `fileobj' is not closed, when TarFile is closed.
1415 """
Berker Peksag0fe63252015-02-13 21:02:12 +02001416 modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
Serhiy Storchaka53ad0cd2014-01-18 15:35:37 +02001417 if mode not in modes:
Berker Peksag0fe63252015-02-13 21:02:12 +02001418 raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001419 self.mode = mode
Serhiy Storchaka53ad0cd2014-01-18 15:35:37 +02001420 self._mode = modes[mode]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001421
1422 if not fileobj:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001423 if self.mode == "a" and not os.path.exists(name):
Thomas Wouterscf297e42007-02-23 15:07:44 +00001424 # Create nonexistent files in append mode.
Guido van Rossumd8faa362007-04-27 19:54:29 +00001425 self.mode = "w"
1426 self._mode = "wb"
Guido van Rossume7ba4952007-06-06 23:52:48 +00001427 fileobj = bltn_open(name, self._mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001428 self._extfileobj = False
1429 else:
Serhiy Storchaka2c6a3ae2014-07-16 23:58:58 +03001430 if (name is None and hasattr(fileobj, "name") and
1431 isinstance(fileobj.name, (str, bytes))):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001432 name = fileobj.name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001433 if hasattr(fileobj, "mode"):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001434 self._mode = fileobj.mode
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001435 self._extfileobj = True
Thomas Woutersed03b412007-08-28 21:37:11 +00001436 self.name = os.path.abspath(name) if name else None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001437 self.fileobj = fileobj
1438
Guido van Rossumd8faa362007-04-27 19:54:29 +00001439 # Init attributes.
1440 if format is not None:
1441 self.format = format
1442 if tarinfo is not None:
1443 self.tarinfo = tarinfo
1444 if dereference is not None:
1445 self.dereference = dereference
1446 if ignore_zeros is not None:
1447 self.ignore_zeros = ignore_zeros
1448 if encoding is not None:
1449 self.encoding = encoding
Victor Stinnerde629d42010-05-05 21:43:57 +00001450 self.errors = errors
Guido van Rossume7ba4952007-06-06 23:52:48 +00001451
1452 if pax_headers is not None and self.format == PAX_FORMAT:
1453 self.pax_headers = pax_headers
1454 else:
1455 self.pax_headers = {}
1456
Guido van Rossumd8faa362007-04-27 19:54:29 +00001457 if debug is not None:
1458 self.debug = debug
1459 if errorlevel is not None:
1460 self.errorlevel = errorlevel
1461
1462 # Init datastructures.
Łukasz Langa04bedfa2016-09-09 19:48:14 -07001463 self.copybufsize = copybufsize
Thomas Wouters477c8d52006-05-27 19:21:47 +00001464 self.closed = False
1465 self.members = [] # list of members as TarInfo objects
1466 self._loaded = False # flag if all members have been read
Christian Heimesd8654cf2007-12-02 15:22:16 +00001467 self.offset = self.fileobj.tell()
1468 # current position in the archive file
Thomas Wouters477c8d52006-05-27 19:21:47 +00001469 self.inodes = {} # dictionary caching the inodes of
1470 # archive members already added
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001471
Lars Gustäbel7b465392009-11-18 20:29:25 +00001472 try:
1473 if self.mode == "r":
1474 self.firstmember = None
1475 self.firstmember = self.next()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001476
Lars Gustäbel7b465392009-11-18 20:29:25 +00001477 if self.mode == "a":
1478 # Move to the end of the archive,
1479 # before the first empty block.
Lars Gustäbel7b465392009-11-18 20:29:25 +00001480 while True:
Lars Gustäbel9520a432009-11-22 18:48:49 +00001481 self.fileobj.seek(self.offset)
1482 try:
1483 tarinfo = self.tarinfo.fromtarfile(self)
1484 self.members.append(tarinfo)
1485 except EOFHeaderError:
1486 self.fileobj.seek(self.offset)
Lars Gustäbel7b465392009-11-18 20:29:25 +00001487 break
Lars Gustäbel9520a432009-11-22 18:48:49 +00001488 except HeaderError as e:
1489 raise ReadError(str(e))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001490
Lars Gustäbel20703c62015-05-27 12:53:44 +02001491 if self.mode in ("a", "w", "x"):
Lars Gustäbel7b465392009-11-18 20:29:25 +00001492 self._loaded = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001493
Lars Gustäbel7b465392009-11-18 20:29:25 +00001494 if self.pax_headers:
1495 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1496 self.fileobj.write(buf)
1497 self.offset += len(buf)
1498 except:
1499 if not self._extfileobj:
1500 self.fileobj.close()
1501 self.closed = True
1502 raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001503
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001504 #--------------------------------------------------------------------------
1505 # Below are the classmethods which act as alternate constructors to the
1506 # TarFile class. The open() method is the only one that is needed for
1507 # public use; it is the "super"-constructor and is able to select an
1508 # adequate "sub"-constructor for a particular compression using the mapping
1509 # from OPEN_METH.
1510 #
1511 # This concept allows one to subclass TarFile without losing the comfort of
1512 # the super-constructor. A sub-constructor is registered and made available
1513 # by adding it to the mapping in OPEN_METH.
1514
Guido van Rossum75b64e62005-01-16 00:16:11 +00001515 @classmethod
Guido van Rossumd8faa362007-04-27 19:54:29 +00001516 def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001517 """Open a tar archive for reading, writing or appending. Return
1518 an appropriate TarFile class.
1519
1520 mode:
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001521 'r' or 'r:*' open for reading with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001522 'r:' open for reading exclusively uncompressed
1523 'r:gz' open for reading with gzip compression
1524 'r:bz2' open for reading with bzip2 compression
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +01001525 'r:xz' open for reading with lzma compression
Thomas Wouterscf297e42007-02-23 15:07:44 +00001526 'a' or 'a:' open for appending, creating the file if necessary
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001527 'w' or 'w:' open for writing without compression
1528 'w:gz' open for writing with gzip compression
1529 'w:bz2' open for writing with bzip2 compression
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +01001530 'w:xz' open for writing with lzma compression
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001531
Berker Peksag0fe63252015-02-13 21:02:12 +02001532 'x' or 'x:' create a tarfile exclusively without compression, raise
1533 an exception if the file is already created
Serhiy Storchaka6a7b3a72016-04-17 08:32:47 +03001534 'x:gz' create a gzip compressed tarfile, raise an exception
Berker Peksag0fe63252015-02-13 21:02:12 +02001535 if the file is already created
Serhiy Storchaka6a7b3a72016-04-17 08:32:47 +03001536 'x:bz2' create a bzip2 compressed tarfile, raise an exception
Berker Peksag0fe63252015-02-13 21:02:12 +02001537 if the file is already created
1538 'x:xz' create an lzma compressed tarfile, raise an exception
1539 if the file is already created
1540
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001541 'r|*' open a stream of tar blocks with transparent compression
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001542 'r|' open an uncompressed stream of tar blocks for reading
1543 'r|gz' open a gzip compressed stream of tar blocks
1544 'r|bz2' open a bzip2 compressed stream of tar blocks
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +01001545 'r|xz' open an lzma compressed stream of tar blocks
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001546 'w|' open an uncompressed stream for writing
1547 'w|gz' open a gzip compressed stream for writing
1548 'w|bz2' open a bzip2 compressed stream for writing
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +01001549 'w|xz' open an lzma compressed stream for writing
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001550 """
1551
1552 if not name and not fileobj:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001553 raise ValueError("nothing to open")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001554
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001555 if mode in ("r", "r:*"):
1556 # Find out which *open() is appropriate for opening the file.
Serhiy Storchakaa89d22a2016-10-30 20:52:29 +02001557 def not_compressed(comptype):
1558 return cls.OPEN_METH[comptype] == 'taropen'
1559 for comptype in sorted(cls.OPEN_METH, key=not_compressed):
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001560 func = getattr(cls, cls.OPEN_METH[comptype])
Thomas Wouters902d6eb2007-01-09 23:18:33 +00001561 if fileobj is not None:
1562 saved_pos = fileobj.tell()
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001563 try:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001564 return func(name, "r", fileobj, **kwargs)
Łukasz Langa04bedfa2016-09-09 19:48:14 -07001565 except (ReadError, CompressionError):
Thomas Wouters902d6eb2007-01-09 23:18:33 +00001566 if fileobj is not None:
1567 fileobj.seek(saved_pos)
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001568 continue
Thomas Wouters477c8d52006-05-27 19:21:47 +00001569 raise ReadError("file could not be opened successfully")
Martin v. Löwis78be7df2005-03-05 12:47:42 +00001570
1571 elif ":" in mode:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001572 filemode, comptype = mode.split(":", 1)
1573 filemode = filemode or "r"
1574 comptype = comptype or "tar"
1575
1576 # Select the *open() function according to
1577 # given compression.
1578 if comptype in cls.OPEN_METH:
1579 func = getattr(cls, cls.OPEN_METH[comptype])
1580 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001581 raise CompressionError("unknown compression type %r" % comptype)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001582 return func(name, filemode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001583
1584 elif "|" in mode:
1585 filemode, comptype = mode.split("|", 1)
1586 filemode = filemode or "r"
1587 comptype = comptype or "tar"
1588
Serhiy Storchaka53ad0cd2014-01-18 15:35:37 +02001589 if filemode not in ("r", "w"):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001590 raise ValueError("mode must be 'r' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001591
Antoine Pitrou605c2932010-09-23 20:15:14 +00001592 stream = _Stream(name, filemode, comptype, fileobj, bufsize)
1593 try:
1594 t = cls(name, filemode, stream, **kwargs)
1595 except:
1596 stream.close()
1597 raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001598 t._extfileobj = False
1599 return t
1600
Berker Peksag0fe63252015-02-13 21:02:12 +02001601 elif mode in ("a", "w", "x"):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001602 return cls.taropen(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001603
Thomas Wouters477c8d52006-05-27 19:21:47 +00001604 raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001605
Guido van Rossum75b64e62005-01-16 00:16:11 +00001606 @classmethod
Guido van Rossumd8faa362007-04-27 19:54:29 +00001607 def taropen(cls, name, mode="r", fileobj=None, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001608 """Open uncompressed tar archive name for reading or writing.
1609 """
Berker Peksag0fe63252015-02-13 21:02:12 +02001610 if mode not in ("r", "a", "w", "x"):
1611 raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001612 return cls(name, mode, fileobj, **kwargs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001613
Guido van Rossum75b64e62005-01-16 00:16:11 +00001614 @classmethod
Guido van Rossumd8faa362007-04-27 19:54:29 +00001615 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001616 """Open gzip compressed tar archive name for reading or writing.
1617 Appending is not allowed.
1618 """
Berker Peksag0fe63252015-02-13 21:02:12 +02001619 if mode not in ("r", "w", "x"):
1620 raise ValueError("mode must be 'r', 'w' or 'x'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001621
1622 try:
1623 import gzip
Neal Norwitz4ec68242003-04-11 03:05:56 +00001624 gzip.GzipFile
1625 except (ImportError, AttributeError):
Thomas Wouters477c8d52006-05-27 19:21:47 +00001626 raise CompressionError("gzip module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001627
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001628 try:
Antoine Pitroue1eca4e2010-10-29 23:49:49 +00001629 fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
Serhiy Storchakac2d01422014-01-18 16:14:10 +02001630 except OSError:
1631 if fileobj is not None and mode == 'r':
1632 raise ReadError("not a gzip file")
1633 raise
1634
1635 try:
Antoine Pitroue1eca4e2010-10-29 23:49:49 +00001636 t = cls.taropen(name, mode, fileobj, **kwargs)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001637 except OSError:
Serhiy Storchakac2d01422014-01-18 16:14:10 +02001638 fileobj.close()
1639 if mode == 'r':
1640 raise ReadError("not a gzip file")
1641 raise
Antoine Pitroue1eca4e2010-10-29 23:49:49 +00001642 except:
Serhiy Storchakac2d01422014-01-18 16:14:10 +02001643 fileobj.close()
Antoine Pitroue1eca4e2010-10-29 23:49:49 +00001644 raise
Serhiy Storchaka9fbec7a2014-01-18 15:53:05 +02001645 t._extfileobj = False
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001646 return t
1647
Guido van Rossum75b64e62005-01-16 00:16:11 +00001648 @classmethod
Guido van Rossumd8faa362007-04-27 19:54:29 +00001649 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001650 """Open bzip2 compressed tar archive name for reading or writing.
1651 Appending is not allowed.
1652 """
Berker Peksag0fe63252015-02-13 21:02:12 +02001653 if mode not in ("r", "w", "x"):
1654 raise ValueError("mode must be 'r', 'w' or 'x'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001655
1656 try:
1657 import bz2
Brett Cannoncd171c82013-07-04 17:43:24 -04001658 except ImportError:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001659 raise CompressionError("bz2 module is not available")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001660
Nadeem Vawdaaebcdba2012-06-04 23:31:20 +02001661 fileobj = bz2.BZ2File(fileobj or name, mode,
1662 compresslevel=compresslevel)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001663
1664 try:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001665 t = cls.taropen(name, mode, fileobj, **kwargs)
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02001666 except (OSError, EOFError):
Antoine Pitrou95f55602010-09-23 18:36:46 +00001667 fileobj.close()
Serhiy Storchakac2d01422014-01-18 16:14:10 +02001668 if mode == 'r':
1669 raise ReadError("not a bzip2 file")
1670 raise
Serhiy Storchakae413cde2014-01-18 16:28:08 +02001671 except:
1672 fileobj.close()
1673 raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001674 t._extfileobj = False
1675 return t
1676
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +01001677 @classmethod
Lars Gustäbelc5e11992012-01-18 14:01:17 +01001678 def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +01001679 """Open lzma compressed tar archive name for reading or writing.
1680 Appending is not allowed.
1681 """
Berker Peksag0fe63252015-02-13 21:02:12 +02001682 if mode not in ("r", "w", "x"):
1683 raise ValueError("mode must be 'r', 'w' or 'x'")
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +01001684
1685 try:
1686 import lzma
Brett Cannoncd171c82013-07-04 17:43:24 -04001687 except ImportError:
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +01001688 raise CompressionError("lzma module is not available")
1689
Nadeem Vawda33c34da2012-06-04 23:34:07 +02001690 fileobj = lzma.LZMAFile(fileobj or name, mode, preset=preset)
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +01001691
1692 try:
1693 t = cls.taropen(name, mode, fileobj, **kwargs)
1694 except (lzma.LZMAError, EOFError):
1695 fileobj.close()
Serhiy Storchakac2d01422014-01-18 16:14:10 +02001696 if mode == 'r':
1697 raise ReadError("not an lzma file")
1698 raise
Serhiy Storchakae413cde2014-01-18 16:28:08 +02001699 except:
1700 fileobj.close()
1701 raise
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +01001702 t._extfileobj = False
1703 return t
1704
    # All *open() methods are registered here.
    # Maps a compression type (the part after ':' in the mode string) to
    # the name of the classmethod that open() looks up with getattr().
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open",   # bzip2 compressed tar
        "xz":  "xzopen"     # lzma compressed tar
    }
1712
1713 #--------------------------------------------------------------------------
1714 # The public methods which TarFile provides:
1715
1716 def close(self):
1717 """Close the TarFile. In write-mode, two finishing zero blocks are
1718 appended to the archive.
1719 """
1720 if self.closed:
1721 return
1722
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001723 self.closed = True
Serhiy Storchaka7e7a3db2015-04-10 13:24:41 +03001724 try:
Lars Gustäbel20703c62015-05-27 12:53:44 +02001725 if self.mode in ("a", "w", "x"):
Serhiy Storchaka7e7a3db2015-04-10 13:24:41 +03001726 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1727 self.offset += (BLOCKSIZE * 2)
1728 # fill up the end with zero-blocks
1729 # (like option -b20 for tar does)
1730 blocks, remainder = divmod(self.offset, RECORDSIZE)
1731 if remainder > 0:
1732 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1733 finally:
1734 if not self._extfileobj:
1735 self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001736
1737 def getmember(self, name):
1738 """Return a TarInfo object for member `name'. If `name' can not be
1739 found in the archive, KeyError is raised. If a member occurs more
Mark Dickinson934896d2009-02-21 20:59:32 +00001740 than once in the archive, its last occurrence is assumed to be the
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001741 most up-to-date version.
1742 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001743 tarinfo = self._getmember(name)
1744 if tarinfo is None:
Thomas Wouters477c8d52006-05-27 19:21:47 +00001745 raise KeyError("filename %r not found" % name)
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001746 return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001747
1748 def getmembers(self):
1749 """Return the members of the archive as a list of TarInfo objects. The
1750 list has the same order as the members in the archive.
1751 """
1752 self._check()
1753 if not self._loaded: # if we want to obtain a list of
1754 self._load() # all members, we first have to
1755 # scan the whole archive.
1756 return self.members
1757
1758 def getnames(self):
1759 """Return the members of the archive as a list of their names. It has
1760 the same order as the list returned by getmembers().
1761 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001762 return [tarinfo.name for tarinfo in self.getmembers()]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001763
1764 def gettarinfo(self, name=None, arcname=None, fileobj=None):
Martin Panterf817a482016-02-19 23:34:56 +00001765 """Create a TarInfo object from the result of os.stat or equivalent
1766 on an existing file. The file is either named by `name', or
1767 specified as a file object `fileobj' with a file descriptor. If
1768 given, `arcname' specifies an alternative name for the file in the
1769 archive, otherwise, the name is taken from the 'name' attribute of
1770 'fileobj', or the 'name' argument. The name should be a text
1771 string.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001772 """
Berker Peksag0fe63252015-02-13 21:02:12 +02001773 self._check("awx")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001774
1775 # When fileobj is given, replace name by
1776 # fileobj's real name.
1777 if fileobj is not None:
1778 name = fileobj.name
1779
1780 # Building the name of the member in the archive.
1781 # Backward slashes are converted to forward slashes,
1782 # Absolute paths are turned to relative paths.
1783 if arcname is None:
1784 arcname = name
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001785 drv, arcname = os.path.splitdrive(arcname)
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +00001786 arcname = arcname.replace(os.sep, "/")
1787 arcname = arcname.lstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001788
1789 # Now, fill the TarInfo object with
1790 # information specific for the file.
Guido van Rossumd8faa362007-04-27 19:54:29 +00001791 tarinfo = self.tarinfo()
Martin Panterf817a482016-02-19 23:34:56 +00001792 tarinfo.tarfile = self # Not needed
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001793
1794 # Use os.stat or os.lstat, depending on platform
1795 # and if symlinks shall be resolved.
1796 if fileobj is None:
1797 if hasattr(os, "lstat") and not self.dereference:
1798 statres = os.lstat(name)
1799 else:
1800 statres = os.stat(name)
1801 else:
1802 statres = os.fstat(fileobj.fileno())
1803 linkname = ""
1804
1805 stmd = statres.st_mode
1806 if stat.S_ISREG(stmd):
1807 inode = (statres.st_ino, statres.st_dev)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001808 if not self.dereference and statres.st_nlink > 1 and \
1809 inode in self.inodes and arcname != self.inodes[inode]:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001810 # Is it a hardlink to an already
1811 # archived file?
1812 type = LNKTYPE
1813 linkname = self.inodes[inode]
1814 else:
1815 # The inode is added only if its valid.
1816 # For win32 it is always 0.
1817 type = REGTYPE
1818 if inode[0]:
1819 self.inodes[inode] = arcname
1820 elif stat.S_ISDIR(stmd):
1821 type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001822 elif stat.S_ISFIFO(stmd):
1823 type = FIFOTYPE
1824 elif stat.S_ISLNK(stmd):
1825 type = SYMTYPE
1826 linkname = os.readlink(name)
1827 elif stat.S_ISCHR(stmd):
1828 type = CHRTYPE
1829 elif stat.S_ISBLK(stmd):
1830 type = BLKTYPE
1831 else:
1832 return None
1833
1834 # Fill the TarInfo object with all
1835 # information we can get.
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001836 tarinfo.name = arcname
1837 tarinfo.mode = stmd
1838 tarinfo.uid = statres.st_uid
1839 tarinfo.gid = statres.st_gid
Lars Gustäbel2470ff12010-06-03 10:11:52 +00001840 if type == REGTYPE:
Martin v. Löwis61d77e02004-08-20 06:35:46 +00001841 tarinfo.size = statres.st_size
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001842 else:
Guido van Rossume2a383d2007-01-15 16:59:06 +00001843 tarinfo.size = 0
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001844 tarinfo.mtime = statres.st_mtime
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001845 tarinfo.type = type
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001846 tarinfo.linkname = linkname
1847 if pwd:
1848 try:
1849 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1850 except KeyError:
1851 pass
1852 if grp:
1853 try:
1854 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1855 except KeyError:
1856 pass
1857
1858 if type in (CHRTYPE, BLKTYPE):
1859 if hasattr(os, "major") and hasattr(os, "minor"):
1860 tarinfo.devmajor = os.major(statres.st_rdev)
1861 tarinfo.devminor = os.minor(statres.st_rdev)
1862 return tarinfo
1863
Serhiy Storchakaa7eb7462014-08-21 10:01:16 +03001864 def list(self, verbose=True, *, members=None):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001865 """Print a table of contents to sys.stdout. If `verbose' is False, only
1866 the names of the members are printed. If it is True, an `ls -l'-like
Serhiy Storchakaa7eb7462014-08-21 10:01:16 +03001867 output is produced. `members' is optional and must be a subset of the
1868 list returned by getmembers().
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001869 """
1870 self._check()
1871
Serhiy Storchakaa7eb7462014-08-21 10:01:16 +03001872 if members is None:
1873 members = self
1874 for tarinfo in members:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001875 if verbose:
Serhiy Storchaka3b4f1592014-02-05 20:53:36 +02001876 _safe_print(stat.filemode(tarinfo.mode))
1877 _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
1878 tarinfo.gname or tarinfo.gid))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001879 if tarinfo.ischr() or tarinfo.isblk():
Serhiy Storchaka3b4f1592014-02-05 20:53:36 +02001880 _safe_print("%10s" %
1881 ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001882 else:
Serhiy Storchaka3b4f1592014-02-05 20:53:36 +02001883 _safe_print("%10d" % tarinfo.size)
1884 _safe_print("%d-%02d-%02d %02d:%02d:%02d" \
1885 % time.localtime(tarinfo.mtime)[:6])
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001886
Serhiy Storchaka3b4f1592014-02-05 20:53:36 +02001887 _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001888
1889 if verbose:
1890 if tarinfo.issym():
Serhiy Storchaka3b4f1592014-02-05 20:53:36 +02001891 _safe_print("-> " + tarinfo.linkname)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001892 if tarinfo.islnk():
Serhiy Storchaka3b4f1592014-02-05 20:53:36 +02001893 _safe_print("link to " + tarinfo.linkname)
Guido van Rossumbe19ed72007-02-09 05:37:30 +00001894 print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001895
Serhiy Storchaka4f76fb12017-01-13 13:25:24 +02001896 def add(self, name, arcname=None, recursive=True, *, filter=None):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001897 """Add the file `name' to the archive. `name' may be any type of file
1898 (directory, fifo, symbolic link, etc.). If given, `arcname'
1899 specifies an alternative name for the file in the archive.
1900 Directories are added recursively by default. This can be avoided by
Serhiy Storchaka4f76fb12017-01-13 13:25:24 +02001901 setting `recursive' to False. `filter' is a function
Lars Gustäbel049d2aa2009-09-12 10:44:00 +00001902 that expects a TarInfo object argument and returns the changed
1903 TarInfo object, if it returns None the TarInfo object will be
1904 excluded from the archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001905 """
Berker Peksag0fe63252015-02-13 21:02:12 +02001906 self._check("awx")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001907
1908 if arcname is None:
1909 arcname = name
1910
1911 # Skip if somebody tries to archive the archive...
Thomas Wouters902d6eb2007-01-09 23:18:33 +00001912 if self.name is not None and os.path.abspath(name) == self.name:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001913 self._dbg(2, "tarfile: Skipped %r" % name)
1914 return
1915
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001916 self._dbg(1, name)
1917
1918 # Create a TarInfo object from the file.
1919 tarinfo = self.gettarinfo(name, arcname)
1920
1921 if tarinfo is None:
1922 self._dbg(1, "tarfile: Unsupported type %r" % name)
1923 return
1924
Lars Gustäbel049d2aa2009-09-12 10:44:00 +00001925 # Change or exclude the TarInfo object.
1926 if filter is not None:
1927 tarinfo = filter(tarinfo)
1928 if tarinfo is None:
1929 self._dbg(2, "tarfile: Excluded %r" % name)
1930 return
1931
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001932 # Append the tar header and data to the archive.
1933 if tarinfo.isreg():
Andrew Svetlov718df1d2012-11-29 14:20:47 +02001934 with bltn_open(name, "rb") as f:
1935 self.addfile(tarinfo, f)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001936
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001937 elif tarinfo.isdir():
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001938 self.addfile(tarinfo)
1939 if recursive:
Bernhard M. Wiedemann84521042018-01-31 11:17:10 +01001940 for f in sorted(os.listdir(name)):
Lars Gustäbel049d2aa2009-09-12 10:44:00 +00001941 self.add(os.path.join(name, f), os.path.join(arcname, f),
Serhiy Storchaka4f76fb12017-01-13 13:25:24 +02001942 recursive, filter=filter)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001943
Martin v. Löwis5dbdc592005-08-27 10:07:56 +00001944 else:
1945 self.addfile(tarinfo)
1946
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001947 def addfile(self, tarinfo, fileobj=None):
1948 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
Martin Panterf817a482016-02-19 23:34:56 +00001949 given, it should be a binary file, and tarinfo.size bytes are read
1950 from it and added to the archive. You can create TarInfo objects
1951 directly, or by using gettarinfo().
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001952 """
Berker Peksag0fe63252015-02-13 21:02:12 +02001953 self._check("awx")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001954
Thomas Wouters89f507f2006-12-13 04:49:30 +00001955 tarinfo = copy.copy(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001956
Guido van Rossume7ba4952007-06-06 23:52:48 +00001957 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001958 self.fileobj.write(buf)
1959 self.offset += len(buf)
Łukasz Langa04bedfa2016-09-09 19:48:14 -07001960 bufsize=self.copybufsize
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001961 # If there's data to follow, append it.
1962 if fileobj is not None:
Łukasz Langa04bedfa2016-09-09 19:48:14 -07001963 copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001964 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
1965 if remainder > 0:
1966 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
1967 blocks += 1
1968 self.offset += blocks * BLOCKSIZE
1969
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001970 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001971
def extractall(self, path=".", members=None, *, numeric_owner=False):
    """Extract `members' (by default every member of the archive) below
       `path'.

       Directory members are extracted as copies with mode 0o700 and
       without attributes; once everything has been extracted, their
       owner, modification time and mode are applied in a second pass.
       `members' may be any iterable of TarInfo objects. `numeric_owner'
       is forwarded unchanged to extract() and chown().

       NOTE(review): member names are joined onto `path' as-is here; no
       validation of the names is visible in this method.
    """
    if members is None:
        members = self

    deferred_dirs = []
    for entry in members:
        if entry.isdir():
            # Keep the original for the second pass and extract a copy
            # carrying a restrictive mode instead.
            deferred_dirs.append(entry)
            entry = copy.copy(entry)
            entry.mode = 0o700
        # Attributes of directories are handled by the second pass below.
        self.extract(entry, path, set_attrs=not entry.isdir(),
                     numeric_owner=numeric_owner)

    # Walk the directories in reverse name order (stable ascending sort,
    # then reversed), so the entries inside a directory precede it.
    for entry in sorted(deferred_dirs, key=lambda d: d.name)[::-1]:
        target = os.path.join(path, entry.name)
        try:
            self.chown(entry, target, numeric_owner=numeric_owner)
            self.utime(entry, target)
            self.chmod(entry, target)
        except ExtractError as exc:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % exc)
2011
def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
    """Extract a single member below `path', using its full name.

       `member' is either a member name (looked up with getmember()) or
       a TarInfo object. `set_attrs' and `numeric_owner' are passed on
       to _extract_member(). An OSError is re-raised when errorlevel is
       greater than 0 and an ExtractError when errorlevel is greater
       than 1; otherwise the problem is only reported through _dbg().
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    if tarinfo.islnk():
        # makelink() reads this attribute to find the hard link's target.
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    destination = os.path.join(path, tarinfo.name)
    try:
        self._extract_member(tarinfo, destination,
                             set_attrs=set_attrs,
                             numeric_owner=numeric_owner)
    except OSError as exc:
        if self.errorlevel > 0:
            raise
        if exc.filename is None:
            report = "tarfile: %s" % exc.strerror
        else:
            report = "tarfile: %s %r" % (exc.strerror, exc.filename)
        self._dbg(1, report)
    except ExtractError as exc:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % exc)
2049
def extractfile(self, member):
    """Return a file object for the data of `member', or None.

       `member' is either a member name or a TarInfo object. Regular
       files and members of unsupported type yield the object built by
       self.fileobject(self, tarinfo). Hard and symbolic links are
       resolved to their target inside the archive and the target's
       file object is returned. All other members carry no data and
       yield None.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Members of unknown type are read like regular files.
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # Directory, chrdev, blkdev, etc.: nothing to read.
        return None

    if isinstance(self.fileobj, _Stream):
        # A link cannot be followed in a non-seekable stream of tar
        # blocks, hence this small but ugly special case.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2080
def _extract_member(self, tarinfo, targetpath, set_attrs=True,
                    numeric_owner=False):
    """Create the filesystem object described by `tarinfo' at
       `targetpath' and, if `set_attrs' is true, apply its owner, mode
       and modification time.
    """
    # Drop trailing slashes and switch from forward slashes to the
    # platform specific separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Missing parent directories are not part of the archive, so they
    # are created with default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        trace = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        trace = tarinfo.name
    self._dbg(1, trace)

    # Select the make*() method matching the member type.
    if tarinfo.isreg():
        build = self.makefile
    elif tarinfo.isdir():
        build = self.makedir
    elif tarinfo.isfifo():
        build = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        build = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        build = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        build = self.makeunknown
    else:
        build = self.makefile
    build(tarinfo, targetpath)

    if not set_attrs:
        return

    self.chown(tarinfo, targetpath, numeric_owner)
    # Mode and mtime are not applied to symbolic links.
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002124
2125 #--------------------------------------------------------------------------
2126 # Below are the different file methods. They are called via
2127 # _extract_member() when extract() is called. They can be replaced in a
2128 # subclass to implement other functionality.
2129
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath' unless something with that
       name already exists.
    """
    # Start out with an owner-only mode; the archived mode is applied
    # afterwards by the callers through chmod().
    owner_only = 0o700
    try:
        os.mkdir(targetpath, owner_only)
    except FileExistsError:
        # An existing entry is silently kept as it is.
        pass
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002139
def makefile(self, tarinfo, targetpath):
    """Write the data of `tarinfo' to a new file called `targetpath'.
    """
    archive = self.fileobj
    archive.seek(tarinfo.offset_data)
    chunk = self.copybufsize
    with bltn_open(targetpath, "wb") as out:
        if tarinfo.sparse is None:
            # Ordinary member: one contiguous copy.
            copyfileobj(archive, out, tarinfo.size, ReadError, chunk)
        else:
            # Sparse member: write each (offset, size) segment at its
            # offset, then cut the file to the member's full size.
            for start, length in tarinfo.sparse:
                out.seek(start)
                copyfileobj(archive, out, length, ReadError, chunk)
            out.seek(tarinfo.size)
            out.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002155
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' as if it were a
       regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    notice = ("tarfile: Unknown file type %r, "
              "extracted as regular file.") % tarinfo.type
    self._dbg(1, notice)
2163
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called `targetpath'. Raise ExtractError if the os
       module provides no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002171
def makedev(self, tarinfo, targetpath):
    """Create a block or character device node called `targetpath'.
       Raise ExtractError if the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the archived permission bits with the device type flag.
    node_mode = tarinfo.mode | (stat.S_IFBLK if tarinfo.isblk()
                                else stat.S_IFCHR)
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, node_mode, device)
2186
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link called `targetpath'.

       If the link cannot be created, the member the link refers to is
       extracted to `targetpath' instead. ExtractError is raised when
       that member cannot be found in the archive.
    """
    try:
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        elif os.path.exists(tarinfo._link_target):
            # _link_target was prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            # The hard link's target is not on disk: extract a copy of
            # the referenced member in its place.
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
    except symlink_exception:
        # symlink_exception is defined elsewhere in this module;
        # presumably it covers platforms without link support.
        try:
            referenced = self._find_link_target(tarinfo)
            self._extract_member(referenced, targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002209
def chown(self, tarinfo, targetpath, numeric_owner):
    """Set owner of targetpath according to tarinfo.

       Nothing is done unless the process runs with effective uid 0.
       If numeric_owner is True, use .gid/.uid instead of .gname/.uname.
       If numeric_owner is False, fall back to .gid/.uid when the search
       based on name fails. Raise ExtractError, chained to the
       underlying OSError, if the owner cannot be changed.
    """
    if not (hasattr(os, "geteuid") and os.geteuid() == 0):
        # We have to be root to change ownership.
        return

    g = tarinfo.gid
    u = tarinfo.uid
    if not numeric_owner:
        # Prefer the ids the local system assigns to the archived names;
        # names unknown to the system keep the numeric ids from above.
        try:
            if grp:
                g = grp.getgrnam(tarinfo.gname)[2]
        except KeyError:
            pass
        try:
            if pwd:
                u = pwd.getpwnam(tarinfo.uname)[2]
        except KeyError:
            pass

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself, not the file it points to.
            os.lchown(targetpath, u, g)
        else:
            os.chown(targetpath, u, g)
    except OSError as e:
        # Chain explicitly so the root cause stays attached as __cause__.
        raise ExtractError("could not change owner") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002238
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.

       Nothing is done if the os module provides no chmod(). Raise
       ExtractError, chained to the underlying OSError, if the mode
       cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except OSError as e:
        # Chain explicitly so the root cause stays attached as __cause__.
        raise ExtractError("could not change mode") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002247
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.

       The access time is set to the same value. Nothing is done if the
       os module provides no utime(). Raise ExtractError, chained to
       the underlying OSError, if the time cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except OSError as e:
        # Chain explicitly so the root cause stays attached as __cause__.
        raise ExtractError("could not change modification time") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002257
2258 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Every member returned from the read loop is also appended to
       self.members; when no member is produced, self._loaded is set.
       Raises ReadError for unusable headers as detailed below.
    """
    self._check("ra")
    # A member stored in self.firstmember is handed out once and then
    # cleared; it is not appended to self.members here.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Advance the file pointer.
    # Seeking to offset - 1 and reading one byte verifies that the data
    # preceding self.offset is really present in the file.
    if self.offset != self.fileobj.tell():
        self.fileobj.seek(self.offset - 1)
        if not self.fileobj.read(1):
            raise ReadError("unexpected end of data")

    # Read the next block.
    # Each handler either retries via `continue', raises ReadError, or
    # falls through to the `break' with tarinfo still None, which is
    # then treated as the end of the archive.
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as e:
            # With ignore_zeros the block is skipped and reading goes
            # on; otherwise this ends the archive.
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as e:
            # Skipped with ignore_zeros; fatal only at the very start
            # of the file; otherwise it ends the archive.
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                raise ReadError(str(e))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as e:
            if self.offset == 0:
                raise ReadError(str(e))
        except SubsequentHeaderError as e:
            # Always fatal, regardless of the offset.
            raise ReadError(str(e))
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        # Nothing more to read: mark the member list as complete.
        self._loaded = True

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002309
2310 #--------------------------------------------------------------------------
2311 # Little helper methods:
2312
def _getmember(self, name, tarinfo=None, normalize=False):
    """Return the last member called `name', or None if there is none.

       If `tarinfo' is given, only the members in front of it are
       searched. If `normalize' is true, `name' and the member names
       are compared after os.path.normpath().
    """
    # getmembers() ensures that all members have been loaded.
    candidates = self.getmembers()

    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    if normalize:
        name = os.path.normpath(name)

    def comparable(entry):
        return os.path.normpath(entry.name) if normalize else entry.name

    # Search from bottom to top so that the latest entry wins.
    return next((entry for entry in reversed(candidates)
                 if name == comparable(entry)), None)
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002335
def _load(self):
    """Call next() until it returns None, then flag the archive as
       completely loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2345
def _check(self, mode=None):
    """Raise OSError if the archive is closed or, when `mode' is given,
       if the archive's own mode does not occur in `mode'.
    """
    if self.closed:
        raise OSError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise OSError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002354
def _find_link_target(self, tarinfo):
    """Return the archive member that the symlink or hardlink member
       `tarinfo' refers to. Raise KeyError if there is no such member.
    """
    if tarinfo.issym():
        # A symlink's target is relative to the link's own directory
        # and may be located anywhere in the archive.
        pieces = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join([piece for piece in pieces if piece])
        limit = None
    else:
        # A hard link is just a reference to an already archived file,
        # so only the members in front of the link are searched.
        linkname = tarinfo.linkname
        limit = tarinfo

    found = self._getmember(linkname, tarinfo=limit, normalize=True)
    if found is None:
        raise KeyError("linkname %r not found" % linkname)
    return found
2373
def __iter__(self):
    """Provide an iterator object.

       The members are yielded in order as TarInfo objects. If the
       archive is already completely loaded, self.members is iterated
       directly; otherwise further members are fetched with next() on
       demand.
    """
    if self._loaded:
        yield from self.members
        return

    # Yield items using TarFile's next() method.
    # When all members have been read, set TarFile as _loaded.
    # `index' is this iterator's own position in self.members.
    index = 0
    # Fix for SF #1100429: Under rare circumstances it can
    # happen that getmembers() is called during iteration,
    # which will have already exhausted the next() method.
    if self.firstmember is not None:
        tarinfo = self.next()
        index += 1
        yield tarinfo

    while True:
        if index < len(self.members):
            # This member has been read already (by this iterator's
            # next() calls or by someone else): take it from the list.
            tarinfo = self.members[index]
        elif not self._loaded:
            # Read one more member from the archive.
            tarinfo = self.next()
            if not tarinfo:
                self._loaded = True
                return
        else:
            # The list is exhausted and the archive fully loaded.
            return
        index += 1
        yield tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002404
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr if the archive's debug setting is at
       least `level'.
    """
    if self.debug >= level:
        print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002410
def __enter__(self):
    """Enter a `with' block: make sure the archive is still open (see
       _check()) and return the archive itself.
    """
    self._check()
    return self
2414
def __exit__(self, type, value, traceback):
    """Leave a `with' block. Without a pending exception the archive
       is closed normally; with one it is merely marked as closed.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because
    # it would try to write end-of-archive blocks and padding.
    # The file object is closed only when _extfileobj is false.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002424
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002425#--------------------
2426# exported functions
2427#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.
    """
    try:
        # `open' is this module's open(), i.e. TarFile.open.
        open(name).close()
    except TarError:
        return False
    return True
2438
# Module-level alias for TarFile.open. From here on the name `open'
# refers to this alias rather than to the builtin of the same name.
# NOTE(review): makefile() calls bltn_open, presumably the saved builtin
# open() -- confirm against the top of the module.
open = TarFile.open
Serhiy Storchakad27b4552013-11-24 01:53:29 +02002440
2441
def main():
    """Command-line interface of the module.

       Exactly one of -l/--list, -e/--extract, -c/--create or -t/--test
       has to be given; -v/--verbose enables additional output. The
       process exits with status 1 if the given file is not a tar
       archive or if --extract receives more than two arguments.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for tarfile module.')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument('-l', '--list', metavar='<tarfile>',
                         help='Show listing of a tarfile')
    actions.add_argument('-e', '--extract', nargs='+',
                         metavar=('<tarfile>', '<output_dir>'),
                         help='Extract tarfile into target dir')
    actions.add_argument('-c', '--create', nargs='+',
                         metavar=('<name>', '<file>'),
                         help='Create tarfile from sources')
    actions.add_argument('-t', '--test', metavar='<tarfile>',
                         help='Test if a tarfile is valid')
    args = parser.parse_args()

    def reject(candidate):
        # parser.exit() terminates the process by raising SystemExit.
        parser.exit(1, '{!r} is not a tar archive.\n'.format(candidate))

    if args.test is not None:
        src = args.test
        if not is_tarfile(src):
            reject(src)
        with open(src, 'r') as tar:
            tar.getmembers()
            print(tar.getmembers(), file=sys.stderr)
        if args.verbose:
            print('{!r} is a tar archive.'.format(src))
        return

    if args.list is not None:
        src = args.list
        if not is_tarfile(src):
            reject(src)
        with TarFile.open(src, 'r:*') as tf:
            tf.list(verbose=args.verbose)
        return

    if args.extract is not None:
        if len(args.extract) not in (1, 2):
            parser.exit(1, parser.format_help())
        # The output directory defaults to the current directory.
        src, curdir = (args.extract + [os.curdir])[:2]

        if not is_tarfile(src):
            reject(src)
        with TarFile.open(src, 'r:*') as tf:
            tf.extractall(path=curdir)
        if args.verbose:
            if curdir == '.':
                msg = '{!r} file is extracted.'.format(src)
            else:
                msg = ('{!r} file is extracted '
                       'into {!r} directory.').format(src, curdir)
            print(msg)
        return

    if args.create is not None:
        # The first argument names the archive, the others its content.
        tar_name = args.create.pop(0)
        tar_files = args.create
        # The archive's extension selects the compression method.
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        method = compressions.get(os.path.splitext(tar_name)[1])
        tar_mode = 'w' if method is None else 'w:' + method

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))
2528
# Run the command-line interface when the module is executed as a script.
if __name__ == '__main__':
    main()