blob: 85119a48a48bfa220c8f249bf5d2499bb952e3df [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
# Module metadata.
version = "0.9.0"
__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000035
36#---------
37# Imports
38#---------
Serhiy Storchakacf4a2f22015-03-11 17:18:03 +020039from builtins import open as bltn_open
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000040import sys
41import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020042import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000043import shutil
44import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000045import time
46import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000047import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000048import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000049
50try:
Xavier de Gayef44abda2016-12-09 09:33:09 +010051 import pwd
Brett Cannoncd171c82013-07-04 17:43:24 -040052except ImportError:
Xavier de Gayef44abda2016-12-09 09:33:09 +010053 pwd = None
54try:
55 import grp
56except ImportError:
57 grp = None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000058
# Exceptions that signal "symbolic links cannot be created here":
# - os.symlink on Windows prior to 6.0 raises NotImplementedError.
# - OSError (winerror=1314) will be raised if the caller does not hold the
#   SeCreateSymbolicLinkPrivilege privilege.
# OSError is a builtin, so it is listed directly rather than being appended
# inside a try/except NameError that could never trigger.
symlink_exception = (AttributeError, NotImplementedError, OSError)
67
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000068# from tarfile import *
# Public API exported by "from tarfile import *".
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
           "CompressionError", "StreamError", "ExtractError", "HeaderError",
           "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
           "DEFAULT_FORMAT", "open"]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000073
74#---------------------------------------------------------
75# tar constants
76#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# Both magic strings fill the 8 bytes of the header's magic + version
# fields.  The GNU variant is "ustar" followed by TWO spaces and a NUL.
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, all other fields
# are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
145
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000146#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000147# initialization
148#---------------------------------------------------------
# Default text encoding for member names: always UTF-8 on Windows,
# otherwise whatever the interpreter reports as the filesystem encoding.
if os.name == "nt":
    ENCODING = "utf-8"
else:
    ENCODING = sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000153
154#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000155# Some useful functions
156#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000157
def stn(s, length, encoding, errors):
    """Convert a string to a fixed-width, NUL-padded bytes object.

    s is encoded with the given encoding and error handler, then cut or
    padded with NUL bytes so the result is exactly length bytes long.
    Note that when the encoded string already fills (or exceeds) length
    bytes it is truncated and no terminating NUL is present.
    """
    s = s.encode(encoding, errors)
    # A negative repeat count yields b"", so over-long input is only cut.
    return s[:length] + (length - len(s)) * NUL
Thomas Wouters477c8d52006-05-27 19:21:47 +0000163
def nts(s, encoding, errors):
    """Convert a null-terminated bytes object to a string.

    Everything from the first NUL byte onwards is discarded; if there is
    no NUL byte the whole of s is decoded.
    """
    end = s.find(b"\0")
    if end != -1:
        s = s[:end]
    return s.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000171
def nti(s):
    """Convert a number field to a python number.

    Raises InvalidHeaderError if the field is neither a valid base-256
    nor a valid octal number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] in (0o200, 0o377):
        # GNU base-256: a marker byte followed by a big-endian integer.
        n = int.from_bytes(s[1:], "big")
        if s[0] == 0o377:
            # 0o377 marks a negative number stored in two's complement.
            n = -(256 ** (len(s) - 1) - n)
    else:
        try:
            s = nts(s, "ascii", "strict")
            # An all-blank field counts as zero.
            n = int(s.strip() or "0", 8)
        except ValueError:
            # Also covers UnicodeDecodeError from non-ASCII bytes.
            raise InvalidHeaderError("invalid header")
    return n
191
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

    n is coerced with int().  The result is digits bytes long: a bytes
    object for the octal encoding, a bytearray for the GNU base-256
    encoding.  Raises ValueError if n does not fit the field in the
    requested format.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.
    n = int(n)
    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        if n >= 0:
            s = bytearray([0o200])
        else:
            s = bytearray([0o377])
            # Shift into the non-negative range; only the low digits-1
            # bytes are emitted below (two's complement).
            n = 256 ** digits + n

        # Emit least-significant byte first, always inserting right
        # after the marker byte so the final order is big-endian.
        for _ in range(digits - 1):
            s.insert(1, n & 0o377)
            n >>= 8
    else:
        raise ValueError("overflow in number field")

    return s
220
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.

       Returns the tuple (unsigned_chksum, signed_chksum).
    """
    # 148 bytes, skip the 8-byte chksum field, then 356 more bytes.
    # The constant 256 stands in for the skipped field: 8 spaces * 32.
    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000233
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Data is moved in chunks of bufsize bytes (16 KiB when bufsize is
       None or 0).  If src runs dry before length bytes were copied,
       exception("unexpected end of data") is raised.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    blocks, remainder = divmod(length, bufsize)
    # A run of full-size chunks followed by the (optional) short tail.
    chunk_sizes = [bufsize] * blocks
    if remainder != 0:
        chunk_sizes.append(remainder)

    for chunk_size in chunk_sizes:
        buf = src.read(chunk_size)
        if len(buf) < chunk_size:
            raise exception("unexpected end of data")
        dst.write(buf)
    return
258
def filemode(mode):
    """Deprecated in this location; use stat.filemode."""
    import warnings
    # stacklevel 2 attributes the warning to the caller of filemode().
    warnings.warn("deprecated in favor of stat.filemode",
                  DeprecationWarning, 2)
    return stat.filemode(mode)
265
def _safe_print(s):
    """Print s followed by a single space instead of a newline.

    If sys.stdout declares an encoding, characters it cannot encode are
    replaced by backslash escapes first so that print() cannot fail
    with UnicodeEncodeError.
    """
    encoding = getattr(sys.stdout, 'encoding', None)
    if encoding is not None:
        s = s.encode(encoding, 'backslashreplace').decode(encoding)
    print(s, end=' ')
271
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000272
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000306
307#---------------------------
308# internal stream interface
309#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        """Open the file name with os.open(); mode must be "r" or "w"
           (any other value raises KeyError).
        """
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # O_BINARY only exists on some platforms; add it where available.
        if hasattr(os, "O_BINARY"):
            mode |= os.O_BINARY
        self.fd = os.open(name, mode, 0o666)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return up to size bytes read from the file descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write all of the bytes-like object s to the file descriptor."""
        # os.write() may accept fewer bytes than it was given, so keep
        # writing the unwritten tail until nothing is left.
        remaining = memoryview(s).cast("B")
        while remaining:
            written = os.write(self.fd, remaining)
            remaining = remaining[written:]
333
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip, bzip2 or xz compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name is the file name (may be None when fileobj is given),
           mode is "r" or "w", comptype is one of "tar", "gz", "bz2",
           "xz" or "*" (auto-detect), fileobj is an already open
           stream or None, bufsize is the block size used for I/O on
           the underlying stream.

           Raises CompressionError if the compression module is not
           available or comptype is unknown.
        """
        # _extfileobj records whether the caller owns fileobj; only a
        # file opened here is closed by this object.
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = b""      # raw bytes pending to/from the fileobj
        self.pos = 0        # position in the uncompressed stream
        self.closed = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    # Set before parsing the header so the attribute
                    # exists as soon as any decompression can happen.
                    self.exception = zlib.error
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except BaseException:
            # Do not leak a file we opened ourselves; the error is
            # always re-raised.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" is missing if __init__ failed before setting it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: raw deflate data, the gzip header and trailer
        # are written by hand.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # gzip trailer: CRC32 and size modulo 2**32.
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

           Parses and skips the gzip member header.  Raises ReadError
           if the magic number is wrong and CompressionError if the
           compression method is not deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            # FEXTRA: the extra field belongs to the (uncompressed)
            # header, so it must be skipped with the raw reader.  Going
            # through read() would feed it to the decompressor and
            # advance the stream position.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # FNAME: NUL-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: NUL-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: 16-bit header checksum.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos < 0:
            raise StreamError("seeking backwards is not allowed")
        # Seeking forward means reading and discarding the data.
        blocks, remainder = divmod(pos - self.pos, self.bufsize)
        for _ in range(blocks):
            self.read(self.bufsize)
        self.read(remainder)
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            chunks = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                chunks.append(buf)
            # The chunks are bytes, so the separator must be bytes too.
            buf = b"".join(chunks)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.

           Raises ReadError if the compressed data is invalid.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                raise ReadError("invalid compressed data")
            self.dbuf += buf
            c += len(buf)
        buf = self.dbuf[:size]
        self.dbuf = self.dbuf[size:]
        return buf

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            self.buf += buf
            c += len(buf)
        buf = self.buf[:size]
        self.buf = self.buf[size:]
        return buf
# class _Stream
577
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        """Wrap fileobj and read its first block ahead for inspection."""
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block that was read ahead, ignoring size.

           The call rebinds self.read to the wrapped file object's
           read(), so every later read goes straight to the file object.
        """
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Guess the compression type from the leading magic bytes.

           Returns "gz", "bz2", "xz" or, if nothing matches, "tar".
        """
        if self.buf.startswith(b"\x1f\x8b\x08"):
            return "gz"
        elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
            return "bz2"
        elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        else:
            return "tar"

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class StreamProxy
604
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000605#------------------------
606# Extraction file object
607#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        """fileobj is the underlying file, offset the position in it
           where the member's stored data starts and size the logical
           size of the member.  blockinfo is an optional list of
           (offset, size) pairs giving the data blocks of a sparse
           member; gaps between them read back as NUL bytes.
        """
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.  Each entry is
        # (is_data, start, stop, real_offset): start/stop are logical
        # positions, real_offset is where a data block is stored in
        # fileobj (None for a hole).
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for block_offset, block_size in blockinfo:
            if block_offset > lastpos:
                self.map.append((False, lastpos, block_offset, None))
            self.map.append((True, block_offset, block_offset + block_size,
                             realpos))
            realpos += block_size
            lastpos = block_offset + block_size
        if lastpos < self.size:
            self.map.append((False, lastpos, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.

           The resulting position is clamped to the range 0..size and
           returned.  Raises ValueError for an unknown whence.
        """
        if whence == io.SEEK_SET:
            self.position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                self.position = max(self.position + position, 0)
            else:
                self.position = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            self.position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        return self.position

    def read(self, size=None):
        """Read data from the file.

           Returns at most size bytes (everything up to the end of the
           member if size is None).  Raises ReadError if the underlying
           file ends before a data block is complete.
        """
        if size is None:
            size = self.size - self.position
        else:
            size = min(size, self.size - self.position)

        buf = b""
        while size > 0:
            # Find the map entry covering the current position, starting
            # from the entry used last and wrapping around at the end.
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                self.fileobj.seek(offset + (self.position - start))
                b = self.fileobj.read(length)
                if len(b) != length:
                    raise ReadError("unexpected end of data")
                buf += b
            else:
                # A hole in a sparse member reads as zeros.
                buf += NUL * length
            size -= length
            self.position += length
        return buf

    def readinto(self, b):
        """Read into the writable buffer b; return the byte count."""
        buf = self.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def close(self):
        """Mark this object closed; the underlying file stays open."""
        self.closed = True
#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000711
class ExFileObject(io.BufferedReader):
    """Buffered, read-only file object for one archive member.

       The raw stream is a _FileInFile over tarfile.fileobj, built from
       the member's offset_data, size and sparse attributes.
    """

    def __init__(self, tarfile, tarinfo):
        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,
                              tarinfo.size, tarinfo.sparse)
        super().__init__(fileobj)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000719
720#------------------
721# Exported Classes
722#------------------
723class TarInfo(object):
724 """Informational class which holds the details about an
725 archive member given by a tar header block.
726 TarInfo objects are returned by TarFile.getmember(),
727 TarFile.getmembers() and TarFile.gettarinfo() and are
728 usually created internally.
729 """
730
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +0000731 __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
732 "chksum", "type", "linkname", "uname", "gname",
733 "devmajor", "devminor",
734 "offset", "offset_data", "pax_headers", "sparse",
735 "tarfile", "_sparse_structs", "_link_target")
736
def __init__(self, name=""):
    """Create a TarInfo object with default field values.

    name is the optional name of the member.
    """
    self.name = name                    # member name
    self.mode = 0o644                   # file permissions
    self.uid = self.gid = 0             # user id / group id
    self.size = 0                       # file size
    self.mtime = 0                      # modification time
    self.chksum = 0                     # header checksum
    self.type = REGTYPE                 # member type
    self.linkname = ""                  # link name
    self.uname = self.gname = ""        # user name / group name
    self.devmajor = self.devminor = 0   # device major / minor number

    # offset: where the tar header starts;
    # offset_data: where the file's data starts.
    self.offset = self.offset_data = 0

    self.sparse = None                  # sparse member information
    self.pax_headers = {}               # pax header information
760
# In pax headers the "name" and "linkname" fields are called
# "path" and "linkpath".
@property
def path(self):
    """The member name, under its pax keyword "path"."""
    return self.name

@path.setter
def path(self, value):
    self.name = value
770
@property
def linkpath(self):
    """The link name, under its pax keyword "linkpath"."""
    return self.linkname

@linkpath.setter
def linkpath(self, value):
    self.linkname = value
Guido van Rossumd8faa362007-04-27 19:54:29 +0000778
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000779 def __repr__(self):
780 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
781
def get_info(self):
    """Return the TarInfo's header attributes as a new dictionary.

    The mode is masked with 0o7777, and the name of a DIRTYPE member
    gets a trailing "/" if it does not already end with one.
    """
    fields = ("name", "mode", "uid", "gid", "size", "mtime", "chksum",
              "type", "linkname", "uname", "gname", "devmajor", "devminor")
    info = {field: getattr(self, field) for field in fields}
    info["mode"] &= 0o7777

    is_dir = info["type"] == DIRTYPE
    if is_dir and not info["name"].endswith("/"):
        info["name"] += "/"

    return info
805
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
    """Return the tar header for this member, built for *format*.

    format selects the header flavour (USTAR_FORMAT, GNU_FORMAT or
    PAX_FORMAT); any other value raises ValueError.  The errors
    argument is not passed on for PAX_FORMAT.
    """
    info = self.get_info()

    if format == USTAR_FORMAT:
        return self.create_ustar_header(info, encoding, errors)
    if format == GNU_FORMAT:
        return self.create_gnu_header(info, encoding, errors)
    if format == PAX_FORMAT:
        return self.create_pax_header(info, encoding)
    raise ValueError("invalid format")
819
def create_ustar_header(self, info, encoding, errors):
    """Return the object as a ustar header block.

    Raises ValueError if the encoded linkname exceeds LENGTH_LINK.  A
    name whose encoded form exceeds LENGTH_NAME is split into a prefix
    and a name part by _posix_split_name().
    """
    info["magic"] = POSIX_MAGIC

    encoded_link = info["linkname"].encode(encoding, errors)
    if len(encoded_link) > LENGTH_LINK:
        raise ValueError("linkname is too long")

    encoded_name = info["name"].encode(encoding, errors)
    if len(encoded_name) > LENGTH_NAME:
        prefix, name = self._posix_split_name(info["name"], encoding, errors)
        info["prefix"] = prefix
        info["name"] = name

    return self._create_header(info, USTAR_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000832
def create_gnu_header(self, info, encoding, errors):
    """Return the object as a GNU header block sequence.

    A linkname or name whose encoded form is longer than its header
    field is emitted first as a GNU long header; the regular header
    block follows.
    """
    info["magic"] = GNU_MAGIC

    chunks = []
    long_fields = (
        ("linkname", LENGTH_LINK, GNUTYPE_LONGLINK),
        ("name", LENGTH_NAME, GNUTYPE_LONGNAME),
    )
    for key, limit, longtype in long_fields:
        if len(info[key].encode(encoding, errors)) > limit:
            chunks.append(
                self._create_gnu_long_header(info[key], longtype, encoding, errors))

    chunks.append(self._create_header(info, GNU_FORMAT, encoding, errors))
    return b"".join(chunks)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000846
def create_pax_header(self, info, encoding):
    """Return the object as a ustar header block, preceded by a pax
       extended header sequence when some value cannot be represented
       in the plain ustar header.
    """
    info["magic"] = POSIX_MAGIC
    pax_headers = self.pax_headers.copy()

    # String fields go into the pax header when they are not pure ASCII
    # or are longer than the header field.
    string_fields = (
        ("name", "path", LENGTH_NAME),
        ("linkname", "linkpath", LENGTH_LINK),
        ("uname", "uname", 32),
        ("gname", "gname", 32),
    )
    for name, hname, length in string_fields:
        if hname in pax_headers:
            # The pax header has priority.
            continue

        text = info[name]
        try:
            text.encode("ascii", "strict")
        except UnicodeEncodeError:
            pax_headers[hname] = text
            continue

        if len(text) > length:
            pax_headers[hname] = text

    # Number fields go into the pax header when they do not fit the
    # octal field or are floats.
    number_fields = (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12))
    for name, digits in number_fields:
        if name in pax_headers:
            # The pax header has priority. Avoid overflow.
            info[name] = 0
            continue

        val = info[name]
        fits = 0 <= val < 8 ** (digits - 1)
        if not fits or isinstance(val, float):
            pax_headers[name] = str(val)
            info[name] = 0

    # Create a pax extended header only if there is something to store.
    if pax_headers:
        extended = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
    else:
        extended = b""

    return extended + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000895
@classmethod
def create_pax_global_header(cls, pax_headers):
    """Return a pax global header block sequence for *pax_headers*.

    The records are built by _create_pax_generic_header() with type
    XGLTYPE and the "utf-8" encoding.
    """
    header_type = XGLTYPE
    return cls._create_pax_generic_header(pax_headers, header_type, "utf-8")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000901
def _posix_split_name(self, name, encoding, errors):
    """Split a name that is too long for the name field into a
       (prefix, name) pair.

       The split happens at a "/"; the first split point at which the
       encoded prefix fits LENGTH_PREFIX and the encoded rest fits
       LENGTH_NAME is used.  Raises ValueError if there is none.
    """
    parts = name.split("/")
    for cut in range(1, len(parts)):
        head = "/".join(parts[:cut])
        tail = "/".join(parts[cut:])
        if len(head.encode(encoding, errors)) > LENGTH_PREFIX:
            continue
        if len(tail.encode(encoding, errors)) > LENGTH_NAME:
            continue
        return head, tail

    raise ValueError("name is too long")
917
@staticmethod
def _create_header(info, format, encoding, errors):
    """Return a header block. info is a dictionary with file
       information, format must be one of the *_FORMAT constants.

       Missing keys in info fall back to empty/zero defaults.  The
       block is packed to BLOCKSIZE bytes and the checksum computed
       over it is written into the checksum field afterwards.
    """
    parts = [
        stn(info.get("name", ""), 100, encoding, errors),
        itn(info.get("mode", 0) & 0o7777, 8, format),
        itn(info.get("uid", 0), 8, format),
        itn(info.get("gid", 0), 8, format),
        itn(info.get("size", 0), 12, format),
        itn(info.get("mtime", 0), 12, format),
        # Checksum field placeholder.  It must be exactly 8 bytes wide:
        # frombuf() reads the checksum from buf[148:156] and the type
        # from buf[156:157], so a shorter placeholder would shift every
        # following field.
        b" " * 8,
        info.get("type", REGTYPE),
        stn(info.get("linkname", ""), 100, encoding, errors),
        info.get("magic", POSIX_MAGIC),
        stn(info.get("uname", ""), 32, encoding, errors),
        stn(info.get("gname", ""), 32, encoding, errors),
        itn(info.get("devmajor", 0), 8, format),
        itn(info.get("devminor", 0), 8, format),
        stn(info.get("prefix", ""), 155, encoding, errors)
    ]

    # Pad the joined fields with NULs up to one full block.
    buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
    chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
    # Overwrite the first 7 bytes of the checksum field (6 octal digits
    # plus NUL); for a 512-byte block the slice bounds are offsets 148
    # and 155, so the field's last placeholder byte is kept.
    buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
    return buf
945
@staticmethod
def _create_payload(payload):
    """Return payload padded with NUL bytes so that its length is a
       multiple of BLOCKSIZE.  A payload that already ends on a block
       border is returned unchanged.
    """
    remainder = len(payload) % BLOCKSIZE
    if remainder > 0:
        padding = NUL * (BLOCKSIZE - remainder)
        payload += padding
    return payload
955
@classmethod
def _create_gnu_long_header(cls, name, type, encoding, errors):
    """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
       for name: a header block followed by the NUL-terminated,
       block-padded encoded name.
    """
    encoded = name.encode(encoding, errors) + NUL

    info = {
        "name": "././@LongLink",
        "type": type,
        "size": len(encoded),
        "magic": GNU_MAGIC,
    }

    # Extended header first, then the blocks holding the name.
    header = cls._create_header(info, USTAR_FORMAT, encoding, errors)
    return header + cls._create_payload(encoded)
972
@classmethod
def _create_pax_generic_header(cls, pax_headers, type, encoding):
    """Return a POSIX.1-2008 extended or global header sequence
       holding the keyword, value pairs of pax_headers.  The values
       must be strings.
    """
    # A value that cannot be encoded as strict UTF-8 (i.e. contains
    # surrogate characters) forces hdrcharset=BINARY, see _proc_pax()
    # for more information.
    binary = False
    for value in pax_headers.values():
        try:
            value.encode("utf-8", "strict")
        except UnicodeEncodeError:
            binary = True
            break

    chunks = []
    if binary:
        # The hdrcharset field goes to the beginning of the header.
        chunks.append(b"21 hdrcharset=BINARY\n")

    for keyword, value in pax_headers.items():
        keyword = keyword.encode("utf-8")
        if binary:
            # Try to restore the original byte representation of `value'.
            # Needless to say, that the encoding must match the string.
            value = value.encode(encoding, "surrogateescape")
        else:
            value = value.encode("utf-8")

        # The record length counts its own decimal digits, so iterate
        # until the total stops changing.
        fixed = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
        total = 0
        while True:
            candidate = fixed + len(str(total))
            if candidate == total:
                break
            total = candidate
        chunks.append(
            bytes(str(total), "ascii") + b" " + keyword + b"=" + value + b"\n")

    records = b"".join(chunks)

    # We use a hardcoded "././@PaxHeader" name like star does
    # instead of the one that POSIX recommends.
    info = {
        "name": "././@PaxHeader",
        "type": type,
        "size": len(records),
        "magic": POSIX_MAGIC,
    }

    # Pax header block first, then the record blocks.
    header = cls._create_header(info, USTAR_FORMAT, "ascii", "replace")
    return header + cls._create_payload(records)
1023
@classmethod
def frombuf(cls, buf, encoding, errors):
    """Construct a TarInfo object from a 512 byte bytes object.

    Raises EmptyHeaderError for an empty buffer, TruncatedHeaderError
    for a buffer of the wrong length, EOFHeaderError for a block that
    consists only of NUL bytes and InvalidHeaderError for a checksum
    mismatch.
    """
    length = len(buf)
    if length == 0:
        raise EmptyHeaderError("empty header")
    if length != BLOCKSIZE:
        raise TruncatedHeaderError("truncated header")
    if buf.count(NUL) == BLOCKSIZE:
        raise EOFHeaderError("end of file header")

    chksum = nti(buf[148:156])
    if chksum not in calc_chksums(buf):
        raise InvalidHeaderError("bad checksum")

    # Decode the fixed-position header fields.
    obj = cls()
    obj.name = nts(buf[0:100], encoding, errors)
    obj.mode = nti(buf[100:108])
    obj.uid = nti(buf[108:116])
    obj.gid = nti(buf[116:124])
    obj.size = nti(buf[124:136])
    obj.mtime = nti(buf[136:148])
    obj.chksum = chksum
    obj.type = buf[156:157]
    obj.linkname = nts(buf[157:257], encoding, errors)
    obj.uname = nts(buf[265:297], encoding, errors)
    obj.gname = nts(buf[297:329], encoding, errors)
    obj.devmajor = nti(buf[329:337])
    obj.devminor = nti(buf[337:345])
    prefix = nts(buf[345:500], encoding, errors)

    # Old V7 tar format represents a directory as a regular
    # file with a trailing slash.
    if obj.type == AREGTYPE and obj.name.endswith("/"):
        obj.type = DIRTYPE

    # The old GNU sparse format occupies some of the unused
    # space in the buffer for up to 4 sparse structures.
    # Save them for later processing in _proc_sparse().
    if obj.type == GNUTYPE_SPARSE:
        structs = []
        # Four 24-byte (offset, numbytes) entries starting at byte 386.
        for pos in range(386, 386 + 4 * 24, 24):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            structs.append((offset, numbytes))
        isextended = bool(buf[482])
        origsize = nti(buf[483:495])
        obj._sparse_structs = (structs, isextended, origsize)

    # Remove redundant slashes from directories.
    if obj.isdir():
        obj.name = obj.name.rstrip("/")

    # Reconstruct a ustar longname.
    if prefix and obj.type not in GNU_TYPES:
        obj.name = prefix + "/" + obj.name
    return obj
1086
@classmethod
def fromtarfile(cls, tarfile):
    """Return the next TarInfo object from TarFile object
       tarfile.

       One block is read from tarfile.fileobj and parsed by frombuf();
       the result is then handed to its _proc_member() method.
    """
    fileobj = tarfile.fileobj
    block = fileobj.read(BLOCKSIZE)
    member = cls.frombuf(block, tarfile.encoding, tarfile.errors)
    # The header started one block before the current position.
    member.offset = fileobj.tell() - BLOCKSIZE
    return member._proc_member(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001096
Guido van Rossumd8faa362007-04-27 19:54:29 +00001097 #--------------------------------------------------------------------------
1098 # The following are methods that are called depending on the type of a
1099 # member. The entry point is _proc_member() which can be overridden in a
1100 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1101 # implement the following
1102 # operations:
1103 # 1. Set self.offset_data to the position where the data blocks begin,
1104 # if there is data that follows.
1105 # 2. Set tarfile.offset to the position where the next member's header will
1106 # begin.
1107 # 3. Return self or another valid TarInfo object.
def _proc_member(self, tarfile):
    """Dispatch to the _proc_*() method that matches the member's
       type and return its result.
    """
    kind = self.type
    if kind in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self._proc_gnulong(tarfile)
    if kind == GNUTYPE_SPARSE:
        return self._proc_sparse(tarfile)
    if kind in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
        return self._proc_pax(tarfile)
    return self._proc_builtin(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001120
def _proc_builtin(self, tarfile):
    """Process a builtin type or an unknown type which
       will be treated as a regular file.
    """
    data_start = tarfile.fileobj.tell()
    self.offset_data = data_start

    # Only regular files and unsupported types are followed by data
    # blocks that have to be skipped.
    next_header = data_start
    if self.isreg() or self.type not in SUPPORTED_TYPES:
        next_header += self._block(self.size)
    tarfile.offset = next_header

    # Patch the TarInfo object with saved global
    # header information.
    self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

    return self
1137
def _proc_gnulong(self, tarfile):
    """Process the blocks that hold a GNU longname
       or longlink member.

       The long value is read from the data blocks, the following header
       is parsed, and the resulting TarInfo gets the long name or link
       name.  Raises SubsequentHeaderError if the following header cannot
       be read.
    """
    raw = tarfile.fileobj.read(self._block(self.size))

    # Fetch the next header and process it.
    try:
        member = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # Patch the TarInfo object from the next header with
    # the longname information.
    member.offset = self.offset
    if self.type == GNUTYPE_LONGNAME:
        member.name = nts(raw, tarfile.encoding, tarfile.errors)
    elif self.type == GNUTYPE_LONGLINK:
        member.linkname = nts(raw, tarfile.encoding, tarfile.errors)

    return member
1159
def _proc_sparse(self, tarfile):
    """Process a GNU sparse header plus extra headers.
    """
    # frombuf() already collected the structures of the first header.
    structs, isextended, origsize = self._sparse_structs
    del self._sparse_structs

    # Collect sparse structures from extended header blocks.
    while isextended:
        block = tarfile.fileobj.read(BLOCKSIZE)
        # Up to 21 entries of 24 bytes each per extension block.
        for pos in range(0, 21 * 24, 24):
            try:
                offset = nti(block[pos:pos + 12])
                numbytes = nti(block[pos + 12:pos + 24])
            except ValueError:
                break
            if offset and numbytes:
                structs.append((offset, numbytes))
        isextended = bool(block[504])
    self.sparse = structs

    self.offset_data = tarfile.fileobj.tell()
    # The next header is located using the size stored in the header;
    # only afterwards is self.size replaced by the original size.
    tarfile.offset = self.offset_data + self._block(self.size)
    self.size = origsize
    return self
1187
def _proc_pax(self, tarfile):
    """Process an extended or global header as described in
       POSIX.1-2008.

       The pax records are parsed into a dictionary, the following
       header is read and, for an extended header, patched with the
       parsed information.  Returns the TarInfo of the following member.

       Raises InvalidHeaderError if a record declares a length of zero
       and SubsequentHeaderError if the following header cannot be read.
    """
    # Read the header information.
    buf = tarfile.fileobj.read(self._block(self.size))

    # A pax header stores supplemental information for either
    # the following file (extended) or all following files
    # (global).
    if self.type == XGLTYPE:
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # Check if the pax header contains a hdrcharset field. This tells us
    # the encoding of the path, linkpath, uname and gname fields. Normally,
    # these fields are UTF-8 encoded but since POSIX.1-2008 tar
    # implementations are allowed to store them as raw binary strings if
    # the translation to UTF-8 fails.
    match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
    if match is not None:
        pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

    # For the time being, we don't care about anything other than "BINARY".
    # The only other value that is currently allowed by the standard is
    # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
    hdrcharset = pax_headers.get("hdrcharset")
    if hdrcharset == "BINARY":
        encoding = tarfile.encoding
    else:
        encoding = "utf-8"

    # Parse pax header information. A record looks like that:
    # "%d %s=%s\n" % (length, keyword, value). length is the size
    # of the complete record including the length field itself and
    # the newline. keyword and value are both UTF-8 encoded strings.
    regex = re.compile(br"(\d+) ([^=]+)=")
    pos = 0
    while True:
        match = regex.match(buf, pos)
        if not match:
            break

        length, keyword = match.groups()
        length = int(length)
        if length == 0:
            # A zero-length record would never advance pos and so loop
            # forever; such a header is malformed.
            raise InvalidHeaderError("invalid header")
        value = buf[match.end(2) + 1:match.start(1) + length - 1]

        # Normally, we could just use "utf-8" as the encoding and "strict"
        # as the error handler, but we better not take the risk. For
        # example, GNU tar <= 1.23 is known to store filenames it cannot
        # translate to UTF-8 as raw strings (unfortunately without a
        # hdrcharset=BINARY header).
        # We first try the strict standard encoding, and if that fails we
        # fall back on the user's encoding and error handler.
        keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                tarfile.errors)
        if keyword in PAX_NAME_FIELDS:
            value = self._decode_pax_field(value, encoding, tarfile.encoding,
                    tarfile.errors)
        else:
            value = self._decode_pax_field(value, "utf-8", "utf-8",
                    tarfile.errors)

        pax_headers[keyword] = value
        pos += length

    # Fetch the next header.
    try:
        next = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # Process GNU sparse information.
    if "GNU.sparse.map" in pax_headers:
        # GNU extended sparse format version 0.1.
        self._proc_gnusparse_01(next, pax_headers)

    elif "GNU.sparse.size" in pax_headers:
        # GNU extended sparse format version 0.0.
        self._proc_gnusparse_00(next, pax_headers, buf)

    elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
        # GNU extended sparse format version 1.0.
        self._proc_gnusparse_10(next, pax_headers, tarfile)

    if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
        # Patch the TarInfo object with the extended header info.
        next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
        next.offset = self.offset

        if "size" in pax_headers:
            # If the extended header replaces the size field,
            # we need to recalculate the offset where the next
            # header starts.
            offset = next.offset_data
            if next.isreg() or next.type not in SUPPORTED_TYPES:
                offset += next._block(next.size)
            tarfile.offset = offset

    return next
1289
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001290 def _proc_gnusparse_00(self, next, pax_headers, buf):
1291 """Process a GNU tar extended sparse header, version 0.0.
1292 """
1293 offsets = []
1294 for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1295 offsets.append(int(match.group(1)))
1296 numbytes = []
1297 for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1298 numbytes.append(int(match.group(1)))
1299 next.sparse = list(zip(offsets, numbytes))
1300
1301 def _proc_gnusparse_01(self, next, pax_headers):
1302 """Process a GNU tar extended sparse header, version 0.1.
1303 """
1304 sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1305 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1306
def _proc_gnusparse_10(self, next, pax_headers, tarfile):
    """Process a GNU tar extended sparse header, version 1.0.

    The sparse map is read from tarfile.fileobj as newline-terminated
    decimal numbers: first the number of map entries, then an offset
    and a size for every entry.  next.offset_data is set to the file
    position after the last block read, next.sparse to the list of
    (offset, numbytes) tuples.
    """
    fileobj = tarfile.fileobj
    numbers = []

    buf = fileobj.read(BLOCKSIZE)
    entries, buf = buf.split(b"\n", 1)
    entries = int(entries)

    # Every entry contributes two numbers.
    while len(numbers) < entries * 2:
        if b"\n" not in buf:
            # The next number is incomplete; fetch another block.
            buf += fileobj.read(BLOCKSIZE)
        number, buf = buf.split(b"\n", 1)
        numbers.append(int(number))

    next.offset_data = fileobj.tell()
    next.sparse = list(zip(numbers[::2], numbers[1::2]))
1322
Guido van Rossume7ba4952007-06-06 23:52:48 +00001323 def _apply_pax_info(self, pax_headers, encoding, errors):
1324 """Replace fields with supplemental information from a previous
1325 pax extended or global header.
1326 """
1327 for keyword, value in pax_headers.items():
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001328 if keyword == "GNU.sparse.name":
1329 setattr(self, "path", value)
1330 elif keyword == "GNU.sparse.size":
1331 setattr(self, "size", int(value))
1332 elif keyword == "GNU.sparse.realsize":
1333 setattr(self, "size", int(value))
1334 elif keyword in PAX_FIELDS:
1335 if keyword in PAX_NUMBER_FIELDS:
1336 try:
1337 value = PAX_NUMBER_FIELDS[keyword](value)
1338 except ValueError:
1339 value = 0
1340 if keyword == "path":
1341 value = value.rstrip("/")
1342 setattr(self, keyword, value)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001343
1344 self.pax_headers = pax_headers.copy()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001345
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001346 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1347 """Decode a single field from a pax record.
1348 """
1349 try:
1350 return value.decode(encoding, "strict")
1351 except UnicodeDecodeError:
1352 return value.decode(fallback_encoding, fallback_errors)
1353
def _block(self, count):
    """Round a byte count up to the next multiple of BLOCKSIZE and
       return it, e.g. _block(834) => 1024.
    """
    full_blocks = count // BLOCKSIZE
    if count % BLOCKSIZE:
        # A partially filled block still occupies a whole block.
        full_blocks += 1
    return full_blocks * BLOCKSIZE
Thomas Wouters89f507f2006-12-13 04:49:30 +00001362
def isreg(self):
    """Return True if the member's type is one of REGULAR_TYPES."""
    return self.type in REGULAR_TYPES

def isfile(self):
    """Return the result of isreg()."""
    return self.isreg()

def isdir(self):
    """Return True if the member's type is DIRTYPE."""
    return self.type == DIRTYPE

def issym(self):
    """Return True if the member's type is SYMTYPE."""
    return self.type == SYMTYPE

def islnk(self):
    """Return True if the member's type is LNKTYPE."""
    return self.type == LNKTYPE

def ischr(self):
    """Return True if the member's type is CHRTYPE."""
    return self.type == CHRTYPE

def isblk(self):
    """Return True if the member's type is BLKTYPE."""
    return self.type == BLKTYPE

def isfifo(self):
    """Return True if the member's type is FIFOTYPE."""
    return self.type == FIFOTYPE

def issparse(self):
    """Return True if sparse information has been set for the member."""
    return self.sparse is not None

def isdev(self):
    """Return True if the member's type is CHRTYPE, BLKTYPE or FIFOTYPE."""
    return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1383# class TarInfo
1384
1385class TarFile(object):
1386 """The TarFile Class provides an interface to tar archives.
1387 """
1388
# Class-level defaults. __init__ replaces most of them on the instance
# when the corresponding constructor argument is not None.

# Debug message level: may be set from 0 (no msgs) to 3 (all msgs).
debug = 0

# If true, add the content of a linked file to the tar file,
# else add the link itself.
dereference = False

# If true, skip empty or invalid blocks and continue processing.
ignore_zeros = False

# If 0, fatal errors only appear in debug messages (if debug >= 0).
# If > 0, errors are passed to the caller as exceptions.
errorlevel = 1

# The format to use when creating an archive.
format = DEFAULT_FORMAT

# Encoding for 8-bit character strings.
encoding = ENCODING

# Error handler for unicode conversion. __init__ always overwrites this
# with its `errors' argument (default "surrogateescape").
errors = None

# The default TarInfo class to use.
tarinfo = TarInfo

# The file-object for extractfile().
fileobject = ExFileObject
Guido van Rossumd8faa362007-04-27 19:54:29 +00001410
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None,
        errorlevel=None, copybufsize=None):
    """Open the (uncompressed) tar archive `name'.

       `mode' is 'r' to read an existing archive (the default), 'a' to
       append to an archive, 'w' to create a new archive or 'x' to
       create one exclusively. Any other value raises ValueError.

       If `fileobj' is given it is used for all reading and writing, a
       `mode' attribute on it overrides the internal file mode, and it
       is not closed when the TarFile is closed. Otherwise the file
       `name' is opened here and owned by the TarFile.

       The remaining arguments override the class-level defaults of the
       same name when they are not None. `pax_headers' is only honoured
       if the resulting format is PAX_FORMAT.
    """
    file_modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
    if mode not in file_modes:
        raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
    self.mode = mode
    self._mode = file_modes[mode]

    if fileobj:
        # Work on the caller's file object and borrow its name and
        # mode where they are available.
        if (name is None and hasattr(fileobj, "name")
                and isinstance(fileobj.name, (str, bytes))):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    else:
        if self.mode == "a" and not os.path.exists(name):
            # Appending to a nonexistent file means creating it.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Per-instance overrides of the class-level defaults.
    overrides = (
        ("format", format),
        ("tarinfo", tarinfo),
        ("dereference", dereference),
        ("ignore_zeros", ignore_zeros),
        ("encoding", encoding),
    )
    for attr, given in overrides:
        if given is not None:
            setattr(self, attr, given)
    self.errors = errors

    use_pax_headers = pax_headers is not None and self.format == PAX_FORMAT
    self.pax_headers = pax_headers if use_pax_headers else {}

    for attr, given in (("debug", debug), ("errorlevel", errorlevel)):
        if given is not None:
            setattr(self, attr, given)

    # Bookkeeping state.
    self.copybufsize = copybufsize
    self.closed = False
    self.members = []                   # members as TarInfo objects
    self._loaded = False                # True once all members are read
    self.offset = self.fileobj.tell()   # current position in the archive
    self.inodes = {}                    # inodes of members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Read the existing members and stop in front of the
            # first empty block, so new data is written there.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    member = self.tarinfo.fromtarfile(self)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))
                else:
                    self.members.append(member)

        if self.mode in ("a", "w", "x"):
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(
                        self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Whatever went wrong, do not leak a file that was opened here.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001510
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001511 #--------------------------------------------------------------------------
1512 # Below are the classmethods which act as alternate constructors to the
1513 # TarFile class. The open() method is the only one that is needed for
1514 # public use; it is the "super"-constructor and is able to select an
1515 # adequate "sub"-constructor for a particular compression using the mapping
1516 # from OPEN_METH.
1517 #
1518 # This concept allows one to subclass TarFile without losing the comfort of
1519 # the super-constructor. A sub-constructor is registered and made available
1520 # by adding it to the mapping in OPEN_METH.
1521
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending and return
       the object created by the matching sub-constructor.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'r:xz'       open for reading with lzma compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'w:xz'       open for writing with lzma compression

       'x' or 'x:'  create a tarfile exclusively without compression, raise
                    an exception if the file is already created
       'x:gz'       create a gzip compressed tarfile, raise an exception
                    if the file is already created
       'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                    if the file is already created
       'x:xz'       create an lzma compressed tarfile, raise an exception
                    if the file is already created

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'r|xz'       open an lzma compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing
       'w|xz'       open an lzma compressed stream for writing

       ValueError is raised if neither `name' nor `fileobj' is given or
       if `mode' cannot be interpreted.
    """
    if not (name or fileobj):
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Probe every registered sub-constructor, trying the plain
        # 'taropen' one last.
        candidates = sorted(
            cls.OPEN_METH,
            key=lambda ctype: cls.OPEN_METH[ctype] == 'taropen')
        for ctype in candidates:
            opener = getattr(cls, cls.OPEN_METH[ctype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return opener(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so the next candidate sees the same data.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
        raise ReadError("file could not be opened successfully")

    if ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Pick the sub-constructor registered for this compression.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        opener = getattr(cls, cls.OPEN_METH[comptype])
        return opener(name, filemode, fileobj, **kwargs)

    if "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            archive = cls(name, filemode, stream, **kwargs)
        except:
            stream.close()
            raise
        # The stream was created here, so the archive has to close it.
        archive._extfileobj = False
        return archive

    if mode in ("a", "w", "x"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001612
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open the uncompressed tar archive `name' for reading or writing.

       `mode' must be 'r', 'a', 'w' or 'x', otherwise ValueError is
       raised. All arguments are handed on to the class constructor.
    """
    if mode in ("r", "a", "w", "x"):
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001620
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the gzip compressed tar archive `name' for reading or writing.
       Appending is not allowed.

       `mode' must be 'r', 'w' or 'x', otherwise ValueError is raised.
       CompressionError is raised if the gzip module cannot be used.
       In read mode an OSError from the gzip layer is reported as
       ReadError("not a gzip file"), with the OSError kept as its cause.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        # The import failure itself is of no interest to the caller.
        raise CompressionError("gzip module is not available") from None

    try:
        fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
    except OSError as e:
        if fileobj is not None and mode == 'r':
            raise ReadError("not a gzip file") from e
        raise

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except OSError as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not a gzip file") from e
        raise
    except:
        fileobj.close()
        raise
    # The GzipFile wrapper was created here, so the archive closes it.
    t._extfileobj = False
    return t
1654
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the bzip2 compressed tar archive `name' for reading or writing.
       Appending is not allowed.

       `mode' must be 'r', 'w' or 'x', otherwise ValueError is raised.
       CompressionError is raised if the bz2 module cannot be imported.
       In read mode an OSError or EOFError from the bz2 layer is
       reported as ReadError("not a bzip2 file"), with the original
       exception kept as its cause.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import bz2
    except ImportError:
        # The import failure itself is of no interest to the caller.
        raise CompressionError("bz2 module is not available") from None

    fileobj = bz2.BZ2File(fileobj or name, mode,
                          compresslevel=compresslevel)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (OSError, EOFError) as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not a bzip2 file") from e
        raise
    except:
        fileobj.close()
        raise
    # The BZ2File wrapper was created here, so the archive closes it.
    t._extfileobj = False
    return t
1683
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open the lzma compressed tar archive `name' for reading or writing.
       Appending is not allowed.

       `mode' must be 'r', 'w' or 'x', otherwise ValueError is raised.
       CompressionError is raised if the lzma module cannot be imported.
       In read mode an LZMAError or EOFError from the lzma layer is
       reported as ReadError("not an lzma file"), with the original
       exception kept as its cause.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import lzma
    except ImportError:
        # The import failure itself is of no interest to the caller.
        raise CompressionError("lzma module is not available") from None

    fileobj = lzma.LZMAFile(fileobj or name, mode, preset=preset)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (lzma.LZMAError, EOFError) as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not an lzma file") from e
        raise
    except:
        fileobj.close()
        raise
    # The LZMAFile wrapper was created here, so the archive closes it.
    t._extfileobj = False
    return t
1711
# Registry of all *open() methods: maps the compression name used in a
# mode string to the name of the classmethod that opens such an archive.
# open() probes the entries in this order, with "taropen" moved last.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz": "gzopen",     # gzip compressed tar
    "bz2": "bz2open",   # bzip2 compressed tar
    "xz": "xzopen",     # lzma compressed tar
}
1719
1720 #--------------------------------------------------------------------------
1721 # The public methods which TarFile provides:
1722
def close(self):
    """Close the TarFile. Calling it on a closed TarFile does nothing.

       In the write modes ('a', 'w', 'x') two zero blocks are appended
       and the archive is padded with zeros up to a multiple of
       RECORDSIZE. The underlying file object is closed as well,
       unless it was supplied by the caller.
    """
    if self.closed:
        return

    self.closed = True
    try:
        if self.mode in ("a", "w", "x"):
            trailer = BLOCKSIZE * 2
            self.fileobj.write(NUL * trailer)
            self.offset += trailer
            # Fill up the last record with zero blocks
            # (like option -b20 for tar does).
            leftover = self.offset % RECORDSIZE
            if leftover > 0:
                self.fileobj.write(NUL * (RECORDSIZE - leftover))
    finally:
        if not self._extfileobj:
            self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001743
def getmember(self, name):
    """Return the TarInfo object for member `name'. KeyError is raised
       if `name' cannot be found in the archive. If a member occurs
       more than once in the archive, its last occurrence is assumed
       to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001754
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects,
       in the same order as they appear in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete list requires scanning the whole archive first.
    self._load()
    return self.members
1764
def getnames(self):
    """Return the names of the archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001770
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object from the result of os.stat or equivalent
       on an existing file. The file is either named by `name', or
       specified as a file object `fileobj' with a file descriptor. If
       given, `arcname' specifies an alternative name for the file in the
       archive, otherwise the name is taken from the 'name' attribute of
       'fileobj', or the 'name' argument. The name should be a text
       string.

       None is returned if the file is of a type that is not handled
       here.
    """
    self._check("awx")

    # A file object takes precedence: use its real name.
    if fileobj is not None:
        name = fileobj.name

    # Build the member name: strip the drive, convert backward slashes
    # to forward slashes and turn absolute paths into relative ones.
    if arcname is None:
        arcname = name
    _, tail = os.path.splitdrive(arcname)
    arcname = tail.replace(os.sep, "/").lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self  # Not needed

    # Use os.fstat, os.lstat or os.stat, depending on the arguments,
    # the platform and whether symlinks shall be resolved.
    if fileobj is not None:
        st = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        st = os.lstat(name)
    else:
        st = os.stat(name)
    linkname = ""

    st_mode = st.st_mode
    if stat.S_ISREG(st_mode):
        inode = (st.st_ino, st.st_dev)
        # Is it a hardlink to an already archived file?
        is_hardlink = (not self.dereference and st.st_nlink > 1 and
                       inode in self.inodes and
                       arcname != self.inodes[inode])
        if is_hardlink:
            kind = LNKTYPE
            linkname = self.inodes[inode]
        else:
            kind = REGTYPE
            # The inode is remembered only if it is valid.
            # For win32 it is always 0.
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(st_mode):
        kind = DIRTYPE
    elif stat.S_ISFIFO(st_mode):
        kind = FIFOTYPE
    elif stat.S_ISLNK(st_mode):
        kind = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(st_mode):
        kind = CHRTYPE
    elif stat.S_ISBLK(st_mode):
        kind = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = st_mode
    tarinfo.uid = st.st_uid
    tarinfo.gid = st.st_gid
    # Only regular files carry data in the archive.
    tarinfo.size = st.st_size if kind == REGTYPE else 0
    tarinfo.mtime = st.st_mtime
    tarinfo.type = kind
    tarinfo.linkname = linkname
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if (kind in (CHRTYPE, BLKTYPE)
            and hasattr(os, "major") and hasattr(os, "minor")):
        tarinfo.devmajor = os.major(st.st_rdev)
        tarinfo.devminor = os.minor(st.st_rdev)
    return tarinfo
1870
def list(self, verbose=True, *, members=None):
    """Print a table of contents. If `verbose' is False, only the names
       of the members are printed. If it is True, an `ls -l'-like
       output is produced. `members' is optional and must be a subset
       of the list returned by getmembers().
    """
    self._check()

    entries = self if members is None else members
    for member in entries:
        if verbose:
            _safe_print(stat.filemode(member.mode))
            owner = "%s/%s" % (member.uname or member.uid,
                               member.gname or member.gid)
            _safe_print(owner)
            if member.ischr() or member.isblk():
                # Devices show "major,minor" in place of a size.
                device = "%d,%d" % (member.devmajor, member.devminor)
                _safe_print("%10s" % device)
            else:
                _safe_print("%10d" % member.size)
            stamp = time.localtime(member.mtime)[:6]
            _safe_print("%d-%02d-%02d %02d:%02d:%02d" % stamp)

        _safe_print(member.name + ("/" if member.isdir() else ""))

        if verbose:
            if member.issym():
                _safe_print("-> " + member.linkname)
            if member.islnk():
                _safe_print("link to " + member.linkname)
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001902
def add(self, name, arcname=None, recursive=True, *, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided
       by setting `recursive' to False. `filter' is a function that
       expects a TarInfo object argument and returns the changed TarInfo
       object. If it returns None, the TarInfo object is excluded from
       the archive.
    """
    self._check("awx")

    if arcname is None:
        arcname = name

    # Never put the archive into itself.
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Describe the file with a TarInfo object.
    tarinfo = self.gettarinfo(name, arcname)
    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Let the filter change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Regular files are written as header plus data.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as source:
            self.addfile(tarinfo, source)
        return

    # Everything else is a header only. Directories may be descended
    # into afterwards.
    is_directory = tarinfo.isdir()
    self.addfile(tarinfo)
    if is_directory and recursive:
        for entry in sorted(os.listdir(name)):
            self.add(os.path.join(name, entry),
                     os.path.join(arcname, entry),
                     recursive, filter=filter)
1953
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, it should be a binary file, and tarinfo.size bytes are read
       from it and added to the archive. You can create TarInfo objects
       directly, or by using gettarinfo().

       A shallow copy of `tarinfo' is what ends up in self.members.
    """
    self._check("awx")

    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)
    chunk_size = self.copybufsize

    # If there's data to follow, append it and pad the last block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size, bufsize=chunk_size)
        nblocks, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            nblocks += 1
        self.offset += nblocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001978
def extractall(self, path=".", members=None, *, numeric_owner=False):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers(). If `numeric_owner` is True, only
       the numbers for user/group names are used and not the names.

       If a directory occurs more than once among the members, the
       attributes of its last occurrence are the ones applied last.

       NOTE(review): nothing in this method rejects member names that
       are absolute or contain "..". Callers handling untrusted
       archives should vet `members' before extracting.
    """
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directories with a safe mode. The real
            # attributes are applied once everything is in place.
            directories.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 0o700
        # Do not set_attrs directories, as we will do that further down
        self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
                     numeric_owner=numeric_owner)

    # Reverse sort directories, so that children are handled before
    # their parents. A single stable sort keeps entries with the same
    # name in archive order, so the last occurrence is applied last.
    directories.sort(key=lambda a: a.name, reverse=True)

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)
2018
def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
    """Extract one member from the archive, using its full name.

    `member' may be a filename or a TarInfo object. The member is
    written below `path' (the current working directory by default).
    File attributes (owner, mtime, mode) are set unless `set_attrs' is
    False. If `numeric_owner' is True, only the numbers for user/group
    names are used and not the names.

    An OSError is re-raised when self.errorlevel > 0 and an ExtractError
    when self.errorlevel > 1; otherwise the error is only reported
    through self._dbg().
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    if tarinfo.islnk():
        # makelink() looks at this attribute to find the hard link target.
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(
            tarinfo,
            os.path.join(path, tarinfo.name),
            set_attrs=set_attrs,
            numeric_owner=numeric_owner,
        )
    except OSError as exc:
        if self.errorlevel > 0:
            raise
        if exc.filename is None:
            report = "tarfile: %s" % exc.strerror
        else:
            report = "tarfile: %s %r" % (exc.strerror, exc.filename)
        self._dbg(1, report)
    except ExtractError as exc:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % exc)
2056
def extractfile(self, member):
    """Return a file object for a member of the archive.

    `member' may be a filename or a TarInfo object. For a regular file
    (or a member of an unknown type, which is treated as one) the result
    of self.fileobject(self, tarinfo) is returned. For a hard or symbolic
    link the file object of the link's target is returned. For every
    other member type None is returned.

    Raises StreamError when a (sym)link is requested while the archive
    is read from a _Stream.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Members with unknown types are treated as regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # No data is associated with the member (directory, chrdev, blkdev,
    # etc.), so there is no file object to hand out.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries to
        # extract a (sym)link as a file object from a non-seekable stream
        # of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2087
def _extract_member(self, tarinfo, targetpath, set_attrs=True,
                    numeric_owner=False):
    """Extract the TarInfo object tarinfo to a physical file called
    targetpath.

    Missing parent directories are created first. The member is then
    created through the make*() method that matches its type and, when
    `set_attrs' is true, its owner is set; mode and modification time
    are set as well unless the member is a symbolic link.
    """
    # Build the destination pathname: drop trailing slashes and replace
    # forward slashes with the platform specific separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Directories that are not part of the archive are created with
    # default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        announcement = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        announcement = tarinfo.name
    self._dbg(1, announcement)

    # Pick the creator for this member type, then run it.
    if tarinfo.isreg():
        create = self.makefile
    elif tarinfo.isdir():
        create = self.makedir
    elif tarinfo.isfifo():
        create = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        create = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        create = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        create = self.makeunknown
    else:
        create = self.makefile
    create(tarinfo, targetpath)

    if not set_attrs:
        return

    self.chown(tarinfo, targetpath, numeric_owner)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002131
2132 #--------------------------------------------------------------------------
2133 # Below are the different file methods. They are called via
2134 # _extract_member() when extract() is called. They can be replaced in a
2135 # subclass to implement other functionality.
2136
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath.

    Nothing happens when os.mkdir() reports that targetpath already
    exists.
    """
    try:
        # A safe, owner-only mode is used here; _extract_member() applies
        # the mode stored in the archive afterwards via chmod().
        os.mkdir(targetpath, 0o700)
    except FileExistsError:
        return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002146
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

    The member's data is copied from self.fileobj, starting at
    tarinfo.offset_data, in chunks of self.copybufsize. For a sparse
    member each (offset, size) pair of tarinfo.sparse is written at its
    offset and the file is then truncated to tarinfo.size.
    """
    archive = self.fileobj
    archive.seek(tarinfo.offset_data)
    chunk_size = self.copybufsize
    with bltn_open(targetpath, "wb") as output:
        if tarinfo.sparse is None:
            copyfileobj(archive, output, tarinfo.size, ReadError, chunk_size)
            return
        for position, length in tarinfo.sparse:
            output.seek(position)
            copyfileobj(archive, output, length, ReadError, chunk_size)
        # Give the file its full logical size, holes included.
        output.seek(tarinfo.size)
        output.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002162
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type at
    targetpath.

    The member is written through makefile() and a level 1 debug
    message naming the unknown type is emitted afterwards.
    """
    self.makefile(tarinfo, targetpath)
    notice = ("tarfile: Unknown file type %r, extracted as regular file."
              % tarinfo.type)
    self._dbg(1, notice)
2170
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath.

    Raises ExtractError when the os module offers no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002178
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.

    Raises ExtractError when the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the archived permission bits with the device type bit:
    # block device if the member says so, character device otherwise.
    permissions = tarinfo.mode
    type_bits = stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, permissions | type_bits, device)
2193
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath.

    A symbolic link is created with os.symlink(). A hard link is created
    with os.link() when tarinfo._link_target (prepared by extract())
    exists on disk; otherwise the link's target member is looked up in
    the archive and extracted to targetpath in place of a link.

    If creating the link raises symlink_exception (platform limitation),
    a copy of the referenced member is extracted instead. When that
    member cannot be found in the archive, ExtractError is raised with
    the original KeyError attached as its cause.

    NOTE: a KeyError raised by the hard-link fallback inside the first
    try block is not converted and propagates unchanged.
    """
    try:
        # For systems that support symbolic and hard links.
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        elif os.path.exists(tarinfo._link_target):
            # See extract() for where _link_target is set.
            os.link(tarinfo._link_target, targetpath)
        else:
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
    except symlink_exception:
        try:
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
        except KeyError as exc:
            # Chain explicitly so the traceback shows which linkname
            # could not be resolved.
            raise ExtractError(
                "unable to resolve link inside archive") from exc
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002216
def chown(self, tarinfo, targetpath, numeric_owner):
    """Set owner of targetpath according to tarinfo.

    If numeric_owner is True, use .gid/.uid instead of .gname/.uname.
    If numeric_owner is False, fall back to .gid/.uid when the search
    based on name fails. Nothing is done unless the process runs with
    an effective user id of 0.

    Raises ExtractError (with the underlying OSError as its cause) when
    the ownership change fails.
    """
    if not (hasattr(os, "geteuid") and os.geteuid() == 0):
        # We have to be root to change ownership.
        return

    gid = tarinfo.gid
    uid = tarinfo.uid
    if not numeric_owner:
        # Prefer the names stored in the archive. An unknown name is not
        # an error: the numeric id read above stays in effect.
        # NOTE(review): grp/pwd are tested for truthiness, so they are
        # presumably None where the modules are unavailable - confirm
        # against the file's imports.
        try:
            if grp:
                gid = grp.getgrnam(tarinfo.gname)[2]
        except KeyError:
            pass
        try:
            if pwd:
                uid = pwd.getpwnam(tarinfo.uname)[2]
        except KeyError:
            pass

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself rather than what it points to.
            os.lchown(targetpath, uid, gid)
        else:
            os.chown(targetpath, uid, gid)
    except OSError as exc:
        raise ExtractError("could not change owner") from exc
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002245
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.

    Does nothing when the os module has no chmod(). Raises ExtractError
    (with the underlying OSError as its cause) when the mode cannot be
    changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except OSError as exc:
        raise ExtractError("could not change mode") from exc
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002254
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.

    tarinfo.mtime is used for both the access and the modification
    time. Does nothing when the os module has no utime(). Raises
    ExtractError (with the underlying OSError as its cause) when the
    time cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except OSError as exc:
        raise ExtractError("could not change modification time") from exc
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002264
2265 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
    TarFile is opened for reading. Return None if there is no more
    available.

    A member that was already read ahead and stored in self.firstmember
    is handed out first. Every newly read member is appended to
    self.members; when no further member is found, self._loaded is set.

    Raises ReadError for a truncated archive and for header errors that
    cannot be skipped; the originating header error is attached as the
    cause.
    """
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Advance the file pointer. Reading the single byte just before
    # self.offset confirms the data really extends that far.
    if self.offset != self.fileobj.tell():
        self.fileobj.seek(self.offset - 1)
        if not self.fileobj.read(1):
            raise ReadError("unexpected end of data")

    # Read the next block. With ignore_zeros set, empty and invalid
    # blocks are skipped one BLOCKSIZE at a time; otherwise the first
    # header error ends the loop with tarinfo still None, or is reported
    # as a ReadError when it occurs at the very start of the archive.
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as exc:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as exc:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                raise ReadError(str(exc)) from exc
        except EmptyHeaderError as exc:
            if self.offset == 0:
                raise ReadError("empty file") from exc
        except TruncatedHeaderError as exc:
            if self.offset == 0:
                raise ReadError(str(exc)) from exc
        except SubsequentHeaderError as exc:
            raise ReadError(str(exc)) from exc
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        self._loaded = True

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002316
2317 #--------------------------------------------------------------------------
2318 # Little helper methods:
2319
def _getmember(self, name, tarinfo=None, normalize=False):
    """Find an archive member by name from bottom to top.

    If tarinfo is given, only the members stored before it are searched.
    With normalize set, both `name' and the member names are passed
    through os.path.normpath() before they are compared. Returns the
    matching member, or None when there is none.
    """
    # getmembers() makes sure all members have been loaded.
    candidates = self.getmembers()

    # Limit the member search list up to tarinfo.
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    if normalize:
        wanted = os.path.normpath(name)
        for entry in reversed(candidates):
            if wanted == os.path.normpath(entry.name):
                return entry
    else:
        for entry in reversed(candidates):
            if name == entry.name:
                return entry
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002342
def _load(self):
    """Read through the entire archive file and look for readable
    members.

    self.next() is called until it returns None; self._loaded is set
    afterwards.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2352
def _check(self, mode=None):
    """Check if TarFile is still open, and if the operation's mode
    corresponds to TarFile's mode.

    `mode' is a collection of acceptable mode characters, or None to
    skip the mode test. Raises OSError when either check fails.
    """
    if self.closed:
        raise OSError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise OSError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002361
def _find_link_target(self, tarinfo):
    """Find the target member of a symlink or hardlink member in the
    archive.

    Raises KeyError when no member matches the link name.
    """
    if tarinfo.issym():
        # A symlink target is taken relative to the directory of the
        # link; empty components are left out of the joined name. The
        # entire archive is always searched.
        pieces = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join(piece for piece in pieces if piece)
        limit = None
    else:
        # Search the archive before the link, because a hard link is
        # just a reference to an already archived file.
        linkname = tarinfo.linkname
        limit = tarinfo

    member = self._getmember(linkname, tarinfo=limit, normalize=True)
    if member is not None:
        return member
    raise KeyError("linkname %r not found" % linkname)
2380
def __iter__(self):
    """Provide an iterator object.

    Members already present in self.members are yielded from the list;
    further ones are fetched through self.next(). Once next() reports
    that nothing is left, self._loaded is set.
    """
    if self._loaded:
        yield from self.members
        return

    # Number of members handed out so far.
    position = 0

    # Fix for SF #1100429: Under rare circumstances it can happen that
    # getmembers() is called during iteration, which will have already
    # exhausted the next() method.
    if self.firstmember is not None:
        position = 1
        yield self.next()

    while True:
        if position < len(self.members):
            current = self.members[position]
        elif self._loaded:
            return
        else:
            current = self.next()
            if not current:
                self._loaded = True
                return
        position += 1
        yield current
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002411
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr.

    The message is printed only when `level' does not exceed self.debug.
    """
    if not level <= self.debug:
        return
    print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002417
def __enter__(self):
    """Enter a with block: run self._check() and return the archive
    object itself.
    """
    self._check()
    return self
2421
def __exit__(self, type, value, traceback):
    """Leave a with block.

    Without an exception the archive is closed through close(). With
    one, only the underlying file object is closed (unless
    self._extfileobj is set) and the archive is marked as closed.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because it would
    # try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002431
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002432#--------------------
2433# exported functions
2434#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
    are able to handle, else return False.

    The archive is opened with this module's open() and closed again
    straight away; a TarError means it cannot be handled.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2445
# Module-level alias for TarFile.open. From this point on the name `open'
# in this module refers to TarFile.open and shadows the builtin open().
open = TarFile.open
Serhiy Storchakad27b4552013-11-24 01:53:29 +02002447
2448
def main():
    """Run the command-line interface of the tarfile module.

    Exactly one of the following actions has to be given:

      -l/--list <tarfile>                  show a listing of the archive
      -e/--extract <tarfile> [<output_dir>]
                                           extract into output_dir
                                           (default: current directory)
      -c/--create <name> [<file> ...]      create an archive from files
      -t/--test <tarfile>                  test whether the archive is
                                           valid

    -v/--verbose enables additional output. The parser exits with
    status 1 when the given file is not a tar archive.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for tarfile module.')
    parser.add_argument('-v', '--verbose', action='store_true',
                        default=False, help='Verbose output')
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument('-l', '--list', metavar='<tarfile>',
                         help='Show listing of a tarfile')
    actions.add_argument('-e', '--extract', nargs='+',
                         metavar=('<tarfile>', '<output_dir>'),
                         help='Extract tarfile into target dir')
    actions.add_argument('-c', '--create', nargs='+',
                         metavar=('<name>', '<file>'),
                         help='Create tarfile from sources')
    actions.add_argument('-t', '--test', metavar='<tarfile>',
                         help='Test if a tarfile is valid')
    args = parser.parse_args()

    def reject(archive):
        # Shared failure exit of the actions that read an archive.
        parser.exit(1, '{!r} is not a tar archive.\n'.format(archive))

    if args.test is not None:
        archive = args.test
        if not is_tarfile(archive):
            reject(archive)
        else:
            with open(archive, 'r') as tar:
                tar.getmembers()
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(archive))

    elif args.list is not None:
        archive = args.list
        if not is_tarfile(archive):
            reject(archive)
        else:
            with TarFile.open(archive, 'r:*') as tf:
                tf.list(verbose=args.verbose)

    elif args.extract is not None:
        given = len(args.extract)
        if given == 1:
            archive = args.extract[0]
            destination = os.curdir
        elif given == 2:
            archive, destination = args.extract
        else:
            parser.exit(1, parser.format_help())

        if not is_tarfile(archive):
            reject(archive)
        else:
            with TarFile.open(archive, 'r:*') as tf:
                tf.extractall(path=destination)
            if args.verbose:
                if destination == '.':
                    report = '{!r} file is extracted.'.format(archive)
                else:
                    report = ('{!r} file is extracted '
                              'into {!r} directory.').format(archive,
                                                             destination)
                print(report)

    elif args.create is not None:
        # The first value names the archive, the remaining ones are the
        # files to add to it.
        tar_name, *sources = args.create
        extension = os.path.splitext(tar_name)[1]
        compression_by_extension = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        compression = compression_by_extension.get(extension)
        tar_mode = 'w' if compression is None else 'w:' + compression

        with TarFile.open(tar_name, tar_mode) as tf:
            for source in sources:
                tf.add(source)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))
2535
# Run the command-line interface when the file is executed as a script.
if __name__ == '__main__':
    main()