blob: edd31e96fb4694ddb1ea3c9e9285f930a59ba0e0 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
# Module metadata.
version     = "0.9.0"
__author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000035
36#---------
37# Imports
38#---------
Serhiy Storchakacf4a2f22015-03-11 17:18:03 +020039from builtins import open as bltn_open
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000040import sys
41import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020042import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000043import shutil
44import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000045import time
46import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000047import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000048import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000049
50try:
Xavier de Gayef44abda2016-12-09 09:33:09 +010051 import pwd
Brett Cannoncd171c82013-07-04 17:43:24 -040052except ImportError:
Xavier de Gayef44abda2016-12-09 09:33:09 +010053 pwd = None
54try:
55 import grp
56except ImportError:
57 grp = None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000058
# Exceptions that signal "symlinks cannot be created here":
#  * AttributeError / NotImplementedError: os.symlink is missing or, on
#    Windows prior to 6.0, not implemented.
#  * OSError (winerror=1314): the caller does not hold the
#    SeCreateSymbolicLinkPrivilege privilege.
# OSError is a builtin and always resolves, so no NameError guard is
# needed around it.
symlink_exception = (AttributeError, NotImplementedError, OSError)
67
# from tarfile import *
# Names exported by a star-import of this module.
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
           "CompressionError", "StreamError", "ExtractError", "HeaderError",
           "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
           "DEFAULT_FORMAT", "open"]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000073
74#---------------------------------------------------------
75# tar constants
76#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Member type flags (single-byte values).
REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

# Archive format selectors.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000109
110#---------------------------------------------------------
111# tarfile constants
112#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, all other fields
# are treated as strings.  Maps the field name to the callable
# that converts it.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
145
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000146#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000147# initialization
148#---------------------------------------------------------
# Default encoding for member names: UTF-8 on Windows, otherwise
# whatever the interpreter reports as the filesystem encoding.
ENCODING = "utf-8" if os.name == "nt" else sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000153
154#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000155# Some useful functions
156#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000157
def stn(s, length, encoding, errors):
    """Encode the string s and fit it into a field of length bytes.

    The encoded value is truncated if it is too long and padded on the
    right with NUL bytes if it is too short.
    """
    encoded = s.encode(encoding, errors)
    return encoded[:length].ljust(length, NUL)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000163
def nts(s, encoding, errors):
    """Decode a bytes field to a string, stopping at the first NUL byte.

    Everything from the first null byte onwards is discarded; if there
    is no null byte the whole field is decoded.
    """
    head, _sep, _tail = s.partition(b"\0")
    return head.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000171
def nti(s):
    """Decode a tar header number field into a Python integer.

    Raises InvalidHeaderError if the field is neither a valid base-256
    value nor a valid octal string.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    marker = s[0]
    if marker in (0o200, 0o377):
        # Base-256: the bytes after the marker are a big-endian number.
        magnitude = int.from_bytes(s[1:], "big")
        if marker == 0o377:
            # A 0o377 marker denotes a negative (two's complement) value.
            return magnitude - 256 ** (len(s) - 1)
        return magnitude

    # Classic encoding: NUL-terminated octal digits; an empty or blank
    # field counts as zero.
    try:
        digits = nts(s, "ascii", "strict")
        return int(digits.strip() or "0", 8)
    except ValueError:
        raise InvalidHeaderError("invalid header")
191
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Encode a Python number as a tar header number field.

    Returns a field of `digits` bytes.  Raises ValueError when the
    value does not fit the field in the requested format.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.
    value = int(n)
    width = digits - 1

    if 0 <= value < 8 ** width:
        return ("%0*o" % (width, value)).encode("ascii") + NUL

    limit = 256 ** width
    if format != GNU_FORMAT or not (-limit <= value < limit):
        raise ValueError("overflow in number field")

    marker = 0o200 if value >= 0 else 0o377
    # Reducing modulo 256**width yields the low `width` bytes of the
    # two's complement representation for negative values as well.
    field = bytearray([marker])
    field += (value % limit).to_bytes(width, "big")
    return field
220
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

    All bytes of the 512-byte header are summed except for the 8-byte
    chksum field at offset 148, which is treated as if it was filled
    with spaces (8 * 0x20 == 256).  According to the GNU tar sources,
    some tars (Sun and NeXT) calculate chksum with signed char, which
    gives a different result when bytes with the high bit set are
    present, so both variants are computed.
    """
    sums = []
    for code in ("B", "b"):     # unsigned char first, then signed char
        layout = "148{0}8x356{0}".format(code)
        sums.append(256 + sum(struct.unpack_from(layout, buf)))
    return tuple(sums)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000233
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, the entire remaining content of src is copied.
    Data is moved in chunks of bufsize bytes (16 KiB when bufsize is
    None or 0).  If src runs out of data before length bytes have been
    copied, `exception` is raised with "unexpected end of data".
    """
    chunk_size = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, chunk_size)
        return

    def transfer(count):
        # Move exactly `count` bytes; a short read means truncated input.
        data = src.read(count)
        if len(data) < count:
            raise exception("unexpected end of data")
        dst.write(data)

    full_chunks, tail = divmod(length, chunk_size)
    for _ in range(full_chunks):
        transfer(chunk_size)
    if tail != 0:
        transfer(tail)
    return
258
def filemode(mode):
    """Deprecated in this location; use stat.filemode."""
    # Imported lazily: only needed on this deprecated code path.
    from warnings import warn
    # stacklevel=2 attributes the warning to the caller of filemode().
    warn("deprecated in favor of stat.filemode",
         DeprecationWarning, stacklevel=2)
    return stat.filemode(mode)
265
Serhiy Storchaka3b4f1592014-02-05 20:53:36 +0200266def _safe_print(s):
267 encoding = getattr(sys.stdout, 'encoding', None)
268 if encoding is not None:
269 s = s.encode(encoding, 'backslashreplace').decode(encoding)
270 print(s, end=' ')
271
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000272
# Exception hierarchy.  The docstring alone is a valid class body, so
# no explicit ``pass`` statements are needed.
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000306
307#---------------------------
308# internal stream interface
309#---------------------------
310class _LowLevelFile:
311 """Low-level file object. Supports reading and writing.
312 It is used instead of a regular file object for streaming
313 access.
314 """
315
316 def __init__(self, name, mode):
317 mode = {
318 "r": os.O_RDONLY,
319 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
320 }[mode]
321 if hasattr(os, "O_BINARY"):
322 mode |= os.O_BINARY
Lars Gustäbeld6eb70b2010-04-29 15:37:02 +0000323 self.fd = os.open(name, mode, 0o666)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000324
325 def close(self):
326 os.close(self.fd)
327
328 def read(self, size):
329 return os.read(self.fd, size)
330
331 def write(self, s):
332 os.write(self.fd, s)
333
334class _Stream:
335 """Class that serves as an adapter between TarFile and
336 a stream-like object. The stream-like object only
337 needs to have a read() or write() method and is accessed
338 blockwise. Use of gzip or bzip2 compression is possible.
339 A stream-like object could be for example: sys.stdin,
340 sys.stdout, a socket, a tape device etc.
341
342 _Stream is intended to be used only internally.
343 """
344
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000345 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000346 """Construct a _Stream object.
347 """
348 self._extfileobj = True
349 if fileobj is None:
350 fileobj = _LowLevelFile(name, mode)
351 self._extfileobj = False
352
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000353 if comptype == '*':
354 # Enable transparent compression detection for the
355 # stream interface
356 fileobj = _StreamProxy(fileobj)
357 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000358
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000359 self.name = name or ""
360 self.mode = mode
361 self.comptype = comptype
362 self.fileobj = fileobj
363 self.bufsize = bufsize
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000364 self.buf = b""
Guido van Rossume2a383d2007-01-15 16:59:06 +0000365 self.pos = 0
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000366 self.closed = False
367
Antoine Pitrou605c2932010-09-23 20:15:14 +0000368 try:
369 if comptype == "gz":
370 try:
371 import zlib
Brett Cannoncd171c82013-07-04 17:43:24 -0400372 except ImportError:
Antoine Pitrou605c2932010-09-23 20:15:14 +0000373 raise CompressionError("zlib module is not available")
374 self.zlib = zlib
375 self.crc = zlib.crc32(b"")
376 if mode == "r":
377 self._init_read_gz()
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100378 self.exception = zlib.error
Antoine Pitrou605c2932010-09-23 20:15:14 +0000379 else:
380 self._init_write_gz()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000381
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100382 elif comptype == "bz2":
Antoine Pitrou605c2932010-09-23 20:15:14 +0000383 try:
384 import bz2
Brett Cannoncd171c82013-07-04 17:43:24 -0400385 except ImportError:
Antoine Pitrou605c2932010-09-23 20:15:14 +0000386 raise CompressionError("bz2 module is not available")
387 if mode == "r":
388 self.dbuf = b""
389 self.cmp = bz2.BZ2Decompressor()
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200390 self.exception = OSError
Antoine Pitrou605c2932010-09-23 20:15:14 +0000391 else:
392 self.cmp = bz2.BZ2Compressor()
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100393
394 elif comptype == "xz":
395 try:
396 import lzma
Brett Cannoncd171c82013-07-04 17:43:24 -0400397 except ImportError:
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100398 raise CompressionError("lzma module is not available")
399 if mode == "r":
400 self.dbuf = b""
401 self.cmp = lzma.LZMADecompressor()
402 self.exception = lzma.LZMAError
403 else:
404 self.cmp = lzma.LZMACompressor()
405
406 elif comptype != "tar":
407 raise CompressionError("unknown compression type %r" % comptype)
408
Antoine Pitrou605c2932010-09-23 20:15:14 +0000409 except:
410 if not self._extfileobj:
411 self.fileobj.close()
412 self.closed = True
413 raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000414
415 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000416 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000417 self.close()
418
419 def _init_write_gz(self):
420 """Initialize for writing with gzip compression.
421 """
422 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
423 -self.zlib.MAX_WBITS,
424 self.zlib.DEF_MEM_LEVEL,
425 0)
Guido van Rossume2a383d2007-01-15 16:59:06 +0000426 timestamp = struct.pack("<L", int(time.time()))
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000427 self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000428 if self.name.endswith(".gz"):
429 self.name = self.name[:-3]
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000430 # RFC1952 says we must use ISO-8859-1 for the FNAME field.
431 self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000432
433 def write(self, s):
434 """Write string s to the stream.
435 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000436 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000437 self.crc = self.zlib.crc32(s, self.crc)
438 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000439 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000440 s = self.cmp.compress(s)
441 self.__write(s)
442
443 def __write(self, s):
444 """Write string s to the stream if a whole new block
445 is ready to be written.
446 """
447 self.buf += s
448 while len(self.buf) > self.bufsize:
449 self.fileobj.write(self.buf[:self.bufsize])
450 self.buf = self.buf[self.bufsize:]
451
452 def close(self):
453 """Close the _Stream object. No operation should be
454 done on it afterwards.
455 """
456 if self.closed:
457 return
458
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000459 self.closed = True
Serhiy Storchaka7e7a3db2015-04-10 13:24:41 +0300460 try:
461 if self.mode == "w" and self.comptype != "tar":
462 self.buf += self.cmp.flush()
463
464 if self.mode == "w" and self.buf:
465 self.fileobj.write(self.buf)
466 self.buf = b""
467 if self.comptype == "gz":
Martin Panterb82032f2015-12-11 05:19:29 +0000468 self.fileobj.write(struct.pack("<L", self.crc))
Serhiy Storchaka7e7a3db2015-04-10 13:24:41 +0300469 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
470 finally:
471 if not self._extfileobj:
472 self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000473
474 def _init_read_gz(self):
475 """Initialize for reading a gzip compressed fileobj.
476 """
477 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000478 self.dbuf = b""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000479
480 # taken from gzip.GzipFile with some alterations
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000481 if self.__read(2) != b"\037\213":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000482 raise ReadError("not a gzip file")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000483 if self.__read(1) != b"\010":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000484 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000485
486 flag = ord(self.__read(1))
487 self.__read(6)
488
489 if flag & 4:
490 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
491 self.read(xlen)
492 if flag & 8:
493 while True:
494 s = self.__read(1)
495 if not s or s == NUL:
496 break
497 if flag & 16:
498 while True:
499 s = self.__read(1)
500 if not s or s == NUL:
501 break
502 if flag & 2:
503 self.__read(2)
504
505 def tell(self):
506 """Return the stream's file pointer position.
507 """
508 return self.pos
509
510 def seek(self, pos=0):
511 """Set the stream's file pointer to pos. Negative seeking
512 is forbidden.
513 """
514 if pos - self.pos >= 0:
515 blocks, remainder = divmod(pos - self.pos, self.bufsize)
Guido van Rossum805365e2007-05-07 22:24:25 +0000516 for i in range(blocks):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000517 self.read(self.bufsize)
518 self.read(remainder)
519 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000520 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000521 return self.pos
522
523 def read(self, size=None):
524 """Return the next size number of bytes from the stream.
525 If size is not defined, return all bytes of the stream
526 up to EOF.
527 """
528 if size is None:
529 t = []
530 while True:
531 buf = self._read(self.bufsize)
532 if not buf:
533 break
534 t.append(buf)
Miss Islington (bot)c1b75b52018-07-04 01:32:41 -0700535 buf = b"".join(t)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000536 else:
537 buf = self._read(size)
538 self.pos += len(buf)
539 return buf
540
541 def _read(self, size):
542 """Return size bytes from the stream.
543 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000544 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000545 return self.__read(size)
546
547 c = len(self.dbuf)
Miss Islington (bot)c1b75b52018-07-04 01:32:41 -0700548 t = [self.dbuf]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000549 while c < size:
550 buf = self.__read(self.bufsize)
551 if not buf:
552 break
Guido van Rossumd8faa362007-04-27 19:54:29 +0000553 try:
554 buf = self.cmp.decompress(buf)
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100555 except self.exception:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000556 raise ReadError("invalid compressed data")
Miss Islington (bot)c1b75b52018-07-04 01:32:41 -0700557 t.append(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000558 c += len(buf)
Miss Islington (bot)c1b75b52018-07-04 01:32:41 -0700559 t = b"".join(t)
560 self.dbuf = t[size:]
561 return t[:size]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000562
563 def __read(self, size):
564 """Return size bytes from stream. If internal buffer is empty,
565 read another block from the stream.
566 """
567 c = len(self.buf)
Miss Islington (bot)c1b75b52018-07-04 01:32:41 -0700568 t = [self.buf]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000569 while c < size:
570 buf = self.fileobj.read(self.bufsize)
571 if not buf:
572 break
Miss Islington (bot)c1b75b52018-07-04 01:32:41 -0700573 t.append(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000574 c += len(buf)
Miss Islington (bot)c1b75b52018-07-04 01:32:41 -0700575 t = b"".join(t)
576 self.buf = t[size:]
577 return t[:size]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000578# class _Stream
579
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').

       The first block of the wrapped file object is read up front so
       that its magic bytes can be inspected; that block is handed back
       by the first read() call.
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the pre-read first block, ignoring size.

        Rebinds self.read so that every later call goes straight to the
        wrapped file object.
        """
        first_block = self.buf
        self.read = self.fileobj.read
        return first_block

    def getcomptype(self):
        """Return "gz", "bz2", "xz" or "tar" based on the magic bytes
           at the start of the pre-read block.
        """
        head = self.buf
        if head.startswith(b"\x1f\x8b\x08"):
            return "gz"
        if head[0:3] == b"BZh" and head[4:10] == b"1AY&SY":
            return "bz2"
        if head.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        return "tar"

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class StreamProxy
606
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000607#------------------------
608# Extraction file object
609#------------------------
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000610class _FileInFile(object):
611 """A thin wrapper around an existing file object that
612 provides a part of its data as an individual file
613 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000614 """
615
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000616 def __init__(self, fileobj, offset, size, blockinfo=None):
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000617 self.fileobj = fileobj
618 self.offset = offset
619 self.size = size
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000620 self.position = 0
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200621 self.name = getattr(fileobj, "name", None)
622 self.closed = False
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000623
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000624 if blockinfo is None:
625 blockinfo = [(0, size)]
626
627 # Construct a map with data and zero blocks.
628 self.map_index = 0
629 self.map = []
630 lastpos = 0
631 realpos = self.offset
632 for offset, size in blockinfo:
633 if offset > lastpos:
634 self.map.append((False, lastpos, offset, None))
635 self.map.append((True, offset, offset + size, realpos))
636 realpos += size
637 lastpos = offset + size
638 if lastpos < self.size:
639 self.map.append((False, lastpos, self.size, None))
640
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200641 def flush(self):
642 pass
643
644 def readable(self):
645 return True
646
647 def writable(self):
648 return False
649
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000650 def seekable(self):
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000651 return self.fileobj.seekable()
652
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000653 def tell(self):
654 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000655 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000656 return self.position
657
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200658 def seek(self, position, whence=io.SEEK_SET):
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000659 """Seek to a position in the file.
660 """
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200661 if whence == io.SEEK_SET:
662 self.position = min(max(position, 0), self.size)
663 elif whence == io.SEEK_CUR:
664 if position < 0:
665 self.position = max(self.position + position, 0)
666 else:
667 self.position = min(self.position + position, self.size)
668 elif whence == io.SEEK_END:
669 self.position = max(min(self.size + position, self.size), 0)
670 else:
671 raise ValueError("Invalid argument")
672 return self.position
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000673
674 def read(self, size=None):
675 """Read data from the file.
676 """
677 if size is None:
678 size = self.size - self.position
679 else:
680 size = min(size, self.size - self.position)
681
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000682 buf = b""
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000683 while size > 0:
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000684 while True:
685 data, start, stop, offset = self.map[self.map_index]
686 if start <= self.position < stop:
687 break
688 else:
689 self.map_index += 1
690 if self.map_index == len(self.map):
691 self.map_index = 0
692 length = min(size, stop - self.position)
693 if data:
Lars Gustäbeldd071042011-02-23 11:42:22 +0000694 self.fileobj.seek(offset + (self.position - start))
Lars Gustäbel03572682015-07-06 09:27:24 +0200695 b = self.fileobj.read(length)
696 if len(b) != length:
697 raise ReadError("unexpected end of data")
698 buf += b
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000699 else:
700 buf += NUL * length
701 size -= length
702 self.position += length
703 return buf
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000704
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200705 def readinto(self, b):
706 buf = self.read(len(b))
707 b[:len(buf)] = buf
708 return len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000709
710 def close(self):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000711 self.closed = True
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200712#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000713
class ExFileObject(io.BufferedReader):
    """Buffered, read-only file object for the data of one archive member."""

    def __init__(self, tarfile, tarinfo):
        """Wrap the member described by tarinfo inside tarfile.

        The data region (and sparse map, if any) is taken from tarinfo;
        the bytes are read from tarfile.fileobj.
        """
        raw = _FileInFile(fileobj=tarfile.fileobj,
                          offset=tarinfo.offset_data,
                          size=tarinfo.size,
                          blockinfo=tarinfo.sparse)
        super().__init__(raw)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000721
722#------------------
723# Exported Classes
724#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
                 "chksum", "type", "linkname", "uname", "gname",
                 "devmajor", "devminor",
                 "offset", "offset_data", "pax_headers", "sparse",
                 "tarfile", "_sparse_structs", "_link_target")

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    @property
    def path(self):
        """Alias for the ``name`` attribute (pax keyword "path")."""
        return self.name

    @path.setter
    def path(self, name):
        self.name = name

    @property
    def linkpath(self):
        """Alias for the ``linkname`` attribute (pax keyword "linkpath")."""
        return self.linkname

    @linkpath.setter
    def linkpath(self, linkname):
        self.linkname = linkname

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__, self.name, id(self))

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.

           The mode is masked to its lower 12 bits and a directory
           name always carries a trailing slash.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header as a bytes object of 512 byte blocks.

           format must be USTAR_FORMAT, GNU_FORMAT or PAX_FORMAT,
           otherwise ValueError is raised. errors is not used for
           PAX_FORMAT.
        """
        info = self.get_info()

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.

           Raises ValueError if the encoded linkname is longer than
           LENGTH_LINK bytes or if a too long name cannot be split
           into a prefix and a name part.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.

           A linkname or name that does not fit its header field is
           emitted first as a GNU long link / long name member.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        # Work on a copy so the member's own pax_headers stay untouched.
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")

    def _posix_split_name(self, name, encoding, errors):
        """Split a name longer than 100 chars into a prefix
           and a name part.

           The name is split at the first "/" for which the encoded
           prefix fits LENGTH_PREFIX and the encoded remainder fits
           LENGTH_NAME. Raises ValueError if no such split exists.
        """
        components = name.split("/")
        for i in range(1, len(components)):
            prefix = "/".join(components[:i])
            name = "/".join(components[i:])
            if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
                    len(name.encode(encoding, errors)) <= LENGTH_NAME:
                break
        else:
            raise ValueError("name is too long")

        return prefix, name

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # Placeholder for the 8 byte checksum field (offset 148..155);
            # the real value is patched in below.
            b" " * 8,
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", ""), 32, encoding, errors),
            stn(info.get("gname", ""), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        # Pad the joined fields with NUL bytes up to a full block.
        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Overwrite the first 7 bytes of the checksum field with six octal
        # digits and a NUL; the eighth placeholder byte is kept.
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the bytes payload filled with zero bytes
           up to the next 512 byte border.
        """
        remainder = len(payload) % BLOCKSIZE
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        # The stored name is NUL terminated.
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a POSIX.1-2008 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be strings.
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for value in pax_headers.values():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = b""
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records += b"21 hdrcharset=BINARY\n"

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, that the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            payload_len = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The record length includes its own decimal representation,
            # so iterate until the total stops changing.
            n = p = 0
            while True:
                n = payload_len + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

           Raises EmptyHeaderError for an empty buffer,
           TruncatedHeaderError if it is not exactly BLOCKSIZE bytes,
           EOFHeaderError if it consists of NUL bytes only and
           InvalidHeaderError if the stored checksum does not match.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for _ in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        # The header block has just been consumed, so it started
        # BLOCKSIZE bytes before the current position.
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.

           Raises SubsequentHeaderError if the header following the
           long name data cannot be read.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for _ in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        # The next header position is computed from the size stored in the
        # header; only afterwards is self.size replaced by origsize.
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize
        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2008.

           Raises InvalidHeaderError if a record declares a length of
           zero and SubsequentHeaderError if the header following the
           pax data cannot be read.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails.
        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
        if match is not None:
            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

        # For the time being, we don't care about anything other than "BINARY".
        # The only other value that is currently allowed by the standard is
        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        hdrcharset = pax_headers.get("hdrcharset")
        if hdrcharset == "BINARY":
            encoding = tarfile.encoding
        else:
            encoding = "utf-8"

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A zero length record would never advance `pos' and the
                # loop would spin forever on the same record.
                raise InvalidHeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            # Normally, we could just use "utf-8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(value, encoding, tarfile.encoding,
                        tarfile.errors)
            else:
                value = self._decode_pax_field(value, "utf-8", "utf-8",
                        tarfile.errors)

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(next, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(next, pax_headers, buf)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(next, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.

           The offset and numbytes records are collected from the raw
           header data in buf and paired up in order of appearance.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes))

    def _proc_gnusparse_01(self, next, pax_headers):
        """Process a GNU tar extended sparse header, version 0.1.

           "GNU.sparse.map" is a comma separated list of numbers that
           is read as alternating offset, size values.
        """
        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
        """Process a GNU tar extended sparse header, version 1.0.

           The map is read from the data blocks of the member: a
           newline terminated count followed by twice as many newline
           terminated numbers.
        """
        sparse = []
        buf = tarfile.fileobj.read(BLOCKSIZE)
        fields, buf = buf.split(b"\n", 1)
        fields = int(fields)
        while len(sparse) < fields * 2:
            if b"\n" not in buf:
                # The current number continues in the next block.
                buf += tarfile.fileobj.read(BLOCKSIZE)
            number, buf = buf.split(b"\n", 1)
            sparse.append(int(number))
        # The file's data starts after the blocks that hold the map.
        next.offset_data = tarfile.fileobj.tell()
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
        """
        for keyword, value in pax_headers.items():
            if keyword == "GNU.sparse.name":
                self.path = value
            elif keyword in ("GNU.sparse.size", "GNU.sparse.realsize"):
                self.size = int(value)
            elif keyword in PAX_FIELDS:
                if keyword in PAX_NUMBER_FIELDS:
                    # Unparsable numbers are replaced by 0.
                    try:
                        value = PAX_NUMBER_FIELDS[keyword](value)
                    except ValueError:
                        value = 0
                if keyword == "path":
                    value = value.rstrip("/")
                setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
        """Decode a single field from a pax record.

           value is decoded strictly with encoding first; if that
           fails it is decoded with fallback_encoding and the
           fallback_errors handler.
        """
        try:
            return value.decode(encoding, "strict")
        except UnicodeDecodeError:
            return value.decode(fallback_encoding, fallback_errors)

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        """Return True if the member type is one of REGULAR_TYPES."""
        return self.type in REGULAR_TYPES

    def isfile(self):
        """Same as isreg()."""
        return self.isreg()

    def isdir(self):
        """Return True if the member type is DIRTYPE."""
        return self.type == DIRTYPE

    def issym(self):
        """Return True if the member type is SYMTYPE."""
        return self.type == SYMTYPE

    def islnk(self):
        """Return True if the member type is LNKTYPE."""
        return self.type == LNKTYPE

    def ischr(self):
        """Return True if the member type is CHRTYPE."""
        return self.type == CHRTYPE

    def isblk(self):
        """Return True if the member type is BLKTYPE."""
        return self.type == BLKTYPE

    def isfifo(self):
        """Return True if the member type is FIFOTYPE."""
        return self.type == FIFOTYPE

    def issparse(self):
        """Return True if sparse information has been set."""
        return self.sparse is not None

    def isdev(self):
        """Return True if the member type is CHRTYPE, BLKTYPE or FIFOTYPE."""
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1385# class TarInfo
1386
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    # Class-level defaults. Most of them can be replaced per instance
    # through the __init__ argument of the same name.

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The file-object for extractfile().
Guido van Rossumd8faa362007-04-27 19:54:29 +00001412
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None,
        errorlevel=None, copybufsize=None):
    """Open an (uncompressed) tar archive `name'.

       `mode' is 'r' to read an existing archive, 'a' to append to an
       existing file, 'w' to create a new file overwriting an existing
       one, or 'x' to create a new file exclusively. It defaults to 'r'.
       Any other value raises ValueError.

       If `fileobj' is given, it is used for reading or writing data,
       and its own `mode' attribute (if it has one) replaces the native
       file mode. A given `fileobj' is not closed when the TarFile is
       closed.

       The remaining arguments replace the class-level defaults of the
       same name when they are not None; `errors' is always stored.
       `pax_headers' is only kept if the resulting format is PAX_FORMAT.
    """
    native_modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
    if mode not in native_modes:
        raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
    self.mode = mode
    self._mode = native_modes[mode]

    if fileobj:
        # Caller-supplied file object: borrow its name and mode where
        # available and remember that we do not own it.
        if (name is None and hasattr(fileobj, "name") and
                isinstance(fileobj.name, (str, bytes))):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    else:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes: only explicit (non-None) arguments replace the
    # class-level defaults.
    for attr, override in (("format", format),
                           ("tarinfo", tarinfo),
                           ("dereference", dereference),
                           ("ignore_zeros", ignore_zeros),
                           ("encoding", encoding)):
        if override is not None:
            setattr(self, attr, override)
    self.errors = errors

    use_pax_headers = pax_headers is not None and self.format == PAX_FORMAT
    self.pax_headers = pax_headers if use_pax_headers else {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.copybufsize = copybufsize
    self.closed = False
    self.members = []               # list of members as TarInfo objects
    self._loaded = False            # flag if all members have been read
    self.offset = self.fileobj.tell()
                                    # current position in the archive file
    self.inodes = {}                # dictionary caching the inodes of
                                    # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    member = self.tarinfo.fromtarfile(self)
                    self.members.append(member)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in ("a", "w", "x"):
            self._loaded = True

            if self.pax_headers:
                header = self.tarinfo.create_pax_global_header(
                    self.pax_headers.copy())
                self.fileobj.write(header)
                self.offset += len(header)
    except:
        # Whatever went wrong, do not leak a file we opened ourselves;
        # the exception is always re-raised.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001512
#--------------------------------------------------------------------------
# Below are the classmethods which act as alternate constructors to the
# TarFile class. The open() method is the only one that is needed for
# public use; it is the "super"-constructor and is able to select an
# adequate "sub"-constructor for a particular compression using the mapping
# from OPEN_METH.
#
# This concept allows one to subclass TarFile without losing the comfort of
# the super-constructor. A sub-constructor is registered and made available
# by adding it to the mapping in OPEN_METH.
1523
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'r:xz'       open for reading with lzma compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'w:xz'       open for writing with lzma compression

       'x' or 'x:'  create a tarfile exclusively without compression, raise
                    an exception if the file is already created
       'x:gz'       create a gzip compressed tarfile, raise an exception
                    if the file is already created
       'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                    if the file is already created
       'x:xz'       create an lzma compressed tarfile, raise an exception
                    if the file is already created

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'r|xz'       open an lzma compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing
       'w|xz'       open an lzma compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type, and ReadError if no registered opener accepts
       the file in transparent mode. In the latter case the message
       lists the reason reported by every opener that was tried.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        # Compressed openers are tried first, plain tar last.
        def not_compressed(comptype):
            return cls.OPEN_METH[comptype] == 'taropen'
        error_msgs = []
        for comptype in sorted(cls.OPEN_METH, key=not_compressed):
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError) as e:
                # Remember why this opener gave up so the final error
                # is diagnosable, then rewind for the next attempt.
                error_msgs.append("- method %s: %r" % (comptype, e))
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully:\n%s"
                        % "\n".join(error_msgs))

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream, **kwargs)
        except:
            # Do not leak the stream if the constructor fails.
            stream.close()
            raise
        # The stream wrapper belongs to the TarFile and is closed with it.
        t._extfileobj = False
        return t

    elif mode in ("a", "w", "x"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001614
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open the uncompressed tar archive `name' for reading or writing.

       `mode' must be 'r', 'a', 'w' or 'x', otherwise ValueError is
       raised. All arguments are handed on to the class constructor.
    """
    if mode in ("r", "a", "w", "x"):
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001622
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r', 'w' or 'x',
       CompressionError if gzip.GzipFile cannot be imported, and
       ReadError if an OSError occurs while opening for reading (the
       original OSError is attached as the cause).
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        # Importing the name directly fails with ImportError both when
        # the module is missing and when it lacks GzipFile.
        from gzip import GzipFile
    except ImportError:
        # The import failure itself carries no extra information.
        raise CompressionError("gzip module is not available") from None

    try:
        fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
    except OSError as e:
        if fileobj is not None and mode == 'r':
            raise ReadError("not a gzip file") from e
        raise

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except OSError as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not a gzip file") from e
        raise
    except:
        # Any other failure: release the gzip wrapper and re-raise.
        fileobj.close()
        raise
    # The gzip wrapper was created here, so the TarFile must close it.
    t._extfileobj = False
    return t
1656
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r', 'w' or 'x',
       CompressionError if the bz2 module cannot be imported, and
       ReadError if an OSError or EOFError occurs while opening for
       reading (the original exception is attached as the cause).
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        from bz2 import BZ2File
    except ImportError:
        # The import failure itself carries no extra information.
        raise CompressionError("bz2 module is not available") from None

    fileobj = BZ2File(fileobj or name, mode,
                      compresslevel=compresslevel)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (OSError, EOFError) as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not a bzip2 file") from e
        raise
    except:
        # Any other failure: release the bz2 wrapper and re-raise.
        fileobj.close()
        raise
    # The bz2 wrapper was created here, so the TarFile must close it.
    t._extfileobj = False
    return t
1685
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open lzma compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r', 'w' or 'x',
       CompressionError if the lzma module cannot be imported, and
       ReadError if an LZMAError or EOFError occurs while opening for
       reading (the original exception is attached as the cause).
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        from lzma import LZMAFile, LZMAError
    except ImportError:
        # The import failure itself carries no extra information.
        raise CompressionError("lzma module is not available") from None

    fileobj = LZMAFile(fileobj or name, mode, preset=preset)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (LZMAError, EOFError) as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not an lzma file") from e
        raise
    except:
        # Any other failure: release the lzma wrapper and re-raise.
        fileobj.close()
        raise
    # The lzma wrapper was created here, so the TarFile must close it.
    t._extfileobj = False
    return t
1713
# All *open() methods are registered here.
# Maps the compression suffix used in mode strings to the name of the
# classmethod that opens archives of that kind.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open",   # bzip2 compressed tar
    "xz":  "xzopen"     # lzma compressed tar
}
1721
1722 #--------------------------------------------------------------------------
1723 # The public methods which TarFile provides:
1724
def close(self):
    """Close the TarFile. Calling it again on a closed TarFile does
       nothing.

       In the write modes ('a', 'w', 'x') two zero blocks are appended
       and the archive is padded with zeros to a multiple of RECORDSIZE.
       The underlying file object is closed afterwards unless it was
       supplied by the caller.
    """
    if self.closed:
        return

    self.closed = True
    try:
        if self.mode in ("a", "w", "x"):
            trailer = BLOCKSIZE * 2
            self.fileobj.write(NUL * trailer)
            self.offset += trailer
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            leftover = divmod(self.offset, RECORDSIZE)[1]
            if leftover > 0:
                self.fileobj.write(NUL * (RECORDSIZE - leftover))
    finally:
        # Runs even if writing the trailer failed.
        if not self._extfileobj:
            self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001745
def getmember(self, name):
    """Return the TarInfo object for member `name'.

       The lookup is delegated to self._getmember(); if that yields
       None, KeyError is raised. If a member occurs more than once in
       the archive, its last occurrence is assumed to be the most
       up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001756
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects,
       in the order in which they appear in the archive.
    """
    self._check()
    # A complete list requires the whole archive to have been scanned.
    fully_scanned = self._loaded
    if not fully_scanned:
        self._load()
    return self.members
1766
def getnames(self):
    """Return the names of all archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001772
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object from the result of os.stat or equivalent
       on an existing file.

       The file is either named by `name', or given as a file object
       `fileobj' with a file descriptor; in the latter case the `name'
       attribute of `fileobj' replaces `name'. If given, `arcname' is
       used as the name of the member in the archive instead. The name
       should be a text string.

       Returns None if the file is of a type that is not handled here.
    """
    self._check("awx")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    _, arcname = os.path.splitdrive(arcname)
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self  # Not needed

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        is_known_hardlink = (not self.dereference and
                             statres.st_nlink > 1 and
                             inode in self.inodes and
                             arcname != self.inodes[inode])
        if is_known_hardlink:
            # Is it a hardlink to an already
            # archived file?
            member_type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            member_type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        member_type = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        member_type = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        member_type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        member_type = CHRTYPE
    elif stat.S_ISBLK(stmd):
        member_type = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry data; everything else is stored with size 0.
    tarinfo.size = statres.st_size if member_type == REGTYPE else 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = member_type
    tarinfo.linkname = linkname
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if member_type in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1872
def list(self, verbose=True, *, members=None):
    """Print a table of contents to sys.stdout.

       If `verbose' is False, only the names of the members are
       printed. If it is True, an `ls -l'-like output is produced.
       `members' is optional and must be a subset of the list returned
       by getmembers(); by default the TarFile itself is iterated.
    """
    self._check()

    entries = self if members is None else members
    for tarinfo in entries:
        if verbose:
            _safe_print(stat.filemode(tarinfo.mode))
            owner = tarinfo.uname or tarinfo.uid
            group = tarinfo.gname or tarinfo.gid
            _safe_print("%s/%s" % (owner, group))
            if tarinfo.ischr() or tarinfo.isblk():
                # Devices show "major,minor" in place of a size.
                device = "%d,%d" % (tarinfo.devmajor, tarinfo.devminor)
                _safe_print("%10s" % device)
            else:
                _safe_print("%10d" % tarinfo.size)
            stamp = time.localtime(tarinfo.mtime)[:6]
            _safe_print("%d-%02d-%02d %02d:%02d:%02d" % stamp)

        shown = tarinfo.name
        if tarinfo.isdir():
            shown += "/"
        _safe_print(shown)

        if verbose:
            if tarinfo.issym():
                _safe_print("-> " + tarinfo.linkname)
            if tarinfo.islnk():
                _safe_print("link to " + tarinfo.linkname)
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001904
def add(self, name, arcname=None, recursive=True, *, filter=None):
    """Add the file `name' to the archive.

       `name' may be any type of file (directory, fifo, symbolic link,
       etc.). If given, `arcname' specifies an alternative name for the
       file in the archive. Directories are added recursively, with
       their entries in sorted order, unless `recursive' is False.
       `filter' is a function that receives a TarInfo object and
       returns the (possibly changed) TarInfo object; if it returns
       None, the member is left out of the archive.
    """
    self._check("awx")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    info = self.gettarinfo(name, arcname)
    if info is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        info = filter(info)
        if info is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if info.isreg():
        with bltn_open(name, "rb") as source:
            self.addfile(info, source)
        return

    # Everything else is stored as a header only; a directory may then
    # be followed by its entries.
    is_directory = info.isdir()
    self.addfile(info)
    if is_directory and recursive:
        for entry in sorted(os.listdir(name)):
            self.add(os.path.join(name, entry),
                     os.path.join(arcname, entry),
                     recursive, filter=filter)
1955
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive.

       If `fileobj' is given, it should be a binary file; tarinfo.size
       bytes are copied from it into the archive and the data is padded
       with zeros to a full block. A copy of `tarinfo' is what ends up
       in self.members. TarInfo objects can be created directly or by
       using gettarinfo().
    """
    self._check("awx")

    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)
    bufsize = self.copybufsize

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size, bufsize=bufsize)
        blocks, remainder = divmod(member.size, BLOCKSIZE)
        if remainder > 0:
            # Pad the last, partially filled block with NULs.
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            blocks += 1
        self.offset += blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001980
def extractall(self, path=".", members=None, *, numeric_owner=False):
    """Extract all members from the archive to the current working
    directory, or to `path' if given.

    `members' is optional and must be a subset of the list returned by
    getmembers(); by default the archive itself is iterated.
    Directories are first created with mode 0o700; their owner,
    modification time and permissions are applied afterwards, once
    everything has been extracted. If `numeric_owner` is True, only the
    numbers for user/group names are used and not the names.
    """
    if members is None:
        members = self

    pending_dirs = []
    for tarinfo in members:
        is_dir = tarinfo.isdir()
        if is_dir:
            # Remember the real entry, but extract a copy carrying a
            # safe mode; the real attributes are applied further down.
            pending_dirs.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 0o700
        self.extract(tarinfo, path, set_attrs=not is_dir,
                     numeric_owner=numeric_owner)

    # Walk the directories in reverse name order.
    pending_dirs.sort(key=lambda entry: entry.name)
    for tarinfo in reversed(pending_dirs):
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2020
def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
    """Extract a single member from the archive, using its full name.

    `member' may be a filename or a TarInfo object. The member is
    written below `path' (the current working directory by default) and
    its file information is restored as accurately as possible. File
    attributes (owner, mtime, mode) are set unless `set_attrs' is False.
    If `numeric_owner` is True, only the numbers for user/group names
    are used and not the names.

    An OSError is re-raised only if errorlevel > 0 and an ExtractError
    only if errorlevel > 1; otherwise the failure is reported through
    _dbg().
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    if tarinfo.islnk():
        # makelink() reads this attribute when it creates hard links.
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        destination = os.path.join(path, tarinfo.name)
        self._extract_member(tarinfo, destination,
                             set_attrs=set_attrs,
                             numeric_owner=numeric_owner)
    except OSError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2058
def extractfile(self, member):
    """Return a file object for a member of the archive.

    `member' may be a filename or a TarInfo object. For a regular file
    (or a member of unknown type, which is treated like one) the result
    of self.fileobject(self, tarinfo) is returned. For a link, the file
    object of the link's target is returned. For anything else, None is
    returned.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Members with unknown types are treated as regular files.
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # No data is associated with the member (directory, chrdev,
        # blkdev, etc.), so there is no file object to hand out.
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2089
def _extract_member(self, tarinfo, targetpath, set_attrs=True,
                    numeric_owner=False):
    """Write the TarInfo object `tarinfo' to the physical file
    `targetpath'.

    Missing parent directories are created, the member is materialized
    by the make*() method that matches its type and, unless `set_attrs'
    is False, owner, mode and modification time are applied.
    """
    # Drop trailing slashes and switch to platform specific separators.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Directories that are not part of the archive are created with
    # default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        message = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        message = tarinfo.name
    self._dbg(1, message)

    # Select the factory method for this member type.
    if tarinfo.isreg():
        make = self.makefile
    elif tarinfo.isdir():
        make = self.makedir
    elif tarinfo.isfifo():
        make = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        make = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        make = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        make = self.makeunknown
    else:
        make = self.makefile
    make(tarinfo, targetpath)

    if not set_attrs:
        return

    self.chown(tarinfo, targetpath, numeric_owner)
    # Mode and mtime are not applied to symbolic links.
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002133
2134 #--------------------------------------------------------------------------
2135 # Below are the different file methods. They are called via
2136 # _extract_member() when extract() is called. They can be replaced in a
2137 # subclass to implement other functionality.
2138
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'.

    A FileExistsError raised by os.mkdir() is ignored.
    """
    # Use a safe mode for the directory, the real mode is set
    # later in _extract_member().
    safe_mode = 0o700
    try:
        os.mkdir(targetpath, safe_mode)
    except FileExistsError:
        # Something already exists at targetpath; leave it as it is.
        return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002148
def makefile(self, tarinfo, targetpath):
    """Write the data of `tarinfo' to a new file called `targetpath'.

    For a sparse member, each (offset, size) segment listed in
    tarinfo.sparse is written at its offset and the file is then
    truncated at tarinfo.size. Otherwise tarinfo.size bytes are copied
    in one go.
    """
    archive = self.fileobj
    archive.seek(tarinfo.offset_data)
    chunk_size = self.copybufsize
    with bltn_open(targetpath, "wb") as out:
        if tarinfo.sparse is None:
            copyfileobj(archive, out, tarinfo.size, ReadError, chunk_size)
        else:
            for offset, size in tarinfo.sparse:
                out.seek(offset)
                copyfileobj(archive, out, size, ReadError, chunk_size)
            # Fix the final length of the output file.
            out.seek(tarinfo.size)
            out.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002164
def makeunknown(self, tarinfo, targetpath):
    """Extract a TarInfo object of unknown type to `targetpath' as if
    it were a regular file, and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    note = ("tarfile: Unknown file type %r, "
            "extracted as regular file." % tarinfo.type)
    self._dbg(1, note)
2172
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called `targetpath'.

    Raises ExtractError if the os module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002180
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node called `targetpath'.

    Raises ExtractError if the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the matching file type bit.
    mode = tarinfo.mode
    mode |= stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2195
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link called `targetpath'.

    A hard link is created with os.link() when the link target set up
    by extract() exists on disk; otherwise the referenced archive
    member is extracted to `targetpath' instead. The same fallback is
    used when link creation raises symlink_exception; if the member
    cannot be found in that case, ExtractError is raised.
    """
    try:
        # For systems that support symbolic and hard links.
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        elif os.path.exists(tarinfo._link_target):
            # _link_target is prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            source = self._find_link_target(tarinfo)
            self._extract_member(source, targetpath)
    except symlink_exception:
        try:
            source = self._find_link_target(tarinfo)
            self._extract_member(source, targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002218
def chown(self, tarinfo, targetpath, numeric_owner):
    """Set the owner of `targetpath' according to `tarinfo'.

    Nothing is done unless os.geteuid() exists and returns 0. If
    `numeric_owner' is True, .gid/.uid are used directly. Otherwise
    .gname/.uname are looked up first and .gid/.uid serve as the
    fallback when a lookup raises KeyError. Raises ExtractError if
    changing the owner fails with an OSError.
    """
    # We have to be root to do so.
    if not hasattr(os, "geteuid") or os.geteuid() != 0:
        return

    gid = tarinfo.gid
    uid = tarinfo.uid
    if not numeric_owner:
        try:
            if grp:
                gid = grp.getgrnam(tarinfo.gname)[2]
        except KeyError:
            pass
        try:
            if pwd:
                uid = pwd.getpwnam(tarinfo.uname)[2]
        except KeyError:
            pass

    try:
        # Use lchown() for symbolic links when it is available.
        if tarinfo.issym() and hasattr(os, "lchown"):
            change_owner = os.lchown
        else:
            change_owner = os.chown
        change_owner(targetpath, uid, gid)
    except OSError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002247
def chmod(self, tarinfo, targetpath):
    """Set the file permissions of `targetpath' to tarinfo.mode.

    Does nothing if the os module has no chmod(). Raises ExtractError
    if os.chmod() fails with an OSError.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except OSError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002256
def utime(self, tarinfo, targetpath):
    """Set the access and modification time of `targetpath' to
    tarinfo.mtime.

    Does nothing if the os module has no utime(). Raises ExtractError
    if os.utime() fails with an OSError.
    """
    if hasattr(os, 'utime'):
        try:
            os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
        except OSError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002266
2267 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
    TarFile is opened for reading. Return None if there is no more
    available.

    A member stored in self.firstmember is handed out first. Otherwise
    the file pointer is moved to self.offset and the next header is
    parsed with self.tarinfo.fromtarfile(). A successfully read member
    is appended to self.members; when no member could be read, the
    TarFile is marked as fully loaded.

    Raises ReadError if the data ends before self.offset, if the very
    first header (offset 0) is invalid, empty or truncated, or if a
    subsequent header is malformed.
    """
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Advance the file pointer.
    if self.offset != self.fileobj.tell():
        if self.offset == 0:
            # There is no byte in front of offset 0 to probe, and
            # seeking to offset - 1 would be a negative absolute seek.
            self.fileobj.seek(0)
        else:
            # Read the byte just before the offset to make sure the
            # data really extends that far.
            self.fileobj.seek(self.offset - 1)
            if not self.fileobj.read(1):
                raise ReadError("unexpected end of data")

    # Read the next block.
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as e:
            # With ignore_zeros the block is skipped, otherwise it ends
            # the iteration.
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as e:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                raise ReadError(str(e))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as e:
            if self.offset == 0:
                raise ReadError(str(e))
        except SubsequentHeaderError as e:
            raise ReadError(str(e))
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        self._loaded = True

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002318
2319 #--------------------------------------------------------------------------
2320 # Little helper methods:
2321
Lars Gustäbel1b512722010-06-03 12:45:16 +00002322 def _getmember(self, name, tarinfo=None, normalize=False):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002323 """Find an archive member by name from bottom to top.
2324 If tarinfo is given, it is used as the starting point.
2325 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002326 # Ensure that all members have been loaded.
2327 members = self.getmembers()
2328
Lars Gustäbel1b512722010-06-03 12:45:16 +00002329 # Limit the member search list up to tarinfo.
2330 if tarinfo is not None:
2331 members = members[:members.index(tarinfo)]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002332
Lars Gustäbel1b512722010-06-03 12:45:16 +00002333 if normalize:
2334 name = os.path.normpath(name)
2335
2336 for member in reversed(members):
2337 if normalize:
2338 member_name = os.path.normpath(member.name)
2339 else:
2340 member_name = member.name
2341
2342 if name == member_name:
2343 return member
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002344
def _load(self):
    """Read through the entire archive file by calling next() until it
    returns None, then mark the TarFile as loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2354
def _check(self, mode=None):
    """Check that the TarFile is still open and, if `mode' is given,
    that TarFile's mode is contained in it. Raise OSError otherwise.
    """
    if self.closed:
        raise OSError("%s is closed" % self.__class__.__name__)
    mode_allowed = mode is None or self.mode in mode
    if not mode_allowed:
        raise OSError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002363
def _find_link_target(self, tarinfo):
    """Return the archive member a symlink or hardlink member points
    to. Raise KeyError if it cannot be found.
    """
    if tarinfo.issym():
        # A symlink target is taken relative to the directory of the
        # link; always search the entire archive.
        parts = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join([part for part in parts if part])
        limit = None
    else:
        # Search the archive before the link, because a hard link is
        # just a reference to an already archived file.
        linkname = tarinfo.linkname
        limit = tarinfo

    target = self._getmember(linkname, tarinfo=limit, normalize=True)
    if target is None:
        raise KeyError("linkname %r not found" % linkname)
    return target
2382
def __iter__(self):
    """Provide an iterator over the members of the archive.

    If the TarFile is already loaded, the members list is yielded as
    is. Otherwise members are taken from self.members by index and, once
    that list is exhausted, read with next(); when next() returns
    nothing, the TarFile is marked as loaded.
    """
    if self._loaded:
        yield from self.members
        return

    position = 0
    # Fix for SF #1100429: Under rare circumstances it can
    # happen that getmembers() is called during iteration,
    # which will have already exhausted the next() method.
    if self.firstmember is not None:
        member = self.next()
        position += 1
        yield member

    while True:
        if position < len(self.members):
            # Already read, possibly by someone else in the meantime.
            member = self.members[position]
        elif self._loaded:
            return
        else:
            member = self.next()
            if not member:
                self._loaded = True
                return
        position += 1
        yield member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002413
def _dbg(self, level, msg):
    """Write `msg' to sys.stderr if `level' does not exceed self.debug.
    """
    if self.debug >= level:
        print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002419
def __enter__(self):
    """Enter the runtime context: verify through _check() that the
    TarFile is still open and return the TarFile itself.
    """
    self._check()
    return self
2423
def __exit__(self, type, value, traceback):
    """Leave the runtime context.

    Without an exception the TarFile is closed normally. With one, only
    the underlying file object is closed (unless self._extfileobj is
    set) and the TarFile is flagged as closed.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because
    # it would try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002433
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002434#--------------------
2435# exported functions
2436#--------------------
def is_tarfile(name):
    """Return True if `name' points to a tar archive that we are able
    to handle, else return False.

    The archive is opened and closed again right away; a TarError
    raised along the way means it cannot be handled.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2447
# Module-level alias for TarFile.open. Within this module the name
# `open' refers to it (and not to the builtin) from here on.
open = TarFile.open
Serhiy Storchakad27b4552013-11-24 01:53:29 +02002449
2450
def main():
    """Run the command-line interface of the tarfile module.

    Exactly one of -l/--list, -e/--extract, -c/--create and -t/--test
    must be given; -v/--verbose enables additional output. If the given
    file is not a tar archive, the parser exits with status 1.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for tarfile module.')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument('-l', '--list', metavar='<tarfile>',
                         help='Show listing of a tarfile')
    actions.add_argument('-e', '--extract', nargs='+',
                         metavar=('<tarfile>', '<output_dir>'),
                         help='Extract tarfile into target dir')
    actions.add_argument('-c', '--create', nargs='+',
                         metavar=('<name>', '<file>'),
                         help='Create tarfile from sources')
    actions.add_argument('-t', '--test', metavar='<tarfile>',
                         help='Test if a tarfile is valid')
    args = parser.parse_args()

    not_a_tar = '{!r} is not a tar archive.\n'

    if args.test is not None:
        src = args.test
        if not is_tarfile(src):
            parser.exit(1, not_a_tar.format(src))
        else:
            with open(src, 'r') as tar:
                tar.getmembers()
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))

    elif args.list is not None:
        src = args.list
        if not is_tarfile(src):
            parser.exit(1, not_a_tar.format(src))
        else:
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)

    elif args.extract is not None:
        # Either "<tarfile>" alone or "<tarfile> <output_dir>".
        given = len(args.extract)
        if given == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif given == 2:
            src, curdir = args.extract
        else:
            parser.exit(1, parser.format_help())

        if not is_tarfile(src):
            parser.exit(1, not_a_tar.format(src))
        else:
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} file is extracted.'.format(src)
                else:
                    msg = ('{!r} file is extracted '
                           'into {!r} directory.').format(src, curdir)
                print(msg)

    elif args.create is not None:
        # The first value is the archive name, the rest are the sources.
        tar_name, *tar_files = args.create
        # The compression is derived from the archive's file extension.
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        ext = os.path.splitext(tar_name)[1]
        codec = compressions.get(ext)
        tar_mode = 'w' if codec is None else 'w:' + codec

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))
2537
# Run the command-line interface when this file is executed as a script.
if __name__ == '__main__':
    main()