blob: ba3e95f281dfdc11589d96b4bbed0a3fa924e48f [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
# Module metadata: release string plus author and contributor credits.
version = "0.9.0"
__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000035
36#---------
37# Imports
38#---------
Serhiy Storchakacf4a2f22015-03-11 17:18:03 +020039from builtins import open as bltn_open
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000040import sys
41import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020042import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000043import shutil
44import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000045import time
46import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000047import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000048import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000049
50try:
Xavier de Gayef44abda2016-12-09 09:33:09 +010051 import pwd
Brett Cannoncd171c82013-07-04 17:43:24 -040052except ImportError:
Xavier de Gayef44abda2016-12-09 09:33:09 +010053 pwd = None
54try:
55 import grp
56except ImportError:
57 grp = None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000058
# Exceptions that are treated as "creating a symbolic link failed here".
# os.symlink on Windows prior to 6.0 raises NotImplementedError.
# OSError (winerror=1314) will be raised if the caller does not hold the
# SeCreateSymbolicLinkPrivilege privilege.
# OSError is always available as a builtin, so it is listed directly
# rather than appended behind a "try ... except NameError" guard that
# could never trigger.
symlink_exception = (AttributeError, NotImplementedError, OSError)
67
# Public names made available by "from tarfile import *".
__all__ = [
    "TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
    "CompressionError", "StreamError", "ExtractError", "HeaderError",
    "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
    "DEFAULT_FORMAT", "open",
]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000073
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records

# Each magic string covers the 8 bytes of the header's magic + version
# fields.  The GNU variant is "ustar" followed by TWO spaces and a NUL;
# with a single space it would be only 7 bytes long and could never
# match or fill that field.
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Values of the one-byte type flag of a member header.
REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

# Identifiers of the archive formats that can be written.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, all other fields
# are treated as strings.  Maps the field name to the converter
# used for its value.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int,
}
145
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000146#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000147# initialization
148#---------------------------------------------------------
# ENCODING is fixed to UTF-8 on Windows ("nt"); everywhere else it is
# whatever the interpreter reports as the filesystem encoding.
ENCODING = "utf-8" if os.name == "nt" else sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000153
154#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000155# Some useful functions
156#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000157
def stn(s, length, encoding, errors):
    """Encode the string s into a bytes field of exactly length bytes.

    The encoded text is cut off after length bytes; a shorter result
    is filled up on the right with NUL bytes.
    """
    encoded = s.encode(encoding, errors)
    return encoded[:length].ljust(length, NUL)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000163
def nts(s, encoding, errors):
    """Decode a null-terminated bytes field into a string.

    Everything from the first NUL byte onwards is dropped; a field
    without any NUL byte is decoded in full.
    """
    text, _, _ = s.partition(b"\0")
    return text.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000171
def nti(s):
    """Decode a header number field into a Python integer.

    Two encodings are recognised (see itn()): a field whose first byte
    is 0o200 or 0o377 holds a big-endian base-256 number in the
    remaining bytes (0o377 marks a negative value), anything else is
    read as a NUL-terminated string of octal digits.  An unparsable
    octal field raises InvalidHeaderError.
    """
    marker = s[0]
    if marker not in (0o200, 0o377):
        try:
            digits = nts(s, "ascii", "strict")
            return int(digits.strip() or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")

    value = int.from_bytes(s[1:], "big")
    if marker == 0o377:
        # Negative numbers are stored as their complement with respect
        # to 256 ** (number of payload bytes).
        value = -(256 ** (len(s) - 1) - value)
    return value
191
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Encode the Python number n as a header number field.

    POSIX 1003.1-1988 stores numbers as digits-1 octal digits followed
    by a null byte, which covers 0 <= n < 8**(digits-1).  For
    GNU_FORMAT a second encoding is available for numbers outside that
    range: a leading 0o200 byte (positive) or 0o377 byte (negative)
    followed by digits-1 bytes holding a big-endian base-256
    representation.  A value that fits neither encoding raises
    ValueError.
    """
    n = int(n)
    width = digits - 1

    if 0 <= n < 8 ** width:
        return bytes("%0*o" % (width, n), "ascii") + NUL

    if format != GNU_FORMAT or not (-256 ** width <= n < 256 ** width):
        raise ValueError("overflow in number field")

    marker = 0o200 if n >= 0 else 0o377
    # Shifting a negative int keeps its sign, so masking each shifted
    # value with 0o377 yields the two's complement payload bytes, most
    # significant byte first.
    payload = ((n >> (8 * shift)) & 0o377 for shift in reversed(range(width)))
    field = bytearray([marker])
    field.extend(payload)
    return field
220
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

    All bytes of the header are summed up except for the chksum field,
    which is counted as if it was filled with spaces.  According to the
    GNU tar sources, some tars (Sun and NeXT) calculate chksum with
    signed char, which gives a different result once bytes with the
    high bit set are present, so both variants are computed.
    """
    # "8x" skips the chksum field itself; the eight spaces it is
    # assumed to contain add up to 8 * 0x20 == 256.
    spaces = 256
    unsigned_total = sum(struct.unpack_from("148B8x356B", buf))
    signed_total = sum(struct.unpack_from("148b8x356b", buf))
    return spaces + unsigned_total, spaces + signed_total
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000233
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.

    With length set to None everything src still has to offer is
    copied.  Data is moved in pieces of bufsize bytes (16 KiB unless
    given).  If src runs dry before length bytes were copied,
    exception("unexpected end of data") is raised.
    """
    if not bufsize:
        bufsize = 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    whole_chunks, tail = divmod(length, bufsize)

    def _chunk_sizes():
        # The sizes of the successive reads: full buffers first, then
        # whatever is left over.
        for _ in range(whole_chunks):
            yield bufsize
        if tail != 0:
            yield tail

    for wanted in _chunk_sizes():
        data = src.read(wanted)
        if len(data) < wanted:
            raise exception("unexpected end of data")
        dst.write(data)
258
def _safe_print(s):
    """Print s followed by a space instead of a newline.

    If sys.stdout reports an encoding, characters that encoding cannot
    represent are replaced by backslash escapes before printing.
    """
    codec = getattr(sys.stdout, 'encoding', None)
    if codec is None:
        printable = s
    else:
        printable = s.encode(codec, 'backslashreplace').decode(codec)
    print(printable, end=' ')
264
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000265
# Exception hierarchy: everything derives from TarError, and all header
# related problems derive from HeaderError.
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000299
300#---------------------------
301# internal stream interface
302#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        """Open the file name with os.open().

        mode is "r" (read only) or "w" (write only; the file is created
        if necessary and truncated).  Any other mode raises KeyError.
        """
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # O_BINARY is not defined on every platform; add it where it is.
        flags |= getattr(os, "O_BINARY", 0)
        self.fd = os.open(name, flags, 0o666)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return at most size bytes read from the file descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write all of the bytes-like object s to the file descriptor.

        os.write() reports how many bytes it actually wrote and that may
        be fewer than requested, so the call is repeated with the
        unwritten rest until nothing is left.  Raises OSError if a call
        makes no progress at all.
        """
        pending = memoryview(s).cast("B")
        while len(pending):
            written = os.write(self.fd, pending)
            if not written:
                # Without this guard a descriptor that accepts nothing
                # would keep the loop spinning forever.
                raise OSError("write returned zero bytes")
            pending = pending[written:]
326
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip, bzip2 or xz compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

        name     -- file name; opened with _LowLevelFile if fileobj is
                    None, and used for the gzip header when writing
        mode     -- "r" or "w"
        comptype -- "tar" (no compression), "gz", "bz2", "xz", or "*"
                    to detect the compression from the data itself
        fileobj  -- object to read from / write to, or None
        bufsize  -- number of bytes moved per read or write on fileobj

        Raises CompressionError if the compression type is unknown or
        its module is unavailable.  A file that was opened here is
        closed again if construction fails.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = b""
        self.pos = 0
        self.closed = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available") from None
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    # Set before the header is parsed so that the
                    # attribute exists as soon as reading can start.
                    self.exception = zlib.error
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available") from None
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available") from None
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except BaseException:
            # Whatever went wrong: do not leak a file opened above,
            # then let the error propagate unchanged.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" is missing if __init__ failed before assigning it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: raw deflate data, the gzip framing is written
        # by hand below and in close().
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        # Header per RFC 1952: magic, method 8 (deflate), flags 8
        # (FNAME), modification time, extra flags, OS.
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # gzip trailer: CRC-32 and size (modulo 2**32) of
                    # the uncompressed data.
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

        Consumes the gzip header from the raw stream.  Raises ReadError
        if the magic number is wrong and CompressionError if the
        compression method is not deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            # Extra field: a two-byte little-endian length followed by
            # that many bytes.  It is part of the *uncompressed* header,
            # so it has to be skipped with the raw __read(); read()
            # would push these bytes through the decompressor and count
            # them in self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # NUL-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # NUL-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # Two-byte header checksum.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos >= 0:
            # Seeking forward means reading and discarding data.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size):
        """Return the next size number of bytes from the stream."""
        assert size is not None
        buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.

        For compressed streams the data is decompressed first; invalid
        compressed data raises ReadError.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            # Skip underlying buffer to avoid unaligned double buffering.
            if self.buf:
                buf = self.buf
                self.buf = b""
            else:
                buf = self.fileobj.read(self.bufsize)
                if not buf:
                    break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception as e:
                raise ReadError("invalid compressed data") from e
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        # Keep what was decompressed beyond the request for next time.
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
566
class _StreamProxy:
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        # Read one block ahead; getcomptype() inspects it.
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block read ahead in __init__.

        This method replaces itself with fileobj.read on the instance,
        so only the first call is answered from the buffer; the size
        argument is not looked at for that first call.
        """
        first_block = self.buf
        self.read = self.fileobj.read
        return first_block

    def getcomptype(self):
        """Return "gz", "bz2", "xz" or "tar" depending on the signature
        found at the start of the buffered block.
        """
        head = self.buf
        if head.startswith(b"\x1f\x8b\x08"):
            return "gz"
        if head[0:3] == b"BZh" and head[4:10] == b"1AY&SY":
            return "bz2"
        if head.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        return "tar"

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class StreamProxy
593
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000594#------------------------
595# Extraction file object
596#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        """Wrap size bytes of the member stored in fileobj.

        fileobj   -- file object holding the data
        offset    -- position in fileobj where the stored data begins
        size      -- size of the member as seen by the reader
        blockinfo -- optional list of (offset, size) pairs naming the
                     parts of the member that are really stored, one
                     after the other, in fileobj; everything in between
                     reads as NUL bytes.  None means that the whole
                     member is stored.
        """
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.  Each entry is
        # (is_data, start, stop, position_in_fileobj_or_None), with
        # start and stop being positions within the member.
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for offset, size in blockinfo:
            if offset > lastpos:
                self.map.append((False, lastpos, offset, None))
            self.map.append((True, offset, offset + size, realpos))
            realpos += size
            lastpos = offset + size
        if lastpos < self.size:
            self.map.append((False, lastpos, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.

        The resulting position is clamped to the range 0..size and
        returned.  An unknown whence raises ValueError.
        """
        if whence == io.SEEK_SET:
            self.position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                self.position = max(self.position + position, 0)
            else:
                self.position = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            self.position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        return self.position

    def read(self, size=None):
        """Read data from the file.

        Returns at most size bytes starting at the current position.  A
        size of None or a negative size reads everything up to the end.
        Raises ReadError if fileobj delivers less data than the map
        says is stored.
        """
        available = self.size - self.position
        if size is None or size < 0:
            # Follow the usual file object convention: a negative size
            # means "until EOF" instead of yielding nothing.
            size = available
        else:
            size = min(size, available)

        chunks = []
        while size > 0:
            # Find the map entry covering the current position.  The
            # search resumes at the entry used last and wraps around.
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                self.fileobj.seek(offset + (self.position - start))
                b = self.fileobj.read(length)
                if len(b) != length:
                    raise ReadError("unexpected end of data")
                chunks.append(b)
            else:
                chunks.append(NUL * length)
            size -= length
            self.position += length
        # Joining once avoids re-copying the result for every chunk.
        return b"".join(chunks)

    def readinto(self, b):
        """Read up to len(b) bytes into b; return the number read."""
        buf = self.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def close(self):
        # Only the wrapper is marked closed; fileobj is left open.
        self.closed = True
#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000700
class ExFileObject(io.BufferedReader):
    """Buffered reader over the data of a single archive member."""

    def __init__(self, tarfile, tarinfo):
        # Expose just the member's data (position, size and sparse
        # layout come from tarinfo) as a raw file object and let
        # io.BufferedReader provide the buffered interface on top.
        member_data = _FileInFile(
            tarfile.fileobj,
            tarinfo.offset_data,
            tarinfo.size,
            tarinfo.sparse,
        )
        super().__init__(member_data)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000708
709#------------------
710# Exported Classes
711#------------------
712class TarInfo(object):
713 """Informational class which holds the details about an
714 archive member given by a tar header block.
715 TarInfo objects are returned by TarFile.getmember(),
716 TarFile.getmembers() and TarFile.gettarinfo() and are
717 usually created internally.
718 """
719
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +0000720 __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
721 "chksum", "type", "linkname", "uname", "gname",
722 "devmajor", "devminor",
723 "offset", "offset_data", "pax_headers", "sparse",
724 "tarfile", "_sparse_structs", "_link_target")
725
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000726 def __init__(self, name=""):
727 """Construct a TarInfo object. name is the optional name
728 of the member.
729 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000730 self.name = name # member name
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000731 self.mode = 0o644 # file permissions
Thomas Wouters477c8d52006-05-27 19:21:47 +0000732 self.uid = 0 # user id
733 self.gid = 0 # group id
734 self.size = 0 # file size
735 self.mtime = 0 # modification time
736 self.chksum = 0 # header checksum
737 self.type = REGTYPE # member type
738 self.linkname = "" # link name
Lars Gustäbel331b8002010-10-04 15:18:47 +0000739 self.uname = "" # user name
740 self.gname = "" # group name
Thomas Wouters477c8d52006-05-27 19:21:47 +0000741 self.devmajor = 0 # device major number
742 self.devminor = 0 # device minor number
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000743
Thomas Wouters477c8d52006-05-27 19:21:47 +0000744 self.offset = 0 # the tar header starts here
745 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000746
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +0000747 self.sparse = None # sparse member information
Guido van Rossumd8faa362007-04-27 19:54:29 +0000748 self.pax_headers = {} # pax header information
749
750 # In pax headers the "name" and "linkname" field are called
751 # "path" and "linkpath".
Serhiy Storchakabdf6b912017-03-19 08:40:32 +0200752 @property
753 def path(self):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000754 return self.name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000755
Serhiy Storchakabdf6b912017-03-19 08:40:32 +0200756 @path.setter
757 def path(self, name):
758 self.name = name
759
760 @property
761 def linkpath(self):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000762 return self.linkname
Serhiy Storchakabdf6b912017-03-19 08:40:32 +0200763
764 @linkpath.setter
765 def linkpath(self, linkname):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000766 self.linkname = linkname
Guido van Rossumd8faa362007-04-27 19:54:29 +0000767
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000768 def __repr__(self):
769 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
770
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000771 def get_info(self):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000772 """Return the TarInfo's attributes as a dictionary.
773 """
774 info = {
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +0000775 "name": self.name,
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000776 "mode": self.mode & 0o7777,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000777 "uid": self.uid,
778 "gid": self.gid,
779 "size": self.size,
780 "mtime": self.mtime,
781 "chksum": self.chksum,
782 "type": self.type,
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +0000783 "linkname": self.linkname,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000784 "uname": self.uname,
785 "gname": self.gname,
786 "devmajor": self.devmajor,
787 "devminor": self.devminor
788 }
789
790 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
791 info["name"] += "/"
792
793 return info
794
Victor Stinnerde629d42010-05-05 21:43:57 +0000795 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000796 """Return a tar header as a string of 512 byte blocks.
797 """
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000798 info = self.get_info()
Guido van Rossume7ba4952007-06-06 23:52:48 +0000799
Guido van Rossumd8faa362007-04-27 19:54:29 +0000800 if format == USTAR_FORMAT:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000801 return self.create_ustar_header(info, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000802 elif format == GNU_FORMAT:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000803 return self.create_gnu_header(info, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000804 elif format == PAX_FORMAT:
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000805 return self.create_pax_header(info, encoding)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000806 else:
807 raise ValueError("invalid format")
808
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000809 def create_ustar_header(self, info, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000810 """Return the object as a ustar header block.
811 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000812 info["magic"] = POSIX_MAGIC
813
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200814 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000815 raise ValueError("linkname is too long")
816
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200817 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
818 info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000819
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000820 return self._create_header(info, USTAR_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000821
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000822 def create_gnu_header(self, info, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000823 """Return the object as a GNU header block sequence.
824 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000825 info["magic"] = GNU_MAGIC
826
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000827 buf = b""
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200828 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000829 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000830
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200831 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000832 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000833
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000834 return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000835
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000836 def create_pax_header(self, info, encoding):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000837 """Return the object as a ustar header block. If it cannot be
838 represented this way, prepend a pax extended header sequence
839 with supplement information.
840 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000841 info["magic"] = POSIX_MAGIC
842 pax_headers = self.pax_headers.copy()
843
844 # Test string fields for values that exceed the field length or cannot
845 # be represented in ASCII encoding.
846 for name, hname, length in (
847 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
848 ("uname", "uname", 32), ("gname", "gname", 32)):
849
Guido van Rossume7ba4952007-06-06 23:52:48 +0000850 if hname in pax_headers:
851 # The pax header has priority.
852 continue
853
Guido van Rossumd8faa362007-04-27 19:54:29 +0000854 # Try to encode the string as ASCII.
855 try:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000856 info[name].encode("ascii", "strict")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000857 except UnicodeEncodeError:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000858 pax_headers[hname] = info[name]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000859 continue
860
Guido van Rossume7ba4952007-06-06 23:52:48 +0000861 if len(info[name]) > length:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000862 pax_headers[hname] = info[name]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000863
864 # Test number fields for values that exceed the field limit or values
865 # that like to be stored as float.
866 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
Guido van Rossume7ba4952007-06-06 23:52:48 +0000867 if name in pax_headers:
868 # The pax header has priority. Avoid overflow.
869 info[name] = 0
870 continue
871
Guido van Rossumd8faa362007-04-27 19:54:29 +0000872 val = info[name]
873 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000874 pax_headers[name] = str(val)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000875 info[name] = 0
876
Guido van Rossume7ba4952007-06-06 23:52:48 +0000877 # Create a pax extended header if necessary.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000878 if pax_headers:
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000879 buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000880 else:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000881 buf = b""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000882
Lars Gustäbel3741eff2007-08-21 12:17:05 +0000883 return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000884
885 @classmethod
Lars Gustäbel3741eff2007-08-21 12:17:05 +0000886 def create_pax_global_header(cls, pax_headers):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000887 """Return the object as a pax global header block sequence.
888 """
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000889 return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000890
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200891 def _posix_split_name(self, name, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000892 """Split a name longer than 100 chars into a prefix
893 and a name part.
894 """
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200895 components = name.split("/")
896 for i in range(1, len(components)):
897 prefix = "/".join(components[:i])
898 name = "/".join(components[i:])
899 if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
900 len(name.encode(encoding, errors)) <= LENGTH_NAME:
901 break
902 else:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000903 raise ValueError("name is too long")
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200904
Guido van Rossumd8faa362007-04-27 19:54:29 +0000905 return prefix, name
906
907 @staticmethod
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000908 def _create_header(info, format, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000909 """Return a header block. info is a dictionary with file
910 information, format must be one of the *_FORMAT constants.
911 """
912 parts = [
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000913 stn(info.get("name", ""), 100, encoding, errors),
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000914 itn(info.get("mode", 0) & 0o7777, 8, format),
Guido van Rossumd8faa362007-04-27 19:54:29 +0000915 itn(info.get("uid", 0), 8, format),
916 itn(info.get("gid", 0), 8, format),
917 itn(info.get("size", 0), 12, format),
918 itn(info.get("mtime", 0), 12, format),
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000919 b" ", # checksum field
Guido van Rossumd8faa362007-04-27 19:54:29 +0000920 info.get("type", REGTYPE),
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000921 stn(info.get("linkname", ""), 100, encoding, errors),
922 info.get("magic", POSIX_MAGIC),
Lars Gustäbel331b8002010-10-04 15:18:47 +0000923 stn(info.get("uname", ""), 32, encoding, errors),
924 stn(info.get("gname", ""), 32, encoding, errors),
Guido van Rossumd8faa362007-04-27 19:54:29 +0000925 itn(info.get("devmajor", 0), 8, format),
926 itn(info.get("devminor", 0), 8, format),
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000927 stn(info.get("prefix", ""), 155, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000928 ]
929
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000930 buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000931 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
Lars Gustäbela280ca752007-08-28 07:34:33 +0000932 buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000933 return buf
934
935 @staticmethod
936 def _create_payload(payload):
937 """Return the string payload filled with zero bytes
938 up to the next 512 byte border.
939 """
940 blocks, remainder = divmod(len(payload), BLOCKSIZE)
941 if remainder > 0:
942 payload += (BLOCKSIZE - remainder) * NUL
943 return payload
944
945 @classmethod
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000946 def _create_gnu_long_header(cls, name, type, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000947 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
948 for name.
949 """
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000950 name = name.encode(encoding, errors) + NUL
Guido van Rossumd8faa362007-04-27 19:54:29 +0000951
952 info = {}
953 info["name"] = "././@LongLink"
954 info["type"] = type
955 info["size"] = len(name)
956 info["magic"] = GNU_MAGIC
957
958 # create extended header + name blocks.
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000959 return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
Guido van Rossumd8faa362007-04-27 19:54:29 +0000960 cls._create_payload(name)
961
962 @classmethod
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000963 def _create_pax_generic_header(cls, pax_headers, type, encoding):
964 """Return a POSIX.1-2008 extended or global header sequence
Guido van Rossumd8faa362007-04-27 19:54:29 +0000965 that contains a list of keyword, value pairs. The values
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000966 must be strings.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000967 """
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000968 # Check if one of the fields contains surrogate characters and thereby
969 # forces hdrcharset=BINARY, see _proc_pax() for more information.
970 binary = False
971 for keyword, value in pax_headers.items():
972 try:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000973 value.encode("utf-8", "strict")
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000974 except UnicodeEncodeError:
975 binary = True
976 break
977
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000978 records = b""
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000979 if binary:
980 # Put the hdrcharset field at the beginning of the header.
981 records += b"21 hdrcharset=BINARY\n"
982
Guido van Rossumd8faa362007-04-27 19:54:29 +0000983 for keyword, value in pax_headers.items():
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000984 keyword = keyword.encode("utf-8")
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000985 if binary:
986 # Try to restore the original byte representation of `value'.
987 # Needless to say, that the encoding must match the string.
988 value = value.encode(encoding, "surrogateescape")
989 else:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000990 value = value.encode("utf-8")
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000991
Guido van Rossumd8faa362007-04-27 19:54:29 +0000992 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
993 n = p = 0
994 while True:
995 n = l + len(str(p))
996 if n == p:
997 break
998 p = n
Lars Gustäbela280ca752007-08-28 07:34:33 +0000999 records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
Guido van Rossumd8faa362007-04-27 19:54:29 +00001000
1001 # We use a hardcoded "././@PaxHeader" name like star does
1002 # instead of the one that POSIX recommends.
1003 info = {}
1004 info["name"] = "././@PaxHeader"
1005 info["type"] = type
1006 info["size"] = len(records)
1007 info["magic"] = POSIX_MAGIC
1008
1009 # Create pax header + record blocks.
Lars Gustäbel3741eff2007-08-21 12:17:05 +00001010 return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
Guido van Rossumd8faa362007-04-27 19:54:29 +00001011 cls._create_payload(records)
1012
Guido van Rossum75b64e62005-01-16 00:16:11 +00001013 @classmethod
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001014 def frombuf(cls, buf, encoding, errors):
1015 """Construct a TarInfo object from a 512 byte bytes object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001016 """
Lars Gustäbel9520a432009-11-22 18:48:49 +00001017 if len(buf) == 0:
1018 raise EmptyHeaderError("empty header")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001019 if len(buf) != BLOCKSIZE:
Lars Gustäbel9520a432009-11-22 18:48:49 +00001020 raise TruncatedHeaderError("truncated header")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001021 if buf.count(NUL) == BLOCKSIZE:
Lars Gustäbel9520a432009-11-22 18:48:49 +00001022 raise EOFHeaderError("end of file header")
Thomas Wouters902d6eb2007-01-09 23:18:33 +00001023
1024 chksum = nti(buf[148:156])
1025 if chksum not in calc_chksums(buf):
Lars Gustäbel9520a432009-11-22 18:48:49 +00001026 raise InvalidHeaderError("bad checksum")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001027
Guido van Rossumd8faa362007-04-27 19:54:29 +00001028 obj = cls()
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001029 obj.name = nts(buf[0:100], encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001030 obj.mode = nti(buf[100:108])
1031 obj.uid = nti(buf[108:116])
1032 obj.gid = nti(buf[116:124])
1033 obj.size = nti(buf[124:136])
1034 obj.mtime = nti(buf[136:148])
1035 obj.chksum = chksum
1036 obj.type = buf[156:157]
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001037 obj.linkname = nts(buf[157:257], encoding, errors)
1038 obj.uname = nts(buf[265:297], encoding, errors)
1039 obj.gname = nts(buf[297:329], encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001040 obj.devmajor = nti(buf[329:337])
1041 obj.devminor = nti(buf[337:345])
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001042 prefix = nts(buf[345:500], encoding, errors)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001043
Guido van Rossumd8faa362007-04-27 19:54:29 +00001044 # Old V7 tar format represents a directory as a regular
1045 # file with a trailing slash.
1046 if obj.type == AREGTYPE and obj.name.endswith("/"):
1047 obj.type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001048
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +00001049 # The old GNU sparse format occupies some of the unused
1050 # space in the buffer for up to 4 sparse structures.
Mike53f7a7c2017-12-14 14:04:53 +03001051 # Save them for later processing in _proc_sparse().
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +00001052 if obj.type == GNUTYPE_SPARSE:
1053 pos = 386
1054 structs = []
1055 for i in range(4):
1056 try:
1057 offset = nti(buf[pos:pos + 12])
1058 numbytes = nti(buf[pos + 12:pos + 24])
1059 except ValueError:
1060 break
1061 structs.append((offset, numbytes))
1062 pos += 24
1063 isextended = bool(buf[482])
1064 origsize = nti(buf[483:495])
1065 obj._sparse_structs = (structs, isextended, origsize)
1066
Guido van Rossumd8faa362007-04-27 19:54:29 +00001067 # Remove redundant slashes from directories.
1068 if obj.isdir():
1069 obj.name = obj.name.rstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001070
Guido van Rossumd8faa362007-04-27 19:54:29 +00001071 # Reconstruct a ustar longname.
1072 if prefix and obj.type not in GNU_TYPES:
1073 obj.name = prefix + "/" + obj.name
1074 return obj
1075
1076 @classmethod
1077 def fromtarfile(cls, tarfile):
1078 """Return the next TarInfo object from TarFile object
1079 tarfile.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001080 """
Guido van Rossumd8faa362007-04-27 19:54:29 +00001081 buf = tarfile.fileobj.read(BLOCKSIZE)
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001082 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001083 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1084 return obj._proc_member(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001085
Guido van Rossumd8faa362007-04-27 19:54:29 +00001086 #--------------------------------------------------------------------------
1087 # The following are methods that are called depending on the type of a
1088 # member. The entry point is _proc_member() which can be overridden in a
1089 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1090 # implement the following
1091 # operations:
1092 # 1. Set self.offset_data to the position where the data blocks begin,
1093 # if there is data that follows.
1094 # 2. Set tarfile.offset to the position where the next member's header will
1095 # begin.
1096 # 3. Return self or another valid TarInfo object.
1097 def _proc_member(self, tarfile):
1098 """Choose the right processing method depending on
1099 the type and call it.
Thomas Wouters89f507f2006-12-13 04:49:30 +00001100 """
Guido van Rossumd8faa362007-04-27 19:54:29 +00001101 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1102 return self._proc_gnulong(tarfile)
1103 elif self.type == GNUTYPE_SPARSE:
1104 return self._proc_sparse(tarfile)
1105 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1106 return self._proc_pax(tarfile)
1107 else:
1108 return self._proc_builtin(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001109
Guido van Rossumd8faa362007-04-27 19:54:29 +00001110 def _proc_builtin(self, tarfile):
1111 """Process a builtin type or an unknown type which
1112 will be treated as a regular file.
1113 """
1114 self.offset_data = tarfile.fileobj.tell()
1115 offset = self.offset_data
1116 if self.isreg() or self.type not in SUPPORTED_TYPES:
1117 # Skip the following data blocks.
1118 offset += self._block(self.size)
1119 tarfile.offset = offset
Thomas Wouters89f507f2006-12-13 04:49:30 +00001120
Guido van Rossume7ba4952007-06-06 23:52:48 +00001121 # Patch the TarInfo object with saved global
Guido van Rossumd8faa362007-04-27 19:54:29 +00001122 # header information.
Guido van Rossume7ba4952007-06-06 23:52:48 +00001123 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001124
1125 return self
1126
1127 def _proc_gnulong(self, tarfile):
1128 """Process the blocks that hold a GNU longname
1129 or longlink member.
1130 """
1131 buf = tarfile.fileobj.read(self._block(self.size))
1132
1133 # Fetch the next header and process it.
Lars Gustäbel9520a432009-11-22 18:48:49 +00001134 try:
1135 next = self.fromtarfile(tarfile)
1136 except HeaderError:
1137 raise SubsequentHeaderError("missing or bad subsequent header")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001138
1139 # Patch the TarInfo object from the next header with
1140 # the longname information.
1141 next.offset = self.offset
1142 if self.type == GNUTYPE_LONGNAME:
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001143 next.name = nts(buf, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001144 elif self.type == GNUTYPE_LONGLINK:
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001145 next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001146
1147 return next
1148
1149 def _proc_sparse(self, tarfile):
1150 """Process a GNU sparse header plus extra headers.
1151 """
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +00001152 # We already collected some sparse structures in frombuf().
1153 structs, isextended, origsize = self._sparse_structs
1154 del self._sparse_structs
Guido van Rossumd8faa362007-04-27 19:54:29 +00001155
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +00001156 # Collect sparse structures from extended header blocks.
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001157 while isextended:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001158 buf = tarfile.fileobj.read(BLOCKSIZE)
1159 pos = 0
Guido van Rossum805365e2007-05-07 22:24:25 +00001160 for i in range(21):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001161 try:
1162 offset = nti(buf[pos:pos + 12])
1163 numbytes = nti(buf[pos + 12:pos + 24])
1164 except ValueError:
1165 break
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001166 if offset and numbytes:
1167 structs.append((offset, numbytes))
Guido van Rossumd8faa362007-04-27 19:54:29 +00001168 pos += 24
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001169 isextended = bool(buf[504])
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001170 self.sparse = structs
Guido van Rossumd8faa362007-04-27 19:54:29 +00001171
1172 self.offset_data = tarfile.fileobj.tell()
1173 tarfile.offset = self.offset_data + self._block(self.size)
1174 self.size = origsize
Guido van Rossumd8faa362007-04-27 19:54:29 +00001175 return self
1176
1177 def _proc_pax(self, tarfile):
1178 """Process an extended or global header as described in
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001179 POSIX.1-2008.
Guido van Rossumd8faa362007-04-27 19:54:29 +00001180 """
1181 # Read the header information.
1182 buf = tarfile.fileobj.read(self._block(self.size))
1183
1184 # A pax header stores supplemental information for either
1185 # the following file (extended) or all following files
1186 # (global).
1187 if self.type == XGLTYPE:
1188 pax_headers = tarfile.pax_headers
1189 else:
1190 pax_headers = tarfile.pax_headers.copy()
1191
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001192 # Check if the pax header contains a hdrcharset field. This tells us
1193 # the encoding of the path, linkpath, uname and gname fields. Normally,
1194 # these fields are UTF-8 encoded but since POSIX.1-2008 tar
1195 # implementations are allowed to store them as raw binary strings if
1196 # the translation to UTF-8 fails.
1197 match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
1198 if match is not None:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001199 pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001200
1201 # For the time being, we don't care about anything other than "BINARY".
1202 # The only other value that is currently allowed by the standard is
1203 # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
1204 hdrcharset = pax_headers.get("hdrcharset")
1205 if hdrcharset == "BINARY":
1206 encoding = tarfile.encoding
1207 else:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001208 encoding = "utf-8"
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001209
Guido van Rossumd8faa362007-04-27 19:54:29 +00001210 # Parse pax header information. A record looks like that:
1211 # "%d %s=%s\n" % (length, keyword, value). length is the size
1212 # of the complete record including the length field itself and
Guido van Rossume7ba4952007-06-06 23:52:48 +00001213 # the newline. keyword and value are both UTF-8 encoded strings.
Antoine Pitroufd036452008-08-19 17:56:33 +00001214 regex = re.compile(br"(\d+) ([^=]+)=")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001215 pos = 0
1216 while True:
1217 match = regex.match(buf, pos)
1218 if not match:
1219 break
1220
1221 length, keyword = match.groups()
1222 length = int(length)
1223 value = buf[match.end(2) + 1:match.start(1) + length - 1]
1224
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001225 # Normally, we could just use "utf-8" as the encoding and "strict"
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001226 # as the error handler, but we better not take the risk. For
1227 # example, GNU tar <= 1.23 is known to store filenames it cannot
1228 # translate to UTF-8 as raw strings (unfortunately without a
1229 # hdrcharset=BINARY header).
1230 # We first try the strict standard encoding, and if that fails we
1231 # fall back on the user's encoding and error handler.
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001232 keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001233 tarfile.errors)
1234 if keyword in PAX_NAME_FIELDS:
1235 value = self._decode_pax_field(value, encoding, tarfile.encoding,
1236 tarfile.errors)
1237 else:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001238 value = self._decode_pax_field(value, "utf-8", "utf-8",
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001239 tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001240
1241 pax_headers[keyword] = value
1242 pos += length
1243
Guido van Rossume7ba4952007-06-06 23:52:48 +00001244 # Fetch the next header.
Lars Gustäbel9520a432009-11-22 18:48:49 +00001245 try:
1246 next = self.fromtarfile(tarfile)
1247 except HeaderError:
1248 raise SubsequentHeaderError("missing or bad subsequent header")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001249
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001250 # Process GNU sparse information.
1251 if "GNU.sparse.map" in pax_headers:
1252 # GNU extended sparse format version 0.1.
1253 self._proc_gnusparse_01(next, pax_headers)
1254
1255 elif "GNU.sparse.size" in pax_headers:
1256 # GNU extended sparse format version 0.0.
1257 self._proc_gnusparse_00(next, pax_headers, buf)
1258
1259 elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
1260 # GNU extended sparse format version 1.0.
1261 self._proc_gnusparse_10(next, pax_headers, tarfile)
1262
Guido van Rossume7ba4952007-06-06 23:52:48 +00001263 if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
Guido van Rossume7ba4952007-06-06 23:52:48 +00001264 # Patch the TarInfo object with the extended header info.
1265 next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1266 next.offset = self.offset
1267
1268 if "size" in pax_headers:
1269 # If the extended header replaces the size field,
1270 # we need to recalculate the offset where the next
1271 # header starts.
1272 offset = next.offset_data
1273 if next.isreg() or next.type not in SUPPORTED_TYPES:
1274 offset += next._block(next.size)
1275 tarfile.offset = offset
1276
1277 return next
1278
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001279 def _proc_gnusparse_00(self, next, pax_headers, buf):
1280 """Process a GNU tar extended sparse header, version 0.0.
1281 """
1282 offsets = []
1283 for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1284 offsets.append(int(match.group(1)))
1285 numbytes = []
1286 for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1287 numbytes.append(int(match.group(1)))
1288 next.sparse = list(zip(offsets, numbytes))
1289
1290 def _proc_gnusparse_01(self, next, pax_headers):
1291 """Process a GNU tar extended sparse header, version 0.1.
1292 """
1293 sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1294 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1295
1296 def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1297 """Process a GNU tar extended sparse header, version 1.0.
1298 """
1299 fields = None
1300 sparse = []
1301 buf = tarfile.fileobj.read(BLOCKSIZE)
1302 fields, buf = buf.split(b"\n", 1)
1303 fields = int(fields)
1304 while len(sparse) < fields * 2:
1305 if b"\n" not in buf:
1306 buf += tarfile.fileobj.read(BLOCKSIZE)
1307 number, buf = buf.split(b"\n", 1)
1308 sparse.append(int(number))
1309 next.offset_data = tarfile.fileobj.tell()
1310 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1311
Guido van Rossume7ba4952007-06-06 23:52:48 +00001312 def _apply_pax_info(self, pax_headers, encoding, errors):
1313 """Replace fields with supplemental information from a previous
1314 pax extended or global header.
1315 """
1316 for keyword, value in pax_headers.items():
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001317 if keyword == "GNU.sparse.name":
1318 setattr(self, "path", value)
1319 elif keyword == "GNU.sparse.size":
1320 setattr(self, "size", int(value))
1321 elif keyword == "GNU.sparse.realsize":
1322 setattr(self, "size", int(value))
1323 elif keyword in PAX_FIELDS:
1324 if keyword in PAX_NUMBER_FIELDS:
1325 try:
1326 value = PAX_NUMBER_FIELDS[keyword](value)
1327 except ValueError:
1328 value = 0
1329 if keyword == "path":
1330 value = value.rstrip("/")
1331 setattr(self, keyword, value)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001332
1333 self.pax_headers = pax_headers.copy()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001334
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001335 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1336 """Decode a single field from a pax record.
1337 """
1338 try:
1339 return value.decode(encoding, "strict")
1340 except UnicodeDecodeError:
1341 return value.decode(fallback_encoding, fallback_errors)
1342
Guido van Rossumd8faa362007-04-27 19:54:29 +00001343 def _block(self, count):
1344 """Round up a byte count by BLOCKSIZE and return it,
1345 e.g. _block(834) => 1024.
1346 """
1347 blocks, remainder = divmod(count, BLOCKSIZE)
1348 if remainder:
1349 blocks += 1
1350 return blocks * BLOCKSIZE
Thomas Wouters89f507f2006-12-13 04:49:30 +00001351
def isreg(self):
    """Return True if the member's type is one of REGULAR_TYPES."""
    return self.type in REGULAR_TYPES
def isfile(self):
    """Return the result of isreg(); provided as an alias."""
    return self.isreg()
def isdir(self):
    """Return True if the member's type is DIRTYPE."""
    return self.type == DIRTYPE
def issym(self):
    """Return True if the member's type is SYMTYPE."""
    return self.type == SYMTYPE
def islnk(self):
    """Return True if the member's type is LNKTYPE."""
    return self.type == LNKTYPE
def ischr(self):
    """Return True if the member's type is CHRTYPE."""
    return self.type == CHRTYPE
def isblk(self):
    """Return True if the member's type is BLKTYPE."""
    return self.type == BLKTYPE
def isfifo(self):
    """Return True if the member's type is FIFOTYPE."""
    return self.type == FIFOTYPE
def issparse(self):
    """Return True if the `sparse' attribute is set (i.e. not None)."""
    return self.sparse is not None
def isdev(self):
    """Return True if the member's type is CHRTYPE, BLKTYPE or FIFOTYPE."""
    return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1372# class TarInfo
1373
1374class TarFile(object):
1375 """The TarFile Class provides an interface to tar archives.
1376 """
1377
# Class-level defaults. Except for `fileobject', __init__() replaces them
# on the instance from its keyword arguments (see the note on `errors').

debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

dereference = False         # If true, add content of linked file to the
                            # tar file, else the link.

ignore_zeros = False        # If true, skips empty or invalid blocks and
                            # continues processing.

errorlevel = 1              # If 0, fatal errors only appear in debug
                            # messages (if debug >= 0). If > 0, errors
                            # are passed to the caller as exceptions.

format = DEFAULT_FORMAT     # The format to use when creating an archive.

encoding = ENCODING         # Encoding for 8-bit character strings.

errors = None               # Error handler for unicode conversion.
                            # __init__() always overwrites this with its
                            # `errors' argument.

tarinfo = TarInfo           # The default TarInfo class to use.

fileobject = ExFileObject   # The file-object for extractfile().
Guido van Rossumd8faa362007-04-27 19:54:29 +00001399
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None,
        errorlevel=None, copybufsize=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file, 'w' to create a new file overwriting an existing one or 'x'
       to create a new file exclusively (the file is opened with the
       builtin mode 'xb'). `mode' defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       `format', `tarinfo', `dereference', `ignore_zeros', `encoding',
       `debug' and `errorlevel' replace the class-level defaults on this
       instance only when they are not None. `errors' is always stored.
       `pax_headers' is only used when the resulting format is PAX_FORMAT,
       otherwise an empty dictionary is stored. `copybufsize' is stored
       unchanged on the instance.

       Raises ValueError for an unknown `mode'. In append mode a
       HeaderError found while scanning the existing members is re-raised
       as ReadError.
    """
    # Map the public mode to the mode string used to open the file.
    modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
    if mode not in modes:
        raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
    self.mode = mode
    self._mode = modes[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        # We opened the file ourselves, so we are responsible for closing it.
        self._extfileobj = False
    else:
        # Borrow the name from the file object if it carries a usable one.
        if (name is None and hasattr(fileobj, "name") and
            isinstance(fileobj.name, (str, bytes))):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        # The caller owns this file object; close() must leave it open.
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding
    # Unlike the attributes above, errors is assigned unconditionally.
    self.errors = errors

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.copybufsize = copybufsize
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            # firstmember must exist as an attribute before next() runs.
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    tarinfo = self.tarinfo.fromtarfile(self)
                    self.members.append(tarinfo)
                except EOFHeaderError:
                    # Rewind so new members are written from self.offset.
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in ("a", "w", "x"):
            # Nothing is left to scan when writing.
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Deliberately catches everything: release the file we opened,
        # mark the object closed and let the original exception propagate.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001499
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001500 #--------------------------------------------------------------------------
1501 # Below are the classmethods which act as alternate constructors to the
1502 # TarFile class. The open() method is the only one that is needed for
1503 # public use; it is the "super"-constructor and is able to select an
1504 # adequate "sub"-constructor for a particular compression using the mapping
1505 # from OPEN_METH.
1506 #
1507 # This concept allows one to subclass TarFile without losing the comfort of
1508 # the super-constructor. A sub-constructor is registered and made available
1509 # by adding it to the mapping in OPEN_METH.
1510
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'r:xz'       open for reading with lzma compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'w:xz'       open for writing with lzma compression

       'x' or 'x:'  create a tarfile exclusively without compression, raise
                    an exception if the file is already created
       'x:gz'       create a gzip compressed tarfile, raise an exception
                    if the file is already created
       'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                    if the file is already created
       'x:xz'       create an lzma compressed tarfile, raise an exception
                    if the file is already created

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'r|xz'       open an lzma compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing
       'w|xz'       open an lzma compressed stream for writing

       `bufsize' is only used for the stream ('|') modes. Additional
       keyword arguments are passed on to the selected constructor.

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered method is able
       to open the file in transparent mode; in that case the message
       lists the reason each method gave.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        # Compressed formats are tried first, plain tar last.
        def not_compressed(comptype):
            return cls.OPEN_METH[comptype] == 'taropen'
        error_msgs = []
        for comptype in sorted(cls.OPEN_METH, key=not_compressed):
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError) as e:
                # Remember why this method failed so that the final
                # error can tell the caller what went wrong.
                error_msgs.append("- method %s: %r" % (comptype, e))
                if fileobj is not None:
                    # Undo whatever the failed attempt consumed.
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully:\n%s"
                        % "\n".join(error_msgs))

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream, **kwargs)
        except:
            # Release the stream on any failure, then re-raise.
            stream.close()
            raise
        # The stream wrapper was created here, so the TarFile owns it.
        t._extfileobj = False
        return t

    elif mode in ("a", "w", "x"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001601
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open the uncompressed tar archive `name' for reading or writing
       and return the new instance. Raises ValueError for any `mode'
       other than 'r', 'a', 'w' or 'x'.
    """
    if mode in ("r", "a", "w", "x"):
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001609
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the gzip compressed tar archive `name' for reading or
       writing. Appending is not allowed.

       Raises ValueError for an unsupported `mode', CompressionError if
       the gzip module cannot be used and ReadError if the data cannot be
       read as gzip in mode 'r'.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    reading = mode == 'r'

    try:
        gz = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
    except OSError:
        if reading and fileobj is not None:
            raise ReadError("not a gzip file")
        raise

    try:
        archive = cls.taropen(name, mode, gz, **kwargs)
    except OSError:
        gz.close()
        if reading:
            raise ReadError("not a gzip file")
        raise
    except:
        # Never leave the gzip wrapper open, whatever went wrong.
        gz.close()
        raise
    # The gzip wrapper was created here, so the archive has to close it.
    archive._extfileobj = False
    return archive
1643
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the bzip2 compressed tar archive `name' for reading or
       writing. Appending is not allowed.

       Raises ValueError for an unsupported `mode', CompressionError if
       the bz2 module cannot be imported and ReadError if the data cannot
       be read as bzip2 in mode 'r'.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # A given file object takes precedence over the file name.
    source = fileobj or name
    bz = bz2.BZ2File(source, mode, compresslevel=compresslevel)

    try:
        archive = cls.taropen(name, mode, bz, **kwargs)
    except (OSError, EOFError):
        bz.close()
        if mode == 'r':
            raise ReadError("not a bzip2 file")
        raise
    except:
        # Never leave the bz2 wrapper open, whatever went wrong.
        bz.close()
        raise
    # The bz2 wrapper was created here, so the archive has to close it.
    archive._extfileobj = False
    return archive
1672
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open the lzma compressed tar archive `name' for reading or
       writing. Appending is not allowed.

       Raises ValueError for an unsupported `mode', CompressionError if
       the lzma module cannot be imported and ReadError if the data cannot
       be read as lzma in mode 'r'.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import lzma
    except ImportError:
        raise CompressionError("lzma module is not available")

    # A given file object takes precedence over the file name.
    source = fileobj or name
    xz = lzma.LZMAFile(source, mode, preset=preset)

    try:
        archive = cls.taropen(name, mode, xz, **kwargs)
    except (lzma.LZMAError, EOFError):
        xz.close()
        if mode == 'r':
            raise ReadError("not an lzma file")
        raise
    except:
        # Never leave the lzma wrapper open, whatever went wrong.
        xz.close()
        raise
    # The lzma wrapper was created here, so the archive has to close it.
    archive._extfileobj = False
    return archive
1700
# All *open() methods are registered here. The keys are the compression
# names used in mode strings (e.g. the "gz" in "r:gz"); the values are the
# names of the classmethods that open() looks up with getattr().
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open",   # bzip2 compressed tar
    "xz":  "xzopen"     # lzma compressed tar
}
1708
1709 #--------------------------------------------------------------------------
1710 # The public methods which TarFile provides:
1711
def close(self):
    """Close the TarFile. When the archive was opened for writing, two
       zero blocks are appended and the file is padded with zeros up to
       a multiple of RECORDSIZE. Calling close() again does nothing.
    """
    if self.closed:
        return

    self.closed = True
    try:
        if self.mode in ("a", "w", "x"):
            trailer = BLOCKSIZE * 2
            self.fileobj.write(NUL * trailer)
            self.offset += trailer
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            leftover = self.offset % RECORDSIZE
            if leftover > 0:
                self.fileobj.write(NUL * (RECORDSIZE - leftover))
    finally:
        # Only close file objects this TarFile is responsible for.
        if not self._extfileobj:
            self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001732
def getmember(self, name):
    """Return the TarInfo object for the member called `name'. KeyError
       is raised if `name' can not be found in the archive. If a member
       occurs more than once in the archive, its last occurrence is
       assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001743
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    # A complete list requires that the whole archive has been scanned.
    if self._loaded:
        return self.members
    self._load()
    return self.members
1753
def getnames(self):
    """Return the names of all archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001759
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object from the result of os.stat or equivalent
       on an existing file. The file is either named by `name', or
       specified as a file object `fileobj' with a file descriptor. If
       given, `arcname' specifies an alternative name for the file in the
       archive, otherwise, the name is taken from the 'name' attribute of
       'fileobj', or the 'name' argument. The name should be a text
       string.

       None is returned if the file is of a type that is not handled here.
    """
    self._check("awx")

    # A file object always supplies its own name.
    if fileobj is not None:
        name = fileobj.name

    # Build the member name: drop the drive, use forward slashes
    # and make absolute paths relative.
    if arcname is None:
        arcname = name
    _drive, arcname = os.path.splitdrive(arcname)
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    # The object that is going to receive the file's metadata.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self  # Not needed

    # fstat for file objects, otherwise lstat or stat depending on
    # the platform and on whether symlinks shall be resolved.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        already_archived = (not self.dereference
                            and statres.st_nlink > 1
                            and inode in self.inodes
                            and arcname != self.inodes[inode])
        if already_archived:
            # A hardlink to a file that is in the archive already.
            member_type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            member_type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        member_type = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        member_type = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        member_type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        member_type = CHRTYPE
    elif stat.S_ISBLK(stmd):
        member_type = BLKTYPE
    else:
        return None

    # Copy everything we found out over to the TarInfo object.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only members of type REGTYPE get a size.
    tarinfo.size = statres.st_size if member_type == REGTYPE else 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = member_type
    tarinfo.linkname = linkname
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if member_type in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1859
def list(self, verbose=True, *, members=None):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced. `members' is optional and must be a subset of the
       list returned by getmembers().
    """
    self._check()

    entries = self if members is None else members
    for tarinfo in entries:
        if verbose:
            _safe_print(stat.filemode(tarinfo.mode))
            owner = tarinfo.uname or tarinfo.uid
            group = tarinfo.gname or tarinfo.gid
            _safe_print("%s/%s" % (owner, group))
            if tarinfo.ischr() or tarinfo.isblk():
                # Devices show "major,minor" in place of a size.
                device = "%d,%d" % (tarinfo.devmajor, tarinfo.devminor)
                _safe_print("%10s" % device)
            else:
                _safe_print("%10d" % tarinfo.size)
            stamp = time.localtime(tarinfo.mtime)[:6]
            _safe_print("%d-%02d-%02d %02d:%02d:%02d" % stamp)

        label = tarinfo.name
        if tarinfo.isdir():
            label += "/"
        _safe_print(label)

        if verbose:
            if tarinfo.issym():
                _safe_print("-> " + tarinfo.linkname)
            if tarinfo.islnk():
                _safe_print("link to " + tarinfo.linkname)
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001891
def add(self, name, arcname=None, recursive=True, *, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `filter' is a function
       that expects a TarInfo object argument and returns the changed
       TarInfo object, if it returns None the TarInfo object will be
       excluded from the archive.
    """
    self._check("awx")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    archiving_itself = (self.name is not None
                        and os.path.abspath(name) == self.name)
    if archiving_itself:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Build the TarInfo object describing the file.
    tarinfo = self.gettarinfo(name, arcname)
    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Give the filter the chance to change or exclude the member.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Regular files are stored together with their data.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as source:
            self.addfile(tarinfo, source)
        return

    # Everything else is stored as a header only.
    is_directory = tarinfo.isdir()
    self.addfile(tarinfo)
    if is_directory and recursive:
        for entry in sorted(os.listdir(name)):
            self.add(os.path.join(name, entry), os.path.join(arcname, entry),
                    recursive, filter=filter)
1942
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, it should be a binary file, and tarinfo.size bytes are read
       from it and added to the archive. You can create TarInfo objects
       directly, or by using gettarinfo().
    """
    self._check("awx")

    # Work on a copy so the caller's object is not the one stored.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)
    bufsize = self.copybufsize

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size, bufsize=bufsize)
        full_blocks, leftover = divmod(member.size, BLOCKSIZE)
        if leftover > 0:
            # Pad the last block with zeros.
            self.fileobj.write(NUL * (BLOCKSIZE - leftover))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001967
def extractall(self, path=".", members=None, *, numeric_owner=False):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers(). If `numeric_owner` is True, only
       the numbers for user/group names are used and not the names.

       NOTE(review): this method joins member names to `path' without
       checking them; whether extract() validates them is not visible
       here, so confirm before using it on untrusted archives.
    """
    pending_dirs = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directories with a safe mode; the original object
            # is kept so the real attributes can be applied later.
            pending_dirs.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 0o700
        # Do not set_attrs directories, as we will do that further down
        self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
                     numeric_owner=numeric_owner)

    # Reverse sort directories.
    pending_dirs.sort(key=lambda entry: entry.name)
    pending_dirs.reverse()

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in pending_dirs:
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2007
def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
    """Extract a single member from the archive, using its full name,
       below `path' (the current working directory by default). `member'
       may be a filename or a TarInfo object. File attributes (owner,
       mtime, mode) are set unless `set_attrs' is False. If
       `numeric_owner' is True, only the numbers for user/group names are
       used and not the names. An OSError is re-raised when errorlevel
       is above 0 and an ExtractError when it is above 1; otherwise the
       error is only reported through _dbg().
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # makelink() needs the location of a hard link's target.
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        destination = os.path.join(path, tarinfo.name)
        self._extract_member(tarinfo, destination, set_attrs=set_attrs,
                             numeric_owner=numeric_owner)
    except OSError as exc:
        if self.errorlevel > 0:
            raise
        if exc.filename is None:
            self._dbg(1, "tarfile: %s" % exc.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (exc.strerror, exc.filename))
    except ExtractError as exc:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % exc)
2045
def extractfile(self, member):
    """Return a file object for a member of the archive. `member' may be
       a filename or a TarInfo object. Regular files and members of an
       unknown type yield the object built by self.fileobject(); a
       (sym)link yields the file object of its target; for every other
       member None is returned.
    """
    self._check("r")

    if isinstance(member, str):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # Members with unknown types are treated as regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # No data is associated with the member (directory, chrdev, blkdev,
    # etc.): there is nothing to wrap in a file object.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # A small but ugly workaround for the case that someone tries to
    # extract a (sym)link as a file-object from a non-seekable stream
    # of tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2076
def _extract_member(self, tarinfo, targetpath, set_attrs=True,
                    numeric_owner=False):
    """Extract the TarInfo object tarinfo to a physical file called
       targetpath. Missing parent directories are created, the member is
       materialized by the make*() method matching its type and, unless
       `set_attrs' is False, owner, mode and modification time are
       applied afterwards.
    """
    # Build the destination pathname: drop trailing slashes and replace
    # forward slashes with the platform specific separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Create directories that are not part of the archive with default
    # permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        note = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        note = tarinfo.name
    self._dbg(1, note)

    # Dispatch on the member type.
    if tarinfo.isreg():
        self.makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self.makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self.makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self.makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self.makelink(tarinfo, targetpath)
    elif tarinfo.type not in SUPPORTED_TYPES:
        self.makeunknown(tarinfo, targetpath)
    else:
        self.makefile(tarinfo, targetpath)

    if not set_attrs:
        return

    self.chown(tarinfo, targetpath, numeric_owner)
    # The mode and the modification time are not applied to symlinks.
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002120
2121 #--------------------------------------------------------------------------
2122 # Below are the different file methods. They are called via
2123 # _extract_member() when extract() is called. They can be replaced in a
2124 # subclass to implement other functionality.
2125
def makedir(self, tarinfo, targetpath):
    """Create the directory targetpath; an already existing entry of
       that name is silently accepted.
    """
    # The directory gets a safe mode here, the real mode is set later
    # in _extract_member().
    safe_mode = 0o700
    try:
        os.mkdir(targetpath, safe_mode)
    except FileExistsError:
        return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002135
def makefile(self, tarinfo, targetpath):
    """Create the file targetpath and fill it with the member's data,
       read from the archive starting at tarinfo.offset_data.
    """
    archive = self.fileobj
    archive.seek(tarinfo.offset_data)
    chunk_size = self.copybufsize
    with bltn_open(targetpath, "wb") as out:
        if tarinfo.sparse is None:
            copyfileobj(archive, out, tarinfo.size, ReadError, chunk_size)
        else:
            # Write each stored chunk at its own offset, then give the
            # file its final length of tarinfo.size bytes.
            for offset, size in tarinfo.sparse:
                out.seek(offset)
                copyfileobj(archive, out, size, ReadError, chunk_size)
            out.seek(tarinfo.size)
            out.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002151
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as if it were a
       regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    notice = ("tarfile: Unknown file type %r, extracted as regular file."
              % tarinfo.type)
    self._dbg(1, notice)
2159
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called targetpath. ExtractError is raised when the
       os module does not provide mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002167
def makedev(self, tarinfo, targetpath):
    """Create the character or block device targetpath. ExtractError is
       raised when the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the archived permission bits with the device type flag.
    device_mode = tarinfo.mode | (
        stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR)
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, device_mode, device)
2182
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link targetpath. When the link cannot
       be created (platform limitation), or when the target of a hard
       link does not exist on disk, the referenced archive member is
       extracted to targetpath instead.
    """
    try:
        # For systems that support symbolic and hard links.
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        elif os.path.exists(tarinfo._link_target):
            # _link_target is prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
    except symlink_exception:
        # Fall back to extracting a copy of the referenced member.
        try:
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002205
def chown(self, tarinfo, targetpath, numeric_owner):
    """Set the owner of targetpath according to tarinfo. If
       numeric_owner is True, .gid/.uid are used instead of
       .gname/.uname. If numeric_owner is False, the names are looked up
       and .gid/.uid serve as fallback when a lookup fails. Nothing is
       done unless os.geteuid() exists and reports 0. A failing
       (l)chown is reported as ExtractError.
    """
    # We have to be root to change ownership.
    if not (hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    gid = tarinfo.gid
    uid = tarinfo.uid
    if not numeric_owner:
        # A name unknown to the system keeps the numeric id.
        try:
            if grp:
                gid = grp.getgrnam(tarinfo.gname)[2]
        except KeyError:
            pass
        try:
            if pwd:
                uid = pwd.getpwnam(tarinfo.uname)[2]
        except KeyError:
            pass

    try:
        # Change the link itself rather than its target, if possible.
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        else:
            os.chown(targetpath, uid, gid)
    except OSError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002234
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in tarinfo.mode to targetpath.
       A failing os.chmod() is reported as ExtractError; nothing is done
       when the os module has no chmod().
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except OSError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002243
def utime(self, tarinfo, targetpath):
    """Set both the access and the modification time of targetpath to
       tarinfo.mtime. A failing os.utime() is reported as ExtractError;
       nothing is done when the os module has no utime().
    """
    if hasattr(os, 'utime'):
        stamp = tarinfo.mtime
        try:
            os.utime(targetpath, (stamp, stamp))
        except OSError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002253
2254 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       ReadError is raised when the archive ends before the expected
       position, when the very first header is invalid, empty or
       truncated, or when a subsequent header cannot be processed.
    """
    self._check("ra")
    if self.firstmember is not None:
        # Hand out the member that has been read ahead already.
        m = self.firstmember
        self.firstmember = None
        return m

    # Advance the file pointer.
    if self.offset != self.fileobj.tell():
        if self.offset == 0:
            # There is no byte in front of offset 0 that could be
            # probed, and seeking to -1 is rejected by file objects.
            self.fileobj.seek(0)
        else:
            # Read the last byte in front of the expected header, so
            # that a truncated archive is detected right here.
            self.fileobj.seek(self.offset - 1)
            if not self.fileobj.read(1):
                raise ReadError("unexpected end of data")

    # Read the next block.
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as e:
            # With ignore_zeros the block is skipped, otherwise it
            # ends the iteration.
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as e:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                # An invalid header at the very beginning is an error.
                raise ReadError(str(e))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as e:
            if self.offset == 0:
                raise ReadError(str(e))
        except SubsequentHeaderError as e:
            raise ReadError(str(e))
        # Either a member was read, or a header error past the first
        # block silently ends the iteration (tarinfo is still None).
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        self._loaded = True

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002305
2306 #--------------------------------------------------------------------------
2307 # Little helper methods:
2308
def _getmember(self, name, tarinfo=None, normalize=False):
    """Look an archive member up by name, searching from the last member
       towards the first. If tarinfo is given, only the members stored
       before it are considered. With `normalize', both the requested
       name and the member names are passed through os.path.normpath()
       before they are compared. None is returned when nothing matches.
    """
    # getmembers() ensures that all members have been loaded.
    candidates = self.getmembers()

    # Limit the member search list up to tarinfo.
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    wanted = os.path.normpath(name) if normalize else name

    for candidate in reversed(candidates):
        current = candidate.name
        if normalize:
            current = os.path.normpath(current)
        if wanted == current:
            return candidate
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002331
def _load(self):
    """Walk through the whole archive by calling next() until it
       returns None, then flag the TarFile as completely loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2341
def _check(self, mode=None):
    """Raise OSError if the TarFile is closed already or if, for a given
       `mode', the TarFile's own mode is not contained in it.
    """
    if self.closed:
        raise OSError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise OSError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002350
def _find_link_target(self, tarinfo):
    """Return the archive member that a symlink or hardlink member
       refers to. KeyError is raised when no such member exists.
    """
    if tarinfo.issym():
        # The target is given relative to the directory of the link;
        # empty components are left out. Always search the entire
        # archive.
        parts = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join([part for part in parts if part])
        limit = None
    else:
        # Search the archive before the link, because a hard link is
        # just a reference to an already archived file.
        linkname = tarinfo.linkname
        limit = tarinfo

    target = self._getmember(linkname, tarinfo=limit, normalize=True)
    if target is None:
        raise KeyError("linkname %r not found" % linkname)
    return target
2369
def __iter__(self):
    """Iterate over the members of the archive. Members already known
       are taken from self.members; further ones are obtained through
       next() until it is exhausted.
    """
    if self._loaded:
        yield from self.members
        return

    # Yield items using TarFile's next() method.
    # When all members have been read, set TarFile as _loaded.
    position = 0
    # Fix for SF #1100429: Under rare circumstances it can
    # happen that getmembers() is called during iteration,
    # which will have already exhausted the next() method.
    if self.firstmember is not None:
        entry = self.next()
        position = 1
        yield entry

    while True:
        if position >= len(self.members):
            if self._loaded:
                return
            entry = self.next()
            if not entry:
                self._loaded = True
                return
        else:
            entry = self.members[position]
        position += 1
        yield entry
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002400
def _dbg(self, level, msg):
    """Print msg to sys.stderr, provided that `level' does not exceed
       the TarFile's debug setting.
    """
    if not level <= self.debug:
        return
    print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002406
def __enter__(self):
    """Enter the runtime context: run _check() on the TarFile and
       return the TarFile itself.
    """
    self._check()
    return self
2410
2411 def __exit__(self, type, value, traceback):
2412 if type is None:
2413 self.close()
2414 else:
2415 # An exception occurred. We must not call close() because
2416 # it would try to write end-of-archive blocks and padding.
2417 if not self._extfileobj:
2418 self.fileobj.close()
2419 self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002420
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002421#--------------------
2422# exported functions
2423#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.
    """
    try:
        # Opening (and closing) the archive succeeds only for data that
        # can be handled; a TarError means that it cannot.
        archive = open(name)
        archive.close()
    except TarError:
        return False
    return True
2434
# Module-level alias for TarFile.open. Within this module the name open
# therefore refers to the tar opener (is_tarfile() and main() call it that
# way) and not to the builtin.
open = TarFile.open
Serhiy Storchakad27b4552013-11-24 01:53:29 +02002436
2437
def main():
    """Command-line interface of the module: parse the command line and
       test, list, extract or create a tar archive accordingly. Exactly
       one of the actions has to be given; a file that is not a tar
       archive makes the program exit with status 1.
    """
    import argparse

    not_an_archive = '{!r} is not a tar archive.\n'

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for tarfile module.')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    # The actions exclude each other and one of them is required.
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument('-l', '--list', metavar='<tarfile>',
                         help='Show listing of a tarfile')
    actions.add_argument('-e', '--extract', nargs='+',
                         metavar=('<tarfile>', '<output_dir>'),
                         help='Extract tarfile into target dir')
    actions.add_argument('-c', '--create', nargs='+',
                         metavar=('<name>', '<file>'),
                         help='Create tarfile from sources')
    actions.add_argument('-t', '--test', metavar='<tarfile>',
                         help='Test if a tarfile is valid')
    args = parser.parse_args()

    if args.test is not None:
        src = args.test
        if not is_tarfile(src):
            parser.exit(1, not_an_archive.format(src))
        else:
            with open(src, 'r') as tar:
                tar.getmembers()
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))

    elif args.list is not None:
        src = args.list
        if not is_tarfile(src):
            parser.exit(1, not_an_archive.format(src))
        else:
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)

    elif args.extract is not None:
        # One operand is the archive alone, two are archive and output
        # directory; anything else prints the help text and exits.
        operands = args.extract
        if len(operands) == 1:
            src, curdir = operands[0], os.curdir
        elif len(operands) == 2:
            src, curdir = operands
        else:
            parser.exit(1, parser.format_help())

        if not is_tarfile(src):
            parser.exit(1, not_an_archive.format(src))
        else:
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} file is extracted.'.format(src)
                else:
                    msg = ('{!r} file is extracted '
                           'into {!r} directory.').format(src, curdir)
                print(msg)

    elif args.create is not None:
        # The first operand names the archive, the others its content.
        tar_name, *tar_files = args.create
        ext = os.path.splitext(tar_name)[1]
        # The compression is derived from the extension of the archive.
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        compression = compressions.get(ext)
        tar_mode = 'w' if compression is None else 'w:' + compression

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))
2524
# Run the command-line interface when the file is executed as a script.
if __name__ == '__main__':
    main()