blob: 1d15612616f1d8ca7ec3d21c028b8495bbe87302 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
# Module metadata: release string of this implementation, followed by
# the author and the contributors.
version = "0.9.0"
__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000035
36#---------
37# Imports
38#---------
Serhiy Storchakacf4a2f22015-03-11 17:18:03 +020039from builtins import open as bltn_open
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000040import sys
41import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020042import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000043import shutil
44import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000045import time
46import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000047import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000048import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000049
50try:
Xavier de Gayef44abda2016-12-09 09:33:09 +010051 import pwd
Brett Cannoncd171c82013-07-04 17:43:24 -040052except ImportError:
Xavier de Gayef44abda2016-12-09 09:33:09 +010053 pwd = None
54try:
55 import grp
56except ImportError:
57 grp = None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000058
# Exceptions that are treated as "a symbolic link cannot be created":
#   * NotImplementedError - os.symlink on Windows prior to 6.0;
#   * OSError (winerror=1314) - the caller does not hold the
#     SeCreateSymbolicLinkPrivilege privilege.
# OSError is a builtin on every platform, so it is listed directly
# instead of being appended inside a try/except NameError that could
# never trigger.
symlink_exception = (AttributeError, NotImplementedError, OSError)
67
# Names exported by "from tarfile import *".
__all__ = [
    "TarFile",
    "TarInfo",
    "is_tarfile",
    "TarError",
    "ReadError",
    "CompressionError",
    "StreamError",
    "ExtractError",
    "HeaderError",
    "ENCODING",
    "USTAR_FORMAT",
    "GNU_FORMAT",
    "PAX_FORMAT",
    "DEFAULT_FORMAT",
    "open",
]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000073
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records

# Both magic strings are 8 bytes long: they cover the 6-byte "magic"
# field and the 2-byte "version" field of a header block.  The blanks
# of the GNU magic are written as \x20 escapes so that whitespace
# normalisation can never silently shorten the value.
GNU_MAGIC = b"ustar\x20\x20\x00"    # magic gnu tar string: "ustar", 2 blanks, NUL
POSIX_MAGIC = b"ustar\x00" b"00"    # magic posix tar string: "ustar", NUL, "00"

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = PAX_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000109
#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, GNUTYPE_SPARSE)

# File types that tarfile supports: the standard member types followed
# by the GNU specific ones.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
) + GNU_TYPES

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE, CONTTYPE, GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, mapped to the type used to
# convert them; all other fields are treated as strings.
PAX_NUMBER_FIELDS = dict(
    atime=float,
    ctime=float,
    mtime=float,
    uid=int,
    gid=int,
    size=int,
)
145
#---------------------------------------------------------
# initialization
#---------------------------------------------------------
# Default encoding: always UTF-8 on Windows, the filesystem encoding
# reported by the interpreter everywhere else.
ENCODING = "utf-8" if os.name == "nt" else sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000153
154#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000155# Some useful functions
156#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000157
def stn(s, length, encoding, errors):
    """Encode the string s and fit it into a field of length bytes.

    The encoded value is cut off after length bytes when it is too
    long (no terminating null byte remains in that case); otherwise
    it is padded on the right with null bytes.
    """
    encoded = s.encode(encoding, errors)
    return encoded[:length].ljust(length, NUL)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000163
def nts(s, encoding, errors):
    """Decode a null-terminated bytes object into a string.

    Everything from the first null byte onwards is discarded; when
    there is no null byte the whole value is decoded.
    """
    head, _sep, _tail = s.partition(b"\0")
    return head.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000171
def nti(s):
    """Decode a header number field into a python number.

    Raises InvalidHeaderError if the field holds neither a valid
    octal string nor a base-256 value.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    marker = s[0]
    if marker not in (0o200, 0o377):
        # Plain encoding: a null-terminated string of octal digits; a
        # field that is blank after stripping counts as zero.
        try:
            text = nts(s, "ascii", "strict")
            return int(text.strip() or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")

    # GNU encoding: the bytes after the marker are a big-endian
    # base-256 number; a 0o377 marker flags a negative value.
    value = int.from_bytes(s[1:], "big")
    if marker == 0o377:
        value -= 256 ** (len(s) - 1)
    return value
191
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Encode a python number as a number field of digits bytes.

    Raises ValueError("overflow in number field") if the number does
    not fit into the field using the encodings allowed for format.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicates this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.
    n = int(n)
    octal_limit = 8 ** (digits - 1)
    binary_limit = 256 ** (digits - 1)

    if 0 <= n < octal_limit:
        return bytes("%0*o" % (digits - 1, n), "ascii") + NUL

    if format != GNU_FORMAT or not (-binary_limit <= n < binary_limit):
        raise ValueError("overflow in number field")

    # Masking yields the low digits-1 bytes of the value; for negative
    # numbers that is their two's complement representation.
    marker = 0o200 if n >= 0 else 0o377
    field = bytearray([marker])
    field += (n & (binary_limit - 1)).to_bytes(digits - 1, "big")
    return field
220
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

    All characters of the header are summed up except for the chksum
    field, which is treated as if it was filled with spaces.
    According to the GNU tar sources, some tars (Sun and NeXT)
    calculate chksum with signed char, which gives a different result
    if there are chars in the buffer with the high bit set, hence both
    variants are computed.
    """
    # The eight skipped chksum bytes count as eight blanks.
    blank_field = 8 * ord(" ")
    as_unsigned = struct.unpack_from("148B8x356B", buf)
    as_signed = struct.unpack_from("148b8x356b", buf)
    return blank_field + sum(as_unsigned), blank_field + sum(as_signed)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000233
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, the entire content is copied.  The data is
    moved in chunks of bufsize bytes (16 KiB unless given).  If src
    runs dry before length bytes were copied, exception is raised
    with the message "unexpected end of data".
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    def transfer(count):
        # Move exactly count bytes or fail.
        chunk = src.read(count)
        if len(chunk) < count:
            raise exception("unexpected end of data")
        dst.write(chunk)

    full_chunks, tail = divmod(length, bufsize)
    for _ in range(full_chunks):
        transfer(bufsize)
    if tail != 0:
        transfer(tail)
258
Serhiy Storchaka3b4f1592014-02-05 20:53:36 +0200259def _safe_print(s):
260 encoding = getattr(sys.stdout, 'encoding', None)
261 if encoding is not None:
262 s = s.encode(encoding, 'backslashreplace').decode(encoding)
263 print(s, end=' ')
264
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000265
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000299
300#---------------------------
301# internal stream interface
302#---------------------------
303class _LowLevelFile:
304 """Low-level file object. Supports reading and writing.
305 It is used instead of a regular file object for streaming
306 access.
307 """
308
309 def __init__(self, name, mode):
310 mode = {
311 "r": os.O_RDONLY,
312 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
313 }[mode]
314 if hasattr(os, "O_BINARY"):
315 mode |= os.O_BINARY
Lars Gustäbeld6eb70b2010-04-29 15:37:02 +0000316 self.fd = os.open(name, mode, 0o666)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000317
318 def close(self):
319 os.close(self.fd)
320
321 def read(self, size):
322 return os.read(self.fd, size)
323
324 def write(self, s):
325 os.write(self.fd, s)
326
327class _Stream:
328 """Class that serves as an adapter between TarFile and
329 a stream-like object. The stream-like object only
330 needs to have a read() or write() method and is accessed
331 blockwise. Use of gzip or bzip2 compression is possible.
332 A stream-like object could be for example: sys.stdin,
333 sys.stdout, a socket, a tape device etc.
334
335 _Stream is intended to be used only internally.
336 """
337
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000338 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000339 """Construct a _Stream object.
340 """
341 self._extfileobj = True
342 if fileobj is None:
343 fileobj = _LowLevelFile(name, mode)
344 self._extfileobj = False
345
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000346 if comptype == '*':
347 # Enable transparent compression detection for the
348 # stream interface
349 fileobj = _StreamProxy(fileobj)
350 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000351
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000352 self.name = name or ""
353 self.mode = mode
354 self.comptype = comptype
355 self.fileobj = fileobj
356 self.bufsize = bufsize
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000357 self.buf = b""
Guido van Rossume2a383d2007-01-15 16:59:06 +0000358 self.pos = 0
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000359 self.closed = False
360
Antoine Pitrou605c2932010-09-23 20:15:14 +0000361 try:
362 if comptype == "gz":
363 try:
364 import zlib
Brett Cannoncd171c82013-07-04 17:43:24 -0400365 except ImportError:
Antoine Pitrou605c2932010-09-23 20:15:14 +0000366 raise CompressionError("zlib module is not available")
367 self.zlib = zlib
368 self.crc = zlib.crc32(b"")
369 if mode == "r":
370 self._init_read_gz()
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100371 self.exception = zlib.error
Antoine Pitrou605c2932010-09-23 20:15:14 +0000372 else:
373 self._init_write_gz()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000374
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100375 elif comptype == "bz2":
Antoine Pitrou605c2932010-09-23 20:15:14 +0000376 try:
377 import bz2
Brett Cannoncd171c82013-07-04 17:43:24 -0400378 except ImportError:
Antoine Pitrou605c2932010-09-23 20:15:14 +0000379 raise CompressionError("bz2 module is not available")
380 if mode == "r":
381 self.dbuf = b""
382 self.cmp = bz2.BZ2Decompressor()
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200383 self.exception = OSError
Antoine Pitrou605c2932010-09-23 20:15:14 +0000384 else:
385 self.cmp = bz2.BZ2Compressor()
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100386
387 elif comptype == "xz":
388 try:
389 import lzma
Brett Cannoncd171c82013-07-04 17:43:24 -0400390 except ImportError:
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100391 raise CompressionError("lzma module is not available")
392 if mode == "r":
393 self.dbuf = b""
394 self.cmp = lzma.LZMADecompressor()
395 self.exception = lzma.LZMAError
396 else:
397 self.cmp = lzma.LZMACompressor()
398
399 elif comptype != "tar":
400 raise CompressionError("unknown compression type %r" % comptype)
401
Antoine Pitrou605c2932010-09-23 20:15:14 +0000402 except:
403 if not self._extfileobj:
404 self.fileobj.close()
405 self.closed = True
406 raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000407
408 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000409 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000410 self.close()
411
412 def _init_write_gz(self):
413 """Initialize for writing with gzip compression.
414 """
415 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
416 -self.zlib.MAX_WBITS,
417 self.zlib.DEF_MEM_LEVEL,
418 0)
Guido van Rossume2a383d2007-01-15 16:59:06 +0000419 timestamp = struct.pack("<L", int(time.time()))
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000420 self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000421 if self.name.endswith(".gz"):
422 self.name = self.name[:-3]
Artem Bulgakov22748a82020-09-07 19:46:33 +0300423 # Honor "directory components removed" from RFC1952
424 self.name = os.path.basename(self.name)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000425 # RFC1952 says we must use ISO-8859-1 for the FNAME field.
426 self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000427
428 def write(self, s):
429 """Write string s to the stream.
430 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000431 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000432 self.crc = self.zlib.crc32(s, self.crc)
433 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000434 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000435 s = self.cmp.compress(s)
436 self.__write(s)
437
438 def __write(self, s):
439 """Write string s to the stream if a whole new block
440 is ready to be written.
441 """
442 self.buf += s
443 while len(self.buf) > self.bufsize:
444 self.fileobj.write(self.buf[:self.bufsize])
445 self.buf = self.buf[self.bufsize:]
446
447 def close(self):
448 """Close the _Stream object. No operation should be
449 done on it afterwards.
450 """
451 if self.closed:
452 return
453
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000454 self.closed = True
Serhiy Storchaka7e7a3db2015-04-10 13:24:41 +0300455 try:
456 if self.mode == "w" and self.comptype != "tar":
457 self.buf += self.cmp.flush()
458
459 if self.mode == "w" and self.buf:
460 self.fileobj.write(self.buf)
461 self.buf = b""
462 if self.comptype == "gz":
Martin Panterb82032f2015-12-11 05:19:29 +0000463 self.fileobj.write(struct.pack("<L", self.crc))
Serhiy Storchaka7e7a3db2015-04-10 13:24:41 +0300464 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
465 finally:
466 if not self._extfileobj:
467 self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000468
469 def _init_read_gz(self):
470 """Initialize for reading a gzip compressed fileobj.
471 """
472 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000473 self.dbuf = b""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000474
475 # taken from gzip.GzipFile with some alterations
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000476 if self.__read(2) != b"\037\213":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000477 raise ReadError("not a gzip file")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000478 if self.__read(1) != b"\010":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000479 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000480
481 flag = ord(self.__read(1))
482 self.__read(6)
483
484 if flag & 4:
485 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
486 self.read(xlen)
487 if flag & 8:
488 while True:
489 s = self.__read(1)
490 if not s or s == NUL:
491 break
492 if flag & 16:
493 while True:
494 s = self.__read(1)
495 if not s or s == NUL:
496 break
497 if flag & 2:
498 self.__read(2)
499
500 def tell(self):
501 """Return the stream's file pointer position.
502 """
503 return self.pos
504
505 def seek(self, pos=0):
506 """Set the stream's file pointer to pos. Negative seeking
507 is forbidden.
508 """
509 if pos - self.pos >= 0:
510 blocks, remainder = divmod(pos - self.pos, self.bufsize)
Guido van Rossum805365e2007-05-07 22:24:25 +0000511 for i in range(blocks):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000512 self.read(self.bufsize)
513 self.read(remainder)
514 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000515 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000516 return self.pos
517
INADA Naoki8d130912018-07-06 14:06:00 +0900518 def read(self, size):
519 """Return the next size number of bytes from the stream."""
520 assert size is not None
521 buf = self._read(size)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000522 self.pos += len(buf)
523 return buf
524
525 def _read(self, size):
526 """Return size bytes from the stream.
527 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000528 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000529 return self.__read(size)
530
531 c = len(self.dbuf)
hajoscher12a08c42018-07-04 10:13:18 +0200532 t = [self.dbuf]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000533 while c < size:
INADA Naoki8d130912018-07-06 14:06:00 +0900534 # Skip underlying buffer to avoid unaligned double buffering.
535 if self.buf:
536 buf = self.buf
537 self.buf = b""
538 else:
539 buf = self.fileobj.read(self.bufsize)
540 if not buf:
541 break
Guido van Rossumd8faa362007-04-27 19:54:29 +0000542 try:
543 buf = self.cmp.decompress(buf)
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100544 except self.exception:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000545 raise ReadError("invalid compressed data")
hajoscher12a08c42018-07-04 10:13:18 +0200546 t.append(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000547 c += len(buf)
hajoscher12a08c42018-07-04 10:13:18 +0200548 t = b"".join(t)
549 self.dbuf = t[size:]
550 return t[:size]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000551
552 def __read(self, size):
553 """Return size bytes from stream. If internal buffer is empty,
554 read another block from the stream.
555 """
556 c = len(self.buf)
hajoscher12a08c42018-07-04 10:13:18 +0200557 t = [self.buf]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000558 while c < size:
559 buf = self.fileobj.read(self.bufsize)
560 if not buf:
561 break
hajoscher12a08c42018-07-04 10:13:18 +0200562 t.append(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000563 c += len(buf)
hajoscher12a08c42018-07-04 10:13:18 +0200564 t = b"".join(t)
565 self.buf = t[size:]
566 return t[:size]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000567# class _Stream
568
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000569class _StreamProxy(object):
570 """Small proxy class that enables transparent compression
571 detection for the Stream interface (mode 'r|*').
572 """
573
574 def __init__(self, fileobj):
575 self.fileobj = fileobj
576 self.buf = self.fileobj.read(BLOCKSIZE)
577
578 def read(self, size):
579 self.read = self.fileobj.read
580 return self.buf
581
582 def getcomptype(self):
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100583 if self.buf.startswith(b"\x1f\x8b\x08"):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000584 return "gz"
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100585 elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000586 return "bz2"
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100587 elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
588 return "xz"
589 else:
590 return "tar"
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000591
592 def close(self):
593 self.fileobj.close()
594# class StreamProxy
595
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000596#------------------------
597# Extraction file object
598#------------------------
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000599class _FileInFile(object):
600 """A thin wrapper around an existing file object that
601 provides a part of its data as an individual file
602 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000603 """
604
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000605 def __init__(self, fileobj, offset, size, blockinfo=None):
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000606 self.fileobj = fileobj
607 self.offset = offset
608 self.size = size
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000609 self.position = 0
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200610 self.name = getattr(fileobj, "name", None)
611 self.closed = False
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000612
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000613 if blockinfo is None:
614 blockinfo = [(0, size)]
615
616 # Construct a map with data and zero blocks.
617 self.map_index = 0
618 self.map = []
619 lastpos = 0
620 realpos = self.offset
621 for offset, size in blockinfo:
622 if offset > lastpos:
623 self.map.append((False, lastpos, offset, None))
624 self.map.append((True, offset, offset + size, realpos))
625 realpos += size
626 lastpos = offset + size
627 if lastpos < self.size:
628 self.map.append((False, lastpos, self.size, None))
629
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200630 def flush(self):
631 pass
632
633 def readable(self):
634 return True
635
636 def writable(self):
637 return False
638
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000639 def seekable(self):
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000640 return self.fileobj.seekable()
641
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000642 def tell(self):
643 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000644 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000645 return self.position
646
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200647 def seek(self, position, whence=io.SEEK_SET):
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000648 """Seek to a position in the file.
649 """
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200650 if whence == io.SEEK_SET:
651 self.position = min(max(position, 0), self.size)
652 elif whence == io.SEEK_CUR:
653 if position < 0:
654 self.position = max(self.position + position, 0)
655 else:
656 self.position = min(self.position + position, self.size)
657 elif whence == io.SEEK_END:
658 self.position = max(min(self.size + position, self.size), 0)
659 else:
660 raise ValueError("Invalid argument")
661 return self.position
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000662
663 def read(self, size=None):
664 """Read data from the file.
665 """
666 if size is None:
667 size = self.size - self.position
668 else:
669 size = min(size, self.size - self.position)
670
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000671 buf = b""
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000672 while size > 0:
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000673 while True:
674 data, start, stop, offset = self.map[self.map_index]
675 if start <= self.position < stop:
676 break
677 else:
678 self.map_index += 1
679 if self.map_index == len(self.map):
680 self.map_index = 0
681 length = min(size, stop - self.position)
682 if data:
Lars Gustäbeldd071042011-02-23 11:42:22 +0000683 self.fileobj.seek(offset + (self.position - start))
Lars Gustäbel03572682015-07-06 09:27:24 +0200684 b = self.fileobj.read(length)
685 if len(b) != length:
686 raise ReadError("unexpected end of data")
687 buf += b
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000688 else:
689 buf += NUL * length
690 size -= length
691 self.position += length
692 return buf
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000693
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200694 def readinto(self, b):
695 buf = self.read(len(b))
696 b[:len(buf)] = buf
697 return len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000698
699 def close(self):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000700 self.closed = True
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200701#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000702
class ExFileObject(io.BufferedReader):
    """Buffered reader for the data of a single archive member."""

    def __init__(self, tarfile, tarinfo):
        # The member's data is the part of the archive's file object
        # described by tarinfo, including its sparse map.
        member_data = _FileInFile(
            tarfile.fileobj,
            tarinfo.offset_data,
            tarinfo.size,
            tarinfo.sparse,
        )
        super().__init__(member_data)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000710
711#------------------
712# Exported Classes
713#------------------
714class TarInfo(object):
715 """Informational class which holds the details about an
716 archive member given by a tar header block.
717 TarInfo objects are returned by TarFile.getmember(),
718 TarFile.getmembers() and TarFile.gettarinfo() and are
719 usually created internally.
720 """
721
Raymond Hettingera694f232019-03-27 13:16:34 -0700722 __slots__ = dict(
723 name = 'Name of the archive member.',
724 mode = 'Permission bits.',
725 uid = 'User ID of the user who originally stored this member.',
726 gid = 'Group ID of the user who originally stored this member.',
727 size = 'Size in bytes.',
728 mtime = 'Time of last modification.',
729 chksum = 'Header checksum.',
730 type = ('File type. type is usually one of these constants: '
731 'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
732 'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
733 linkname = ('Name of the target file name, which is only present '
734 'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
735 uname = 'User name.',
736 gname = 'Group name.',
737 devmajor = 'Device major number.',
738 devminor = 'Device minor number.',
739 offset = 'The tar header starts here.',
740 offset_data = "The file's data starts here.",
741 pax_headers = ('A dictionary containing key-value pairs of an '
742 'associated pax extended header.'),
743 sparse = 'Sparse member information.',
744 tarfile = None,
745 _sparse_structs = None,
746 _link_target = None,
747 )
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +0000748
def __init__(self, name=""):
    """Construct a TarInfo object with default field values.

    name is the optional name of the member; all other fields start
    out with the defaults assigned below.
    """
    # Identity of the member.
    self.name = name            # member name
    self.type = REGTYPE         # member type
    self.linkname = ""          # link name

    # Ownership and permissions.
    self.mode = 0o644           # file permissions
    self.uid = self.gid = 0     # user id / group id
    self.uname = self.gname = ""    # user name / group name

    # Size, time stamp and checksum.
    self.size = 0               # file size
    self.mtime = 0              # modification time
    self.chksum = 0             # header checksum

    # Device numbers.
    self.devmajor = self.devminor = 0

    # Position of the header and of the data inside the archive.
    self.offset = 0             # the tar header starts here
    self.offset_data = 0        # the file's data starts here

    self.sparse = None          # sparse member information
    self.pax_headers = {}       # pax header information
772
@property
def path(self):
    """In pax headers, "name" is called "path".

    Reading this property returns self.name; assigning to it
    stores the new value in self.name.
    """
    return self.name

@path.setter
def path(self, name):
    self.name = name
781
@property
def linkpath(self):
    """In pax headers, "linkname" is called "linkpath".

    Reading this property returns self.linkname; assigning to it
    stores the new value in self.linkname.
    """
    return self.linkname

@linkpath.setter
def linkpath(self, linkname):
    self.linkname = linkname
Guido van Rossumd8faa362007-04-27 19:54:29 +0000790
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000791 def __repr__(self):
792 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
793
def get_info(self):
    """Return the TarInfo's header attributes as a new dictionary.

    The mode is masked with 0o7777 and the name of a DIRTYPE member
    is given a trailing slash if it does not already have one.
    """
    keys = ("name", "mode", "uid", "gid", "size", "mtime", "chksum",
            "type", "linkname", "uname", "gname", "devmajor", "devminor")
    info = {key: getattr(self, key) for key in keys}
    info["mode"] &= 0o7777

    if info["type"] == DIRTYPE:
        if not info["name"].endswith("/"):
            info["name"] += "/"

    return info
817
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
    """Return a tar header for this member in the requested format.

    format must be USTAR_FORMAT, GNU_FORMAT or PAX_FORMAT, otherwise
    ValueError is raised. The errors argument is not passed on for
    PAX_FORMAT.
    """
    info = self.get_info()

    if format == USTAR_FORMAT:
        return self.create_ustar_header(info, encoding, errors)
    if format == GNU_FORMAT:
        return self.create_gnu_header(info, encoding, errors)
    if format == PAX_FORMAT:
        return self.create_pax_header(info, encoding)
    raise ValueError("invalid format")
831
def create_ustar_header(self, info, encoding, errors):
    """Return the object as a ustar header block.

    Raises ValueError if the encoded linkname exceeds LENGTH_LINK.
    A name whose encoded form exceeds LENGTH_NAME is split into a
    prefix and a name part by _posix_split_name().
    """
    info["magic"] = POSIX_MAGIC

    encoded_link = info["linkname"].encode(encoding, errors)
    if len(encoded_link) > LENGTH_LINK:
        raise ValueError("linkname is too long")

    encoded_name = info["name"].encode(encoding, errors)
    if len(encoded_name) > LENGTH_NAME:
        prefix, name = self._posix_split_name(info["name"], encoding, errors)
        info["prefix"] = prefix
        info["name"] = name

    return self._create_header(info, USTAR_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000844
def create_gnu_header(self, info, encoding, errors):
    """Return the object as a GNU header block sequence.

    A long-link and/or long-name header sequence is emitted in front
    of the regular header when the encoded linkname or name exceeds
    LENGTH_LINK or LENGTH_NAME respectively.
    """
    info["magic"] = GNU_MAGIC

    long_fields = (
        ("linkname", LENGTH_LINK, GNUTYPE_LONGLINK),
        ("name", LENGTH_NAME, GNUTYPE_LONGNAME),
    )
    chunks = []
    for key, limit, long_type in long_fields:
        if len(info[key].encode(encoding, errors)) > limit:
            chunks.append(
                self._create_gnu_long_header(info[key], long_type, encoding, errors))

    chunks.append(self._create_header(info, GNU_FORMAT, encoding, errors))
    return b"".join(chunks)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000858
def create_pax_header(self, info, encoding):
    """Return the object as a ustar header block. If it cannot be
       represented this way, prepend a pax extended header sequence
       with supplement information.
    """
    info["magic"] = POSIX_MAGIC
    pax_headers = self.pax_headers.copy()

    # String fields go into the pax header when they are not pure ASCII
    # or are longer than the ustar field.
    text_fields = (
        ("name", "path", LENGTH_NAME),
        ("linkname", "linkpath", LENGTH_LINK),
        ("uname", "uname", 32),
        ("gname", "gname", 32),
    )
    for field, pax_key, limit in text_fields:
        if pax_key in pax_headers:
            # The pax header has priority.
            continue

        text = info[field]
        try:
            text.encode("ascii", "strict")
        except UnicodeEncodeError:
            pax_headers[pax_key] = text
            continue

        if len(text) > limit:
            pax_headers[pax_key] = text

    # Number fields go into the pax header when they are out of range
    # for the octal ustar field or are floats.
    number_fields = (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12))
    for field, digits in number_fields:
        if field in pax_headers:
            # The pax header has priority. Avoid overflow.
            info[field] = 0
            continue

        number = info[field]
        in_range = 0 <= number < 8 ** (digits - 1)
        if not in_range or isinstance(number, float):
            pax_headers[field] = str(number)
            info[field] = 0

    # Create a pax extended header only if there is something to store.
    extended = (
        self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        if pax_headers else b""
    )
    return extended + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000907
@classmethod
def create_pax_global_header(cls, pax_headers):
    """Return pax_headers as a pax global header block sequence.

    The records are built by _create_pax_generic_header() with type
    XGLTYPE and the "utf-8" encoding.
    """
    header = cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
    return header
Guido van Rossumd8faa362007-04-27 19:54:29 +0000913
def _posix_split_name(self, name, encoding, errors):
    """Split a name that is too long for the name field into a
       (prefix, name) pair at a "/" boundary.

       The first split point is used at which the encoded prefix fits
       into LENGTH_PREFIX and the encoded rest into LENGTH_NAME.
       ValueError is raised if there is no such split point.
    """
    parts = name.split("/")
    for cut in range(1, len(parts)):
        prefix = "/".join(parts[:cut])
        rest = "/".join(parts[cut:])
        prefix_fits = len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX
        if prefix_fits and len(rest.encode(encoding, errors)) <= LENGTH_NAME:
            return prefix, rest

    raise ValueError("name is too long")
929
@staticmethod
def _create_header(info, format, encoding, errors):
    """Return a header block. info is a dictionary with file
       information, format must be one of the *_FORMAT constants.

       String fields are encoded with encoding and errors. The device
       number fields are only filled in for CHRTYPE and BLKTYPE
       members and are left empty otherwise.
    """
    has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE)
    if has_device_fields:
        devmajor = itn(info.get("devmajor", 0), 8, format)
        devminor = itn(info.get("devminor", 0), 8, format)
    else:
        devmajor = stn("", 8, encoding, errors)
        devminor = stn("", 8, encoding, errors)

    parts = [
        stn(info.get("name", ""), 100, encoding, errors),
        itn(info.get("mode", 0) & 0o7777, 8, format),
        itn(info.get("uid", 0), 8, format),
        itn(info.get("gid", 0), 8, format),
        itn(info.get("size", 0), 12, format),
        itn(info.get("mtime", 0), 12, format),
        # Checksum placeholder. It must be exactly 8 bytes wide: the
        # fields before it add up to 148 bytes, frombuf() reads the
        # checksum from buf[148:156] and the type from buf[156:157].
        # Spelled as a product so that the width is explicit.
        b" " * 8,
        info.get("type", REGTYPE),
        stn(info.get("linkname", ""), 100, encoding, errors),
        info.get("magic", POSIX_MAGIC),
        stn(info.get("uname", ""), 32, encoding, errors),
        stn(info.get("gname", ""), 32, encoding, errors),
        devmajor,
        devminor,
        stn(info.get("prefix", ""), 155, encoding, errors)
    ]

    # Pad the joined fields with NULs up to a full block.
    buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
    chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
    # Overwrite the first 7 bytes of the placeholder with six octal
    # digits and a NUL; the eighth byte stays a space.
    buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
    return buf
965
@staticmethod
def _create_payload(payload):
    """Return payload padded with NUL up to the next multiple of
       BLOCKSIZE. A payload that already ends on a block border is
       returned without padding.
    """
    remainder = len(payload) % BLOCKSIZE
    if remainder:
        payload += NUL * (BLOCKSIZE - remainder)
    return payload
975
@classmethod
def _create_gnu_long_header(cls, name, type, encoding, errors):
    """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
       for name: a header block followed by the encoded,
       NUL-terminated name padded by _create_payload().
    """
    data = name.encode(encoding, errors) + NUL

    info = {
        "name": "././@LongLink",
        "type": type,
        "size": len(data),
        "magic": GNU_MAGIC,
    }

    # Extended header first, then the name blocks.
    header = cls._create_header(info, USTAR_FORMAT, encoding, errors)
    return header + cls._create_payload(data)
992
@classmethod
def _create_pax_generic_header(cls, pax_headers, type, encoding):
    """Return a POSIX.1-2008 extended or global header sequence
       that contains a list of keyword, value pairs. The values
       must be strings.
    """
    # If any value cannot be encoded as strict UTF-8 (e.g. because it
    # contains surrogate characters), hdrcharset=BINARY is forced, see
    # _proc_pax() for more information.
    binary = False
    for value in pax_headers.values():
        try:
            value.encode("utf-8", "strict")
        except UnicodeEncodeError:
            binary = True
            break

    chunks = []
    if binary:
        # Put the hdrcharset field at the beginning of the header.
        chunks.append(b"21 hdrcharset=BINARY\n")

    for keyword, value in pax_headers.items():
        key_bytes = keyword.encode("utf-8")
        if binary:
            # Try to restore the original byte representation of `value'.
            # Needless to say, that the encoding must match the string.
            value_bytes = value.encode(encoding, "surrogateescape")
        else:
            value_bytes = value.encode("utf-8")

        # The length prefix counts its own digits, so iterate until the
        # total record length stops changing.
        base = len(key_bytes) + len(value_bytes) + 3   # ' ' + '=' + '\n'
        total = 0
        while True:
            candidate = base + len(str(total))
            if candidate == total:
                break
            total = candidate
        chunks.append(
            bytes(str(total), "ascii") + b" " + key_bytes + b"=" + value_bytes + b"\n")

    records = b"".join(chunks)

    # We use a hardcoded "././@PaxHeader" name like star does
    # instead of the one that POSIX recommends.
    info = {
        "name": "././@PaxHeader",
        "type": type,
        "size": len(records),
        "magic": POSIX_MAGIC,
    }

    # Pax header block first, then the record blocks.
    header = cls._create_header(info, USTAR_FORMAT, "ascii", "replace")
    return header + cls._create_payload(records)
1043
@classmethod
def frombuf(cls, buf, encoding, errors):
    """Construct a TarInfo object from a 512 byte bytes object.

       Raises EmptyHeaderError for an empty buffer, TruncatedHeaderError
       if the length is not BLOCKSIZE, EOFHeaderError if the block
       consists of NULs only and InvalidHeaderError if the stored
       checksum matches none of the computed ones.
    """
    size = len(buf)
    if size == 0:
        raise EmptyHeaderError("empty header")
    if size != BLOCKSIZE:
        raise TruncatedHeaderError("truncated header")
    if buf.count(NUL) == BLOCKSIZE:
        raise EOFHeaderError("end of file header")

    chksum = nti(buf[148:156])
    if chksum not in calc_chksums(buf):
        raise InvalidHeaderError("bad checksum")

    # Decode the fixed-position header fields.
    member = cls()
    member.name = nts(buf[0:100], encoding, errors)
    member.mode = nti(buf[100:108])
    member.uid = nti(buf[108:116])
    member.gid = nti(buf[116:124])
    member.size = nti(buf[124:136])
    member.mtime = nti(buf[136:148])
    member.chksum = chksum
    member.type = buf[156:157]
    member.linkname = nts(buf[157:257], encoding, errors)
    member.uname = nts(buf[265:297], encoding, errors)
    member.gname = nts(buf[297:329], encoding, errors)
    member.devmajor = nti(buf[329:337])
    member.devminor = nti(buf[337:345])
    prefix = nts(buf[345:500], encoding, errors)

    # Old V7 tar format represents a directory as a regular
    # file with a trailing slash.
    if member.type == AREGTYPE and member.name.endswith("/"):
        member.type = DIRTYPE

    # The old GNU sparse format occupies some of the unused
    # space in the buffer for up to 4 sparse structures.
    # Save them for later processing in _proc_sparse().
    if member.type == GNUTYPE_SPARSE:
        structs = []
        for pos in range(386, 386 + 4 * 24, 24):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            structs.append((offset, numbytes))
        isextended = bool(buf[482])
        origsize = nti(buf[483:495])
        member._sparse_structs = (structs, isextended, origsize)

    # Remove redundant slashes from directories.
    if member.isdir():
        member.name = member.name.rstrip("/")

    # Reconstruct a ustar longname.
    if prefix and member.type not in GNU_TYPES:
        member.name = prefix + "/" + member.name
    return member
1106
@classmethod
def fromtarfile(cls, tarfile):
    """Return the next TarInfo object from TarFile object
       tarfile.

       One block is read from tarfile.fileobj and parsed by frombuf();
       the result of the new object's _proc_member() is returned.
    """
    fileobj = tarfile.fileobj
    header = fileobj.read(BLOCKSIZE)
    member = cls.frombuf(header, tarfile.encoding, tarfile.errors)
    # The header started one block before the current file position.
    member.offset = fileobj.tell() - BLOCKSIZE
    return member._proc_member(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001116
Guido van Rossumd8faa362007-04-27 19:54:29 +00001117 #--------------------------------------------------------------------------
1118 # The following are methods that are called depending on the type of a
1119 # member. The entry point is _proc_member() which can be overridden in a
1120 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1121 # implement the following
1122 # operations:
1123 # 1. Set self.offset_data to the position where the data blocks begin,
1124 # if there is data that follows.
1125 # 2. Set tarfile.offset to the position where the next member's header will
1126 # begin.
1127 # 3. Return self or another valid TarInfo object.
def _proc_member(self, tarfile):
    """Choose the right processing method depending on
       the type and call it.
    """
    member_type = self.type
    if member_type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self._proc_gnulong(tarfile)
    if member_type == GNUTYPE_SPARSE:
        return self._proc_sparse(tarfile)
    if member_type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
        return self._proc_pax(tarfile)
    # Everything else is handled as a builtin type.
    return self._proc_builtin(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001140
def _proc_builtin(self, tarfile):
    """Process a builtin type or an unknown type which
       will be treated as a regular file.
    """
    data_start = tarfile.fileobj.tell()
    self.offset_data = data_start

    # Only regular files and unsupported types are followed by data
    # blocks that have to be skipped.
    has_data = self.isreg() or self.type not in SUPPORTED_TYPES
    if has_data:
        tarfile.offset = data_start + self._block(self.size)
    else:
        tarfile.offset = data_start

    # Patch the TarInfo object with saved global
    # header information.
    self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

    return self
1157
def _proc_gnulong(self, tarfile):
    """Process the blocks that hold a GNU longname
       or longlink member.

       The following header is read and returned with its name or
       linkname replaced by the long value. SubsequentHeaderError is
       raised if that header cannot be read.
    """
    long_value = tarfile.fileobj.read(self._block(self.size))

    # Fetch the next header and process it.
    try:
        member = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # Patch the TarInfo object from the next header with
    # the longname information.
    member.offset = self.offset
    if self.type == GNUTYPE_LONGNAME:
        member.name = nts(long_value, tarfile.encoding, tarfile.errors)
    elif self.type == GNUTYPE_LONGLINK:
        member.linkname = nts(long_value, tarfile.encoding, tarfile.errors)

    return member
1179
def _proc_sparse(self, tarfile):
    """Process a GNU sparse header plus extra headers.
    """
    # We already collected some sparse structures in frombuf().
    structs, isextended, origsize = self._sparse_structs
    del self._sparse_structs

    # Collect sparse structures from extended header blocks; each
    # block holds up to 21 entries of 24 bytes, byte 504 tells
    # whether another extended block follows.
    while isextended:
        block = tarfile.fileobj.read(BLOCKSIZE)
        for pos in range(0, 21 * 24, 24):
            try:
                offset = nti(block[pos:pos + 12])
                numbytes = nti(block[pos + 12:pos + 24])
            except ValueError:
                break
            if offset and numbytes:
                structs.append((offset, numbytes))
        isextended = bool(block[504])
    self.sparse = structs

    # The next header position is computed from the stored size
    # before self.size is replaced by the original size.
    self.offset_data = tarfile.fileobj.tell()
    tarfile.offset = self.offset_data + self._block(self.size)
    self.size = origsize
    return self
1207
def _proc_pax(self, tarfile):
    """Process an extended or global header as described in
       POSIX.1-2008.

       The records of the header are parsed into a dictionary, the
       following header is read and, for an extended header, patched
       with the parsed information. The following member is returned.

       Raises InvalidHeaderError if a record declares a length that is
       too short to hold the record or that extends beyond the header
       data, and SubsequentHeaderError if the following header cannot
       be read.
    """
    # Read the header information.
    buf = tarfile.fileobj.read(self._block(self.size))

    # A pax header stores supplemental information for either
    # the following file (extended) or all following files
    # (global).
    if self.type == XGLTYPE:
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # Check if the pax header contains a hdrcharset field. This tells us
    # the encoding of the path, linkpath, uname and gname fields. Normally,
    # these fields are UTF-8 encoded but since POSIX.1-2008 tar
    # implementations are allowed to store them as raw binary strings if
    # the translation to UTF-8 fails.
    match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
    if match is not None:
        pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

    # For the time being, we don't care about anything other than "BINARY".
    # The only other value that is currently allowed by the standard is
    # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
    hdrcharset = pax_headers.get("hdrcharset")
    if hdrcharset == "BINARY":
        encoding = tarfile.encoding
    else:
        encoding = "utf-8"

    # Parse pax header information. A record looks like that:
    # "%d %s=%s\n" % (length, keyword, value). length is the size
    # of the complete record including the length field itself and
    # the newline. keyword and value are both UTF-8 encoded strings.
    regex = re.compile(br"(\d+) ([^=]+)=")
    pos = 0
    while True:
        match = regex.match(buf, pos)
        if not match:
            break

        length, keyword = match.groups()
        length = int(length)

        # The match covers "<length> <keyword>=" starting at pos, so a
        # well-formed record ends at least one byte (the newline) after
        # match.end() and never beyond the data that was read. A length
        # of 0 is covered by the first condition. Anything else would
        # give a bogus value and leave pos in the middle of a record.
        record_end = pos + length
        if record_end <= match.end() or record_end > len(buf):
            raise InvalidHeaderError("invalid header")
        value = buf[match.end(2) + 1:record_end - 1]

        # Normally, we could just use "utf-8" as the encoding and "strict"
        # as the error handler, but we better not take the risk. For
        # example, GNU tar <= 1.23 is known to store filenames it cannot
        # translate to UTF-8 as raw strings (unfortunately without a
        # hdrcharset=BINARY header).
        # We first try the strict standard encoding, and if that fails we
        # fall back on the user's encoding and error handler.
        keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                tarfile.errors)
        if keyword in PAX_NAME_FIELDS:
            value = self._decode_pax_field(value, encoding, tarfile.encoding,
                    tarfile.errors)
        else:
            value = self._decode_pax_field(value, "utf-8", "utf-8",
                    tarfile.errors)

        pax_headers[keyword] = value
        pos = record_end

    # Fetch the next header.
    try:
        next = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # Process GNU sparse information.
    if "GNU.sparse.map" in pax_headers:
        # GNU extended sparse format version 0.1.
        self._proc_gnusparse_01(next, pax_headers)

    elif "GNU.sparse.size" in pax_headers:
        # GNU extended sparse format version 0.0.
        self._proc_gnusparse_00(next, pax_headers, buf)

    elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
        # GNU extended sparse format version 1.0.
        self._proc_gnusparse_10(next, pax_headers, tarfile)

    if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
        # Patch the TarInfo object with the extended header info.
        next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
        next.offset = self.offset

        if "size" in pax_headers:
            # If the extended header replaces the size field,
            # we need to recalculate the offset where the next
            # header starts.
            offset = next.offset_data
            if next.isreg() or next.type not in SUPPORTED_TYPES:
                offset += next._block(next.size)
            tarfile.offset = offset

    return next
1311
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001312 def _proc_gnusparse_00(self, next, pax_headers, buf):
1313 """Process a GNU tar extended sparse header, version 0.0.
1314 """
1315 offsets = []
1316 for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1317 offsets.append(int(match.group(1)))
1318 numbytes = []
1319 for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1320 numbytes.append(int(match.group(1)))
1321 next.sparse = list(zip(offsets, numbytes))
1322
1323 def _proc_gnusparse_01(self, next, pax_headers):
1324 """Process a GNU tar extended sparse header, version 0.1.
1325 """
1326 sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1327 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1328
def _proc_gnusparse_10(self, next, pax_headers, tarfile):
    """Process a GNU tar extended sparse header, version 1.0.

       The sparse map is read from tarfile.fileobj as newline-terminated
       decimal numbers: first the number of entries, then two numbers
       per entry. Afterwards next.offset_data is set to the current
       file position.
    """
    numbers = []
    pending = tarfile.fileobj.read(BLOCKSIZE)
    count, pending = pending.split(b"\n", 1)
    count = int(count)
    while len(numbers) < count * 2:
        # Fetch another block when no complete number is left.
        if b"\n" not in pending:
            pending += tarfile.fileobj.read(BLOCKSIZE)
        number, pending = pending.split(b"\n", 1)
        numbers.append(int(number))
    next.offset_data = tarfile.fileobj.tell()
    next.sparse = list(zip(numbers[::2], numbers[1::2]))
1344
Guido van Rossume7ba4952007-06-06 23:52:48 +00001345 def _apply_pax_info(self, pax_headers, encoding, errors):
1346 """Replace fields with supplemental information from a previous
1347 pax extended or global header.
1348 """
1349 for keyword, value in pax_headers.items():
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001350 if keyword == "GNU.sparse.name":
1351 setattr(self, "path", value)
1352 elif keyword == "GNU.sparse.size":
1353 setattr(self, "size", int(value))
1354 elif keyword == "GNU.sparse.realsize":
1355 setattr(self, "size", int(value))
1356 elif keyword in PAX_FIELDS:
1357 if keyword in PAX_NUMBER_FIELDS:
1358 try:
1359 value = PAX_NUMBER_FIELDS[keyword](value)
1360 except ValueError:
1361 value = 0
1362 if keyword == "path":
1363 value = value.rstrip("/")
1364 setattr(self, keyword, value)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001365
1366 self.pax_headers = pax_headers.copy()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001367
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001368 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1369 """Decode a single field from a pax record.
1370 """
1371 try:
1372 return value.decode(encoding, "strict")
1373 except UnicodeDecodeError:
1374 return value.decode(fallback_encoding, fallback_errors)
1375
def _block(self, count):
    """Return `count' rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    whole, partial = divmod(count, BLOCKSIZE)
    # A partially filled block still occupies a full block.
    return (whole + 1) * BLOCKSIZE if partial else whole * BLOCKSIZE
Thomas Wouters89f507f2006-12-13 04:49:30 +00001384
def isreg(self):
    """Return True if the type of this member is one of REGULAR_TYPES,
       i.e. it is a regular file.
    """
    return self.type in REGULAR_TYPES
Raymond Hettingera694f232019-03-27 13:16:34 -07001388
def isfile(self):
    """Return True if this member is a regular file. This is the same
       check as isreg().
    """
    return self.isreg()
Raymond Hettingera694f232019-03-27 13:16:34 -07001392
def isdir(self):
    """Return True if the type of this member is DIRTYPE (a directory).
    """
    return self.type == DIRTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001396
def issym(self):
    """Return True if the type of this member is SYMTYPE (a symbolic
       link).
    """
    return self.type == SYMTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001400
def islnk(self):
    """Return True if the type of this member is LNKTYPE (a hard link).
    """
    return self.type == LNKTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001404
def ischr(self):
    """Return True if the type of this member is CHRTYPE (a character
       device).
    """
    return self.type == CHRTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001408
def isblk(self):
    """Return True if the type of this member is BLKTYPE (a block
       device).
    """
    return self.type == BLKTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001412
def isfifo(self):
    """Return True if the type of this member is FIFOTYPE (a FIFO).
    """
    return self.type == FIFOTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001416
def issparse(self):
    """Return True if a sparse map has been set for this member, i.e.
       its `sparse' attribute is not None.
    """
    return self.sparse is not None
Raymond Hettingera694f232019-03-27 13:16:34 -07001419
def isdev(self):
    """Return True if the type of this member is CHRTYPE, BLKTYPE or
       FIFOTYPE (character device, block device or FIFO).
    """
    return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1423# class TarInfo
1424
1425class TarFile(object):
1426 """The TarFile Class provides an interface to tar archives.
1427 """
1428
# Class-level defaults. __init__() replaces most of them on the instance
# when the corresponding argument is not None; `errors' is always
# replaced by the `errors' argument.

debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

dereference = False         # If true, add content of linked file to the
                            # tar file, else the link.

ignore_zeros = False        # If true, skips empty or invalid blocks and
                            # continues processing.

errorlevel = 1              # If 0, fatal errors only appear in debug
                            # messages (if debug >= 0). If > 0, errors
                            # are passed to the caller as exceptions.

format = DEFAULT_FORMAT     # The format to use when creating an archive.

encoding = ENCODING         # Encoding for 8-bit character strings.

errors = None               # Error handler for unicode conversion.

tarinfo = TarInfo           # The default TarInfo class to use.

fileobject = ExFileObject   # The file-object for extractfile().
Guido van Rossumd8faa362007-04-27 19:54:29 +00001450
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None,
        errorlevel=None, copybufsize=None):
    """Open the (uncompressed) tar archive `name'. `mode' is one of
       'r' (read an existing archive, the default), 'a' (append to an
       archive, creating it if it does not exist), 'w' (create a new
       archive, overwriting an existing file) or 'x' (create a new
       archive using the exclusive-creation file mode).
       If `fileobj' is given, it is used for all reading and writing,
       its `mode' attribute (if any) replaces the low-level file mode,
       and it is left open when the TarFile is closed.
       The remaining arguments override the class-level defaults of the
       same name when they are not None.
    """
    lowlevel_modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
    if mode not in lowlevel_modes:
        raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
    self.mode = mode
    self._mode = lowlevel_modes[mode]

    if fileobj:
        # Work on the caller's file object; take over its name and
        # mode where they are available.
        if (name is None and hasattr(fileobj, "name") and
                isinstance(fileobj.name, (str, bytes))):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    else:
        if self.mode == "a" and not os.path.exists(name):
            # Appending to a nonexistent file means creating it.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Per-instance overrides of the class-level defaults.
    for attr, override in (("format", format),
                           ("tarinfo", tarinfo),
                           ("dereference", dereference),
                           ("ignore_zeros", ignore_zeros),
                           ("encoding", encoding)):
        if override is not None:
            setattr(self, attr, override)
    self.errors = errors

    # Global pax headers are only kept for the pax format.
    use_pax_headers = pax_headers is not None and self.format == PAX_FORMAT
    self.pax_headers = pax_headers if use_pax_headers else {}

    for attr, override in (("debug", debug), ("errorlevel", errorlevel)):
        if override is not None:
            setattr(self, attr, override)

    # Bookkeeping state.
    self.copybufsize = copybufsize
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Walk over the existing members and stop right in front
            # of the first empty block, where new data is appended.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    member = self.tarinfo.fromtarfile(self)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))
                else:
                    self.members.append(member)

        if self.mode in ("a", "w", "x"):
            self._loaded = True

            if self.pax_headers:
                header = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(header)
                self.offset += len(header)
    except BaseException:
        # Do not leak a file we opened ourselves, then re-raise.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001550
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001551 #--------------------------------------------------------------------------
1552 # Below are the classmethods which act as alternate constructors to the
1553 # TarFile class. The open() method is the only one that is needed for
1554 # public use; it is the "super"-constructor and is able to select an
1555 # adequate "sub"-constructor for a particular compression using the mapping
1556 # from OPEN_METH.
1557 #
1558 # This concept allows one to subclass TarFile without losing the comfort of
1559 # the super-constructor. A sub-constructor is registered and made available
1560 # by adding it to the mapping in OPEN_METH.
1561
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending and return
       an instance of the class the method was called on.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'r:xz'       open for reading with lzma compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'w:xz'       open for writing with lzma compression

       'x' or 'x:'  create a tarfile exclusively without compression, raise
                    an exception if the file is already created
       'x:gz'       create a gzip compressed tarfile, raise an exception
                    if the file is already created
       'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                    if the file is already created
       'x:xz'       create an lzma compressed tarfile, raise an exception
                    if the file is already created

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'r|xz'       open an lzma compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing
       'w|xz'       open an lzma compressed stream for writing
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Probe every registered *open() method until one succeeds.
        # The sort key moves the plain tar opener to the end.
        probe_order = sorted(
            cls.OPEN_METH,
            key=lambda comptype: cls.OPEN_METH[comptype] == 'taropen')
        for comptype in probe_order:
            opener = getattr(cls, cls.OPEN_METH[comptype])
            rewind = fileobj is not None
            if rewind:
                saved_pos = fileobj.tell()
            try:
                return opener(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Restore the position for the next candidate.
                if rewind:
                    fileobj.seek(saved_pos)
        raise ReadError("file could not be opened successfully")

    if ":" in mode:
        filemode, _, comptype = mode.partition(":")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Dispatch to the *open() method registered for the
        # requested compression.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        opener = getattr(cls, cls.OPEN_METH[comptype])
        return opener(name, filemode, fileobj, **kwargs)

    if "|" in mode:
        filemode, _, comptype = mode.partition("|")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            archive = cls(name, filemode, stream, **kwargs)
        except BaseException:
            stream.close()
            raise
        # The stream wrapper is closed together with the archive.
        archive._extfileobj = False
        return archive

    if mode in ("a", "w", "x"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001652
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open the uncompressed tar archive `name' for reading or writing.
       `mode' must be 'r', 'a', 'w' or 'x'; any other value raises
       ValueError.
    """
    if mode in ("r", "a", "w", "x"):
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001660
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the gzip compressed tar archive `name' for reading or
       writing. Appending is not allowed.

       `mode' must be 'r', 'w' or 'x', otherwise ValueError is raised.
       CompressionError is raised if the gzip module is not available.
       In mode 'r', an OSError raised while opening the file or reading
       the archive is reported as ReadError, with the original error
       attached as its cause.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        from gzip import GzipFile
    except ImportError:
        # The ImportError adds nothing to the message; suppress it.
        raise CompressionError("gzip module is not available") from None

    try:
        fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
    except OSError as e:
        if fileobj is not None and mode == 'r':
            raise ReadError("not a gzip file") from e
        raise

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except OSError as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not a gzip file") from e
        raise
    except:
        # Close the GzipFile on any other failure, then re-raise.
        fileobj.close()
        raise
    # The GzipFile wrapper is closed together with the archive.
    t._extfileobj = False
    return t
1693
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the bzip2 compressed tar archive `name' for reading or
       writing. Appending is not allowed.

       `mode' must be 'r', 'w' or 'x', otherwise ValueError is raised.
       CompressionError is raised if the bz2 module is not available.
       In mode 'r', an OSError or EOFError raised while reading the
       archive is reported as ReadError, with the original error
       attached as its cause.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        from bz2 import BZ2File
    except ImportError:
        # The ImportError adds nothing to the message; suppress it.
        raise CompressionError("bz2 module is not available") from None

    fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (OSError, EOFError) as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not a bzip2 file") from e
        raise
    except:
        # Close the BZ2File on any other failure, then re-raise.
        fileobj.close()
        raise
    # The BZ2File wrapper is closed together with the archive.
    t._extfileobj = False
    return t
1721
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open the lzma compressed tar archive `name' for reading or
       writing. Appending is not allowed.

       `mode' must be 'r', 'w' or 'x', otherwise ValueError is raised.
       CompressionError is raised if the lzma module is not available.
       In mode 'r', an LZMAError or EOFError raised while reading the
       archive is reported as ReadError, with the original error
       attached as its cause.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        from lzma import LZMAFile, LZMAError
    except ImportError:
        # The ImportError adds nothing to the message; suppress it.
        raise CompressionError("lzma module is not available") from None

    fileobj = LZMAFile(fileobj or name, mode, preset=preset)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (LZMAError, EOFError) as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not an lzma file") from e
        raise
    except:
        # Close the LZMAFile on any other failure, then re-raise.
        fileobj.close()
        raise
    # The LZMAFile wrapper is closed together with the archive.
    t._extfileobj = False
    return t
1749
# All *open() methods are registered here. The keys are the compression
# names accepted in the mode string of open(), the values are the names
# of the classmethods that open() looks up with getattr().
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open",   # bzip2 compressed tar
    "xz":  "xzopen"     # lzma compressed tar
}
1757
1758 #--------------------------------------------------------------------------
1759 # The public methods which TarFile provides:
1760
def close(self):
    """Close the TarFile. When the archive was opened for writing
       (mode 'a', 'w' or 'x'), two zero blocks are appended first and
       the archive is padded with zeros to a multiple of RECORDSIZE.
       Calling close() on an already closed TarFile does nothing.
    """
    if self.closed:
        return

    self.closed = True
    try:
        if self.mode in ("a", "w", "x"):
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # Pad the last record with zero blocks
            # (like option -b20 for tar does).
            overhang = self.offset % RECORDSIZE
            if overhang > 0:
                self.fileobj.write(NUL * (RECORDSIZE - overhang))
    finally:
        # A file object supplied by the caller is left open.
        if not self._extfileobj:
            self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001781
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
       A `name' with trailing slashes (e.g. "dir/") that has no exact
       match is looked up again without them.
    """
    tarinfo = self._getmember(name)
    if tarinfo is None and isinstance(name, str) and name.endswith("/"):
        # The exact name is tried first so that existing lookups keep
        # their result; only a miss falls back to the stripped name.
        tarinfo = self._getmember(name.rstrip("/"))
    if tarinfo is None:
        raise KeyError("filename %r not found" % name)
    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001792
def getmembers(self):
    """Return the list of TarInfo objects held in `self.members'. If
       the archive has not been loaded completely yet, it is scanned to
       its end first.
    """
    self._check()
    if not self._loaded:
        # A complete list needs every header to have been read.
        self._load()
    return self.members
1802
def getnames(self):
    """Return the names of all members as a list, in the same order as
       the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001808
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object from the result of os.stat or equivalent
       on an existing file. The file is either named by `name', or
       specified as a file object `fileobj' with a file descriptor. If
       given, `arcname' specifies an alternative name for the file in the
       archive, otherwise, the name is taken from the 'name' attribute of
       'fileobj', or the 'name' argument. The name should be a text
       string. None is returned for a file type that is not supported.
    """
    self._check("awx")

    # An open file object dictates the name of the file.
    if fileobj is not None:
        name = fileobj.name

    # Derive the member name: drop a drive prefix, use forward
    # slashes only and make the path relative.
    if arcname is None:
        arcname = name
    _, arcname = os.path.splitdrive(arcname)
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    tarinfo = self.tarinfo()
    tarinfo.tarfile = self  # Not needed

    # Symbolic links are only followed when dereferencing.
    if fileobj is not None:
        st = os.fstat(fileobj.fileno())
    elif self.dereference:
        st = os.stat(name)
    else:
        st = os.lstat(name)
    linkname = ""

    st_mode = st.st_mode
    if stat.S_ISREG(st_mode):
        inode = (st.st_ino, st.st_dev)
        already_archived = (not self.dereference and st.st_nlink > 1 and
                            inode in self.inodes and
                            arcname != self.inodes[inode])
        if already_archived:
            # Store a hard link to the member added before.
            kind = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            kind = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(st_mode):
        kind = DIRTYPE
    elif stat.S_ISFIFO(st_mode):
        kind = FIFOTYPE
    elif stat.S_ISLNK(st_mode):
        kind = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(st_mode):
        kind = CHRTYPE
    elif stat.S_ISBLK(st_mode):
        kind = BLKTYPE
    else:
        return None

    # Copy everything the stat result offers into the TarInfo.
    tarinfo.name = arcname
    tarinfo.mode = st_mode
    tarinfo.uid = st.st_uid
    tarinfo.gid = st.st_gid
    # Only regular files carry data in the archive.
    tarinfo.size = st.st_size if kind == REGTYPE else 0
    tarinfo.mtime = st.st_mtime
    tarinfo.type = kind
    tarinfo.linkname = linkname

    # Names of owner and group are filled in where they can be resolved.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if (kind in (CHRTYPE, BLKTYPE) and
            hasattr(os, "major") and hasattr(os, "minor")):
        tarinfo.devmajor = os.major(st.st_rdev)
        tarinfo.devminor = os.minor(st.st_rdev)
    return tarinfo
1907
def list(self, verbose=True, *, members=None):
    """Print a table of contents, one line per member. If `verbose'
       is False, only the names of the members are printed. If it is
       True, an `ls -l'-like output is produced. `members' is optional
       and must be a subset of the list returned by getmembers(); by
       default all members are listed.
    """
    self._check()

    entries = self if members is None else members
    for tarinfo in entries:
        if verbose:
            _safe_print(stat.filemode(tarinfo.mode))
            owner = tarinfo.uname or tarinfo.uid
            group = tarinfo.gname or tarinfo.gid
            _safe_print("%s/%s" % (owner, group))
            # Devices show "major,minor" in place of a size.
            if tarinfo.ischr() or tarinfo.isblk():
                device = "%d,%d" % (tarinfo.devmajor, tarinfo.devminor)
                _safe_print("%10s" % device)
            else:
                _safe_print("%10d" % tarinfo.size)
            stamp = time.localtime(tarinfo.mtime)[:6]
            _safe_print("%d-%02d-%02d %02d:%02d:%02d" % stamp)

        label = tarinfo.name
        if tarinfo.isdir():
            label = label + "/"
        _safe_print(label)

        if verbose and tarinfo.issym():
            _safe_print("-> " + tarinfo.linkname)
        if verbose and tarinfo.islnk():
            _safe_print("link to " + tarinfo.linkname)
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001939
def add(self, name, arcname=None, recursive=True, *, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `filter' is a function that is
       called with the TarInfo object of each file and returns the
       (possibly changed) TarInfo object; if it returns None, the file is
       left out of the archive.
    """
    self._check("awx")

    if arcname is None:
        arcname = name

    # Never put the archive into itself.
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    tarinfo = self.gettarinfo(name, arcname)
    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Let the filter change or reject the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Regular files are stored with their data.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as source:
            self.addfile(tarinfo, source)
        return

    # Everything else consists of a header only; directories are
    # followed by their entries in sorted order.
    descend = tarinfo.isdir() and recursive
    self.addfile(tarinfo)
    if descend:
        for entry in sorted(os.listdir(name)):
            self.add(os.path.join(name, entry), os.path.join(arcname, entry),
                    recursive, filter=filter)
1990
def addfile(self, tarinfo, fileobj=None):
    """Write the member described by `tarinfo' to the archive.

       The header is produced by tarinfo.tobuf(). If `fileobj' is given
       it should be a binary file; tarinfo.size bytes are copied from it
       and the data is padded with NULs up to a multiple of BLOCKSIZE.
       A shallow copy of `tarinfo' is appended to self.members. TarInfo
       objects can be created directly or with gettarinfo().
    """
    self._check("awx")

    # The archive keeps its own shallow copy of the header object.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)
    chunk_size = self.copybufsize

    # Payload, if any, follows the header and is padded to a full block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size, bufsize=chunk_size)
        full_blocks, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002015
def extractall(self, path=".", members=None, *, numeric_owner=False):
    """Extract every member of the archive below `path'.

       `path' defaults to the current working directory. `members' is
       optional and must be a subset of the list returned by
       getmembers(); when omitted, the archive itself is iterated.
       Directories are first created with mode 0o700 and receive their
       owner, modification time and permissions only after all members
       were extracted. If `numeric_owner' is True, only the numbers for
       user/group names are used and not the names.
    """
    deferred = []

    for tarinfo in (self if members is None else members):
        if tarinfo.isdir():
            # Remember the real attributes, but extract a copy that
            # carries a safe mode.
            deferred.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 0o700
        # Attributes of directories are applied in the loop below.
        self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
                     numeric_owner=numeric_owner)

    # Order the directories by name and walk them back to front.
    deferred.sort(key=lambda entry: entry.name)
    deferred.reverse()

    # Now apply owner, mtime and mode to the directories.
    for tarinfo in deferred:
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2055
def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
    """Extract a single member below `path', using its full name.

       `member' may be a filename (looked up with getmember()) or a
       TarInfo object. `path' defaults to the current working directory.
       File attributes (owner, mtime, mode) are set unless `set_attrs'
       is False. If `numeric_owner' is True, only the numbers for
       user/group names are used and not the names. An OSError is
       re-raised when errorlevel > 0 and an ExtractError when
       errorlevel > 1; otherwise the error is only reported via _dbg().
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # makelink() needs to know where a hard link's target ends up.
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
                             set_attrs=set_attrs,
                             numeric_owner=numeric_owner)
    except OSError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2093
def extractfile(self, member):
    """Return a file object for reading the data of `member'.

       `member' may be a filename or a TarInfo object. For a regular
       file (or a member of an unknown type) the result of
       self.fileobject(self, tarinfo) is returned; for a link, the file
       object of the link's target is returned. For all other existing
       members, None is returned. A filename is resolved with
       getmember(). Links cannot be followed when the archive is read
       from a _Stream; StreamError is raised in that case.
    """
    self._check("r")

    if isinstance(member, str):
        tarinfo = self.getmember(member)
    else:
        tarinfo = member

    # Members with unknown types are treated as regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # Directories, devices etc. have no data: hand back None.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # A small but ugly workaround for the case that someone tries to
    # extract a (sym)link as a file object from a non-seekable stream
    # of tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2125
def _extract_member(self, tarinfo, targetpath, set_attrs=True,
                    numeric_owner=False):
    """Extract the TarInfo object `tarinfo' to a physical file called
       `targetpath'.

       Missing parent directories of `targetpath' are created first.
       The member is then materialised by the make*() method matching
       its type. Unless `set_attrs' is false, owner, mode (except for
       symbolic links) and modification time are applied afterwards;
       `numeric_owner' is handed on to chown().
    """
    # Normalise the destination: drop trailing slashes and switch to
    # the platform specific separator.
    targetpath = targetpath.rstrip("/")
    targetpath = targetpath.replace("/", os.sep)

    # Create all upper directories.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        # Create directories that are not part of the archive with
        # default permissions. exist_ok=True closes the window in which
        # another process/thread creates the directory between the
        # exists() check above and this call.
        os.makedirs(upperdirs, exist_ok=True)

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Dispatch on the member type; anything not recognised ends up as a
    # regular file.
    if tarinfo.isreg():
        self.makefile(tarinfo, targetpath)
    elif tarinfo.isdir():
        self.makedir(tarinfo, targetpath)
    elif tarinfo.isfifo():
        self.makefifo(tarinfo, targetpath)
    elif tarinfo.ischr() or tarinfo.isblk():
        self.makedev(tarinfo, targetpath)
    elif tarinfo.islnk() or tarinfo.issym():
        self.makelink(tarinfo, targetpath)
    elif tarinfo.type not in SUPPORTED_TYPES:
        self.makeunknown(tarinfo, targetpath)
    else:
        self.makefile(tarinfo, targetpath)

    if set_attrs:
        self.chown(tarinfo, targetpath, numeric_owner)
        # The mode of a symbolic link itself is not changed.
        if not tarinfo.issym():
            self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002169
2170 #--------------------------------------------------------------------------
2171 # Below are the different file methods. They are called via
2172 # _extract_member() when extract() is called. They can be replaced in a
2173 # subclass to implement other functionality.
2174
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath' unless it already exists.
    """
    # The directory starts out with a safe mode; _extract_member()
    # applies the real mode afterwards via chmod().
    safe_mode = 0o700
    try:
        os.mkdir(targetpath, safe_mode)
    except FileExistsError:
        return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002184
def makefile(self, tarinfo, targetpath):
    """Write the data of `tarinfo' to a new file called `targetpath'.
    """
    source = self.fileobj
    source.seek(tarinfo.offset_data)
    bufsize = self.copybufsize
    with bltn_open(targetpath, "wb") as target:
        if tarinfo.sparse is None:
            # Ordinary member: one contiguous run of tarinfo.size bytes.
            copyfileobj(source, target, tarinfo.size, ReadError, bufsize)
            return

        # Sparse member: each (offset, size) pair is one stored chunk
        # that is written at its offset in the target file.
        for offset, size in tarinfo.sparse:
            target.seek(offset)
            copyfileobj(source, target, size, ReadError, bufsize)
        # Give the file its final length.
        target.seek(tarinfo.size)
        target.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002200
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' as if it were a
       regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    notice = ("tarfile: Unknown file type %r, "
              "extracted as regular file.") % tarinfo.type
    self._dbg(1, notice)
2208
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called `targetpath'. ExtractError is raised when
       the os module offers no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002216
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node called `targetpath'.
       ExtractError is raised when the os module lacks mknod() or
       makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the file type bit for the node.
    mode = tarinfo.mode | (stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR)
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2231
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called `targetpath'. If it cannot be
       created (platform limitation), we try to make a copy of the
       referenced file instead of a link.

       ExtractError is raised if that fallback cannot find the link
       target inside the archive.
    """
    try:
        # For systems that support symbolic and hard links.
        if tarinfo.issym():
            if os.path.lexists(targetpath):
                # Avoid FileExistsError on following os.symlink.
                os.unlink(targetpath)
            os.symlink(tarinfo.linkname, targetpath)
        else:
            # tarinfo._link_target is prepared by extract().
            if os.path.exists(tarinfo._link_target):
                os.link(tarinfo._link_target, targetpath)
            else:
                # The target is not on disk: extract the referenced
                # member under the link's name instead.
                self._extract_member(self._find_link_target(tarinfo),
                                     targetpath)
    except symlink_exception:
        try:
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
        except KeyError:
            # The KeyError (raised while already handling the link
            # failure) adds nothing for the caller; suppress the
            # implicit exception context.
            raise ExtractError(
                "unable to resolve link inside archive") from None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002257
def chown(self, tarinfo, targetpath, numeric_owner):
    """Set owner of `targetpath' according to `tarinfo'.

       If `numeric_owner' is True, use .gid/.uid instead of
       .gname/.uname. If `numeric_owner' is False, fall back to
       .gid/.uid when the search based on name fails. Nothing is done
       unless os.geteuid() exists and reports 0. ExtractError (with the
       OSError as its cause) is raised if the owner cannot be changed.
    """
    if not (hasattr(os, "geteuid") and os.geteuid() == 0):
        # We have to be root to change ownership.
        return

    g = tarinfo.gid
    u = tarinfo.uid
    if not numeric_owner:
        # Prefer the ids the names map to on this system, when known.
        try:
            if grp:
                g = grp.getgrnam(tarinfo.gname)[2]
        except KeyError:
            pass
        try:
            if pwd:
                u = pwd.getpwnam(tarinfo.uname)[2]
        except KeyError:
            pass

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself, not the file it points to.
            os.lchown(targetpath, u, g)
        else:
            os.chown(targetpath, u, g)
    except OSError as e:
        raise ExtractError("could not change owner") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002286
def chmod(self, tarinfo, targetpath):
    """Set file permissions of `targetpath' according to `tarinfo'.

       ExtractError (with the OSError as its cause) is raised if the
       mode cannot be changed.
    """
    try:
        os.chmod(targetpath, tarinfo.mode)
    except OSError as e:
        raise ExtractError("could not change mode") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002294
def utime(self, tarinfo, targetpath):
    """Set modification time of `targetpath' according to `tarinfo'.

       tarinfo.mtime is used for both the access and the modification
       time. Nothing is done if the os module has no utime().
       ExtractError (with the OSError as its cause) is raised if the
       time cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except OSError as e:
        raise ExtractError("could not change modification time") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002304
2305 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       ReadError is raised for a header problem that cannot be skipped
       and when the underlying file ends before self.offset.
    """
    self._check("ra")
    if self.firstmember is not None:
        # Hand out the member that was stored in self.firstmember
        # before anything else is read.
        m = self.firstmember
        self.firstmember = None
        return m

    # Advance the file pointer. Reading the byte just before the
    # expected offset proves that the file really extends that far.
    if self.offset != self.fileobj.tell():
        self.fileobj.seek(self.offset - 1)
        if not self.fileobj.read(1):
            raise ReadError("unexpected end of data")

    # Read the next block.
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as e:
            if self.ignore_zeros:
                # Skip the block and try the next one.
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as e:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                # The ReadError already carries the message; drop the
                # redundant implicit context (same for the raises below).
                raise ReadError(str(e)) from None
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file") from None
        except TruncatedHeaderError as e:
            if self.offset == 0:
                raise ReadError(str(e)) from None
        except SubsequentHeaderError as e:
            raise ReadError(str(e)) from None
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        # Nothing more could be read: the member list is complete.
        self._loaded = True

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002356
2357 #--------------------------------------------------------------------------
2358 # Little helper methods:
2359
def _getmember(self, name, tarinfo=None, normalize=False):
    """Look up the member called `name', searching from the end of the
       archive towards its beginning.

       If `tarinfo' is given, only members that precede it are
       considered. With `normalize' set, both `name' and the member
       names are compared after os.path.normpath(). None is returned if
       nothing matches.
    """
    # getmembers() makes sure that all members have been loaded.
    candidates = self.getmembers()

    # Cut the search list off right before the starting point.
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    wanted = os.path.normpath(name) if normalize else name

    for member in reversed(candidates):
        current = os.path.normpath(member.name) if normalize else member.name
        if wanted == current:
            return member
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002382
def _load(self):
    """Call next() until it returns None, i.e. read through the whole
       archive, and mark the TarFile as loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2392
def _check(self, mode=None):
    """Raise OSError if the TarFile is closed or if, with `mode' given,
       the TarFile's own mode is not contained in `mode'.
    """
    if self.closed:
        raise OSError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise OSError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002401
def _find_link_target(self, tarinfo):
    """Return the member that the symlink or hardlink member `tarinfo'
       points to. KeyError is raised if there is no such member.
    """
    if tarinfo.issym():
        # A symlink's target is taken relative to the link's directory
        # and the entire archive is searched.
        pieces = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join(piece for piece in pieces if piece)
        limit = None
    else:
        # A hard link is just a reference to an already archived file,
        # so only the members before the link are searched.
        linkname = tarinfo.linkname
        limit = tarinfo

    found = self._getmember(linkname, tarinfo=limit, normalize=True)
    if found is None:
        raise KeyError("linkname %r not found" % linkname)
    return found
2420
def __iter__(self):
    """Iterate over the members of the archive.
    """
    if self._loaded:
        yield from self.members
        return

    # Members are produced with next(); once it reports that nothing
    # is left, the TarFile is flagged as _loaded.
    position = 0
    # Fix for SF #1100429: Under rare circumstances it can
    # happen that getmembers() is called during iteration,
    # which will have already exhausted the next() method.
    if self.firstmember is not None:
        first = self.next()
        position += 1
        yield first

    while True:
        if position >= len(self.members):
            if self._loaded:
                return
            tarinfo = self.next()
            if not tarinfo:
                self._loaded = True
                return
        else:
            # Already read (e.g. by a concurrent getmembers() call).
            tarinfo = self.members[position]
        position += 1
        yield tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002451
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr if `level' does not exceed self.debug.
    """
    if self.debug >= level:
        print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002457
def __enter__(self):
    """Enter the runtime context: verify the TarFile is still open and
       return it.
    """
    self._check()
    return self
2461
def __exit__(self, type, value, traceback):
    """Leave the runtime context. Without a pending exception the
       TarFile is closed normally; otherwise only the underlying file
       is closed (unless self._extfileobj is set) and the TarFile is
       marked as closed.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because
    # it would try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002471
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002472#--------------------
2473# exported functions
2474#--------------------
def is_tarfile(name):
    """Return True if `name' can be opened as a tar archive, else
       return False.

       'name' should be a string, file, or file-like object. Anything
       with a read attribute is passed to open() as fileobj.
    """
    try:
        t = open(fileobj=name) if hasattr(name, "read") else open(name)
        t.close()
    except TarError:
        return False
    return True

# Module-level open() is TarFile.open.
open = TarFile.open
Serhiy Storchakad27b4552013-11-24 01:53:29 +02002492
2493
def main():
    """Command-line entry point: list, extract, create or test a tar
       archive, depending on which of -l/-e/-c/-t was given.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for tarfile module.')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    # Exactly one of the actions below has to be chosen.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-l', '--list', metavar='<tarfile>',
                       help='Show listing of a tarfile')
    group.add_argument('-e', '--extract', nargs='+',
                       metavar=('<tarfile>', '<output_dir>'),
                       help='Extract tarfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create tarfile from sources')
    group.add_argument('-t', '--test', metavar='<tarfile>',
                       help='Test if a tarfile is valid')
    args = parser.parse_args()

    # parser.exit() terminates the program, so it doubles as a guard.
    not_an_archive = '{!r} is not a tar archive.\n'

    if args.test is not None:
        src = args.test
        if not is_tarfile(src):
            parser.exit(1, not_an_archive.format(src))
        with open(src, 'r') as tar:
            tar.getmembers()
            print(tar.getmembers(), file=sys.stderr)
        if args.verbose:
            print('{!r} is a tar archive.'.format(src))

    elif args.list is not None:
        src = args.list
        if not is_tarfile(src):
            parser.exit(1, not_an_archive.format(src))
        with TarFile.open(src, 'r:*') as tf:
            tf.list(verbose=args.verbose)

    elif args.extract is not None:
        # One argument: archive only; two: archive and output directory.
        if len(args.extract) == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif len(args.extract) == 2:
            src, curdir = args.extract
        else:
            parser.exit(1, parser.format_help())

        if not is_tarfile(src):
            parser.exit(1, not_an_archive.format(src))
        with TarFile.open(src, 'r:*') as tf:
            tf.extractall(path=curdir)
        if args.verbose:
            if curdir == '.':
                msg = '{!r} file is extracted.'.format(src)
            else:
                msg = ('{!r} file is extracted '
                       'into {!r} directory.').format(src, curdir)
            print(msg)

    elif args.create is not None:
        # The first name is the archive, the remaining ones its content.
        tar_name = args.create.pop(0)
        tar_files = args.create
        _, ext = os.path.splitext(tar_name)
        # Compression is picked from the archive's file extension.
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        compression = compressions.get(ext)
        tar_mode = 'w' if compression is None else 'w:' + compression

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))

if __name__ == '__main__':
    main()