blob: 6769066cabd6fcb2434a52126bb534f55af7fcb6 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
Guido van Rossumd8faa362007-04-27 19:54:29 +000032version = "0.9.0"
Guido van Rossum98297ee2007-11-06 21:34:58 +000033__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
Guido van Rossum98297ee2007-11-06 21:34:58 +000034__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000035
36#---------
37# Imports
38#---------
Serhiy Storchakacf4a2f22015-03-11 17:18:03 +020039from builtins import open as bltn_open
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000040import sys
41import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020042import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000043import shutil
44import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000045import time
46import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000047import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000048import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000049
50try:
Xavier de Gayef44abda2016-12-09 09:33:09 +010051 import pwd
Brett Cannoncd171c82013-07-04 17:43:24 -040052except ImportError:
Xavier de Gayef44abda2016-12-09 09:33:09 +010053 pwd = None
54try:
55 import grp
56except ImportError:
57 grp = None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000058
# Exceptions that signal "symbolic links cannot be created here":
#  - AttributeError / NotImplementedError: os.symlink is missing or not
#    implemented (e.g. Windows prior to 6.0).
#  - OSError (winerror=1314): the caller does not hold the
#    SeCreateSymbolicLinkPrivilege privilege.
# OSError is a builtin, so no NameError guard is needed around it.
symlink_exception = (AttributeError, NotImplementedError, OSError)
67
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000068# from tarfile import *
# Names exported by ``from tarfile import *``.
__all__ = [
    "TarFile",
    "TarInfo",
    "is_tarfile",
    "TarError",
    "ReadError",
    "CompressionError",
    "StreamError",
    "ExtractError",
    "HeaderError",
    "ENCODING",
    "USTAR_FORMAT",
    "GNU_FORMAT",
    "PAX_FORMAT",
    "DEFAULT_FORMAT",
    "open",
]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000073
74#---------------------------------------------------------
75# tar constants
76#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records

# The header reserves 8 bytes for the magic + version fields, so both
# magic strings must be exactly 8 bytes long.  The GNU variant is "ustar"
# followed by two spaces and a NUL; the spaces are spelled as \x20 escapes
# so they cannot be silently collapsed by whitespace normalisation.
GNU_MAGIC = b"ustar\x20\x20\0"  # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000082
# Width limits of the name-like header fields.
LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Type flags as they appear in a header.
REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

# GNU tar extensions.
GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

# pax extensions.
XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

# Archive formats.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = PAX_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, GNUTYPE_SPARSE)

# File types that tarfile supports: the standard types followed by
# the GNU ones.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
) + GNU_TYPES

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE, CONTTYPE, GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = set(("path", "linkpath", "uname", "gname"))

# Fields in a pax header that are numbers (with the converter to apply);
# all other fields are treated as strings.
PAX_NUMBER_FIELDS = dict(
    atime=float,
    ctime=float,
    mtime=float,
    uid=int,
    gid=int,
    size=int,
)
145
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000146#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000147# initialization
148#---------------------------------------------------------
# Default encoding: UTF-8 when os.name is "nt", otherwise whatever the
# interpreter reports as the filesystem encoding.
ENCODING = "utf-8" if os.name == "nt" else sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000153
154#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000155# Some useful functions
156#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000157
def stn(s, length, encoding, errors):
    """Encode the string s and fit it into a field of length bytes.

    The encoded value is cut off after length bytes; shorter values are
    padded on the right with NUL bytes.
    """
    encoded = s.encode(encoding, errors)
    return encoded[:length].ljust(length, NUL)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000163
def nts(s, encoding, errors):
    """Decode a bytes field, ignoring everything from the first NUL on.

    If s contains no NUL byte the whole value is decoded.
    """
    head, _sep, _tail = s.partition(b"\0")
    return head.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000171
def nti(s):
    """Decode a header number field into a python int.

    Two encodings are understood (see itn()): a leading 0o200 or 0o377
    byte introduces a big-endian base-256 number (0o377 marks a negative
    value); anything else is parsed as NUL-terminated octal text, where
    an empty field counts as 0.  Unparsable text raises
    InvalidHeaderError.
    """
    marker = s[0]
    if marker not in (0o200, 0o377):
        try:
            text = nts(s, "ascii", "strict")
            return int(text.strip() or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")

    # Base-256: fold the bytes after the marker, most significant first.
    value = 0
    for byte in s[1:]:
        value = (value << 8) + byte
    if marker == 0o377:
        value -= 256 ** (len(s) - 1)
    return value
191
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Encode a python number as a header number field of digits bytes.

    POSIX 1003.1-1988 stores numbers as octal digits followed by a NUL
    byte, which covers 0 .. 8**(digits-1) - 1.  For GNU_FORMAT only,
    values outside that range are stored in base-256: a marker byte
    (0o200 for non-negative, 0o377 for negative) followed by digits-1
    big-endian bytes.  Anything that fits neither encoding raises
    ValueError.
    """
    n = int(n)
    width = digits - 1

    if 0 <= n < 8 ** width:
        return bytes("%0*o" % (width, n), "ascii") + NUL

    if format != GNU_FORMAT or not (-256 ** width <= n < 256 ** width):
        raise ValueError("overflow in number field")

    # The payload is the low `width` bytes of the two's complement
    # representation, which is exactly n modulo 256**width.
    field = bytearray([0o200 if n >= 0 else 0o377])
    field += (n % 256 ** width).to_bytes(width, "big")
    return field
220
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member header.

    Bytes 0-147 and 156-511 of buf are summed; the 8 byte chksum field
    in between is counted as if it held spaces (8 * 32 == 256).  Some
    tars (Sun and NeXT, according to the GNU tar sources) sum signed
    chars, which differs once a byte has its high bit set, hence both
    variants are computed.
    """
    sums = []
    for code in ("B", "b"):
        layout = "148{0}8x356{0}".format(code)
        sums.append(256 + sum(struct.unpack_from(layout, buf)))
    return tuple(sums)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000233
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy data from fileobj src to fileobj dst.

    With length None everything up to EOF is copied; otherwise exactly
    length bytes are copied in pieces of at most bufsize bytes (16 KiB
    if bufsize is not given).  If src runs dry early,
    exception("unexpected end of data") is raised.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    full_chunks, tail = divmod(length, bufsize)

    def chunk_sizes():
        # All full-size chunks first, then the shorter tail (if any).
        for _ in range(full_chunks):
            yield bufsize
        if tail != 0:
            yield tail

    for wanted in chunk_sizes():
        data = src.read(wanted)
        if len(data) < wanted:
            raise exception("unexpected end of data")
        dst.write(data)
    return
258
def _safe_print(s):
    """Print s followed by a space, escaping what stdout cannot encode.

    When sys.stdout reports an encoding, characters outside it are
    replaced by backslash escapes before printing.
    """
    encoding = getattr(sys.stdout, 'encoding', None)
    if encoding is None:
        text = s
    else:
        text = s.encode(encoding, 'backslashreplace').decode(encoding)
    print(text, end=' ')
264
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000265
class TarError(Exception):
    """Root of the tarfile exception hierarchy."""


class ExtractError(TarError):
    """Raised for general extraction errors."""


class ReadError(TarError):
    """Raised for tar archives that cannot be read."""


class CompressionError(TarError):
    """Raised when a compression method is unavailable."""


class StreamError(TarError):
    """Raised for operations unsupported on stream-like TarFiles."""


class HeaderError(TarError):
    """Root of the header related exceptions."""


class EmptyHeaderError(HeaderError):
    """Raised for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Raised for truncated headers."""


class EOFHeaderError(HeaderError):
    """Raised for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Raised for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Raised for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000299
300#---------------------------
301# internal stream interface
302#---------------------------
class _LowLevelFile:
    """Minimal file object built directly on an os-level descriptor.

    It offers just read(), write() and close() and is used in place of
    a regular file object for streaming access.
    """

    def __init__(self, name, mode):
        """Open name for reading ("r") or writing ("w").

        Any other mode raises KeyError.  Writing creates or truncates
        the file.
        """
        flags_by_mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }
        # O_BINARY only exists on some platforms; add it where available.
        flags = flags_by_mode[mode] | getattr(os, "O_BINARY", 0)
        self.fd = os.open(name, flags, 0o666)

    def close(self):
        """Close the underlying descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Read up to size bytes from the descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write the bytes s to the descriptor."""
        os.write(self.fd, s)
326
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip, bzip2 or xz compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

        name     -- file name; opened via _LowLevelFile if fileobj is None.
        mode     -- "r" for reading or "w" for writing.
        comptype -- "tar" (no compression), "gz", "bz2", "xz", or "*" to
                    detect the compression from the first block read.
        fileobj  -- existing stream-like object, or None.
        bufsize  -- number of bytes transferred per underlying read/write.

        Raises CompressionError if the required compression module is
        missing or comptype is unknown.
        """
        # Only a file object that was opened here gets closed here.
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = b""
        self.pos = 0
        self.closed = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    self._init_read_gz()
                    self.exception = zlib.error
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except BaseException:
            # Whatever went wrong, do not leak a file opened above; the
            # error itself is passed on unchanged.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" is missing if __init__ failed before setting it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        # pos counts uncompressed bytes.
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # gzip trailer: CRC32 and size of the uncompressed data.
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

        Consumes the gzip header from the raw stream so that the next
        raw bytes belong to the deflate data.  Raises ReadError if the
        magic number is wrong and CompressionError if the compression
        method is not deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        # Skip the six bytes following the flags.
        self.__read(6)

        if flag & 4:
            # Extra field: 2 byte little-endian length plus payload.  It is
            # part of the uncompressed gzip header, so it must be skipped
            # with the raw __read(); read() would push these bytes through
            # the decompressor and advance the stream position.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # NUL-terminated field, skipped byte by byte.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # Second NUL-terminated field.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # Two more header bytes to skip.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.

        Seeking forward is done by reading and discarding data.
        Raises StreamError if pos lies before the current position.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size):
        """Return the next size number of bytes from the stream."""
        assert size is not None
        buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.

        For compressed streams raw data is decompressed until size
        bytes are available or the input is exhausted; surplus output
        is kept in self.dbuf.  Raises ReadError on invalid compressed
        data.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            # Skip underlying buffer to avoid unaligned double buffering.
            if self.buf:
                buf = self.buf
                self.buf = b""
            else:
                buf = self.fileobj.read(self.bufsize)
                if not buf:
                    break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                raise ReadError("invalid compressed data")
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
566
class _StreamProxy(object):
    """Wrapper that peeks at the first block of a stream so that the
       compression type can be detected (mode 'r|*').
    """

    def __init__(self, fileobj):
        """Read the first BLOCKSIZE bytes of fileobj and keep them."""
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the peeked block once, regardless of size.

        The instance attribute installed here shadows this method, so
        every later read() goes straight to the wrapped file object.
        """
        first_block = self.buf
        self.read = self.fileobj.read
        return first_block

    def getcomptype(self):
        """Guess the compression type from the peeked block.

        Returns "gz", "bz2" or "xz" when the matching signature is
        found and "tar" otherwise.
        """
        head = self.buf
        if head.startswith(b"\x1f\x8b\x08"):
            return "gz"
        if head[0:3] == b"BZh" and head[4:10] == b"1AY&SY":
            return "bz2"
        if head.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        return "tar"

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class StreamProxy
593
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000594#------------------------
595# Extraction file object
596#------------------------
class _FileInFile(object):
    """Read-only view of a slice of another file object, presented
       as a file object of its own.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        """Create a view of size bytes whose data starts at offset.

        blockinfo is a list of (offset, size) pairs naming the parts of
        the view that are stored in fileobj; everything in between
        reads as NUL bytes.  If omitted, the whole view is stored.
        """
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            blockinfo = [(0, size)]

        # Build the segment map; every entry is
        # (is_data, start, stop, position_in_fileobj_or_None).
        self.map_index = 0
        self.map = segments = []
        covered = 0
        stored_at = self.offset
        for start, length in blockinfo:
            if start > covered:
                segments.append((False, covered, start, None))
            stop = start + length
            segments.append((True, start, stop, stored_at))
            stored_at += length
            covered = stop
        if covered < self.size:
            segments.append((False, covered, self.size, None))

    def flush(self):
        """Do nothing; the view is read-only."""
        pass

    def readable(self):
        """Return True."""
        return True

    def writable(self):
        """Return False."""
        return False

    def seekable(self):
        """Return whether the underlying file object is seekable."""
        return self.fileobj.seekable()

    def tell(self):
        """Return the current position within the view."""
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Move to a new position within the view and return it.

        The result is kept within 0..size.  An unknown whence raises
        ValueError.
        """
        if whence == io.SEEK_SET:
            target = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            moved = self.position + position
            target = max(moved, 0) if position < 0 else min(moved, self.size)
        elif whence == io.SEEK_END:
            target = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        self.position = target
        return target

    def read(self, size=None):
        """Read up to size bytes (everything that is left if size is None).

        Raises ReadError if the underlying file object returns less
        data than a stored segment requires.
        """
        left = self.size - self.position
        size = left if size is None else min(size, left)

        pieces = []
        while size > 0:
            # Cycle through the map until the segment holding the
            # current position is found.
            while True:
                is_data, start, stop, stored_at = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                self.map_index = (self.map_index + 1) % len(self.map)
            length = min(size, stop - self.position)
            if is_data:
                self.fileobj.seek(stored_at + (self.position - start))
                chunk = self.fileobj.read(length)
                if len(chunk) != length:
                    raise ReadError("unexpected end of data")
            else:
                chunk = NUL * length
            pieces.append(chunk)
            size -= length
            self.position += length
        return b"".join(pieces)

    def readinto(self, b):
        """Fill the buffer b with data and return the number of bytes."""
        data = self.read(len(b))
        count = len(data)
        b[:count] = data
        return count

    def close(self):
        """Mark the view as closed."""
        self.closed = True
#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000700
class ExFileObject(io.BufferedReader):
    """Buffered reader over the data of a single archive member."""

    def __init__(self, tarfile, tarinfo):
        """Wrap the data of the member tarinfo stored in tarfile.

        The raw layer is a _FileInFile built from the archive's file
        object and the member's data offset, size and sparse info.
        """
        raw = _FileInFile(
            tarfile.fileobj,
            tarinfo.offset_data,
            tarinfo.size,
            tarinfo.sparse,
        )
        super().__init__(raw)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000708
709#------------------
710# Exported Classes
711#------------------
712class TarInfo(object):
713 """Informational class which holds the details about an
714 archive member given by a tar header block.
715 TarInfo objects are returned by TarFile.getmember(),
716 TarFile.getmembers() and TarFile.gettarinfo() and are
717 usually created internally.
718 """
719
Raymond Hettingera694f232019-03-27 13:16:34 -0700720 __slots__ = dict(
721 name = 'Name of the archive member.',
722 mode = 'Permission bits.',
723 uid = 'User ID of the user who originally stored this member.',
724 gid = 'Group ID of the user who originally stored this member.',
725 size = 'Size in bytes.',
726 mtime = 'Time of last modification.',
727 chksum = 'Header checksum.',
728 type = ('File type. type is usually one of these constants: '
729 'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
730 'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
731 linkname = ('Name of the target file name, which is only present '
732 'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
733 uname = 'User name.',
734 gname = 'Group name.',
735 devmajor = 'Device major number.',
736 devminor = 'Device minor number.',
737 offset = 'The tar header starts here.',
738 offset_data = "The file's data starts here.",
739 pax_headers = ('A dictionary containing key-value pairs of an '
740 'associated pax extended header.'),
741 sparse = 'Sparse member information.',
742 tarfile = None,
743 _sparse_structs = None,
744 _link_target = None,
745 )
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +0000746
def __init__(self, name=""):
    """Initialise a TarInfo object with default header values.

    name is the optional member name. Every other field starts out as
    zero, an empty string or an empty container, the mode defaults to
    0o644 and the type to REGTYPE.
    """
    # Identity and permissions.
    self.name = name            # member name
    self.mode = 0o644           # file permissions
    self.uid = 0                # user id
    self.gid = 0                # group id

    # Size, time stamp and checksum.
    self.size = 0               # file size
    self.mtime = 0              # modification time
    self.chksum = 0             # header checksum

    # Member type and link target.
    self.type = REGTYPE         # member type
    self.linkname = ""          # link name

    # Owner names and device numbers.
    self.uname = ""             # user name
    self.gname = ""             # group name
    self.devmajor = 0           # device major number
    self.devminor = 0           # device minor number

    # Positions inside the archive.
    self.offset = 0             # the tar header starts here
    self.offset_data = 0        # the file's data starts here

    # Extension data.
    self.sparse = None          # sparse member information
    self.pax_headers = {}       # pax header information
770
@property
def path(self):
    """Alias for the "name" attribute; pax headers call it "path"."""
    return self.name

@path.setter
def path(self, name):
    # Writing to the alias stores the value in the "name" attribute.
    self.name = name
779
@property
def linkpath(self):
    """Alias for the "linkname" attribute; pax headers call it "linkpath"."""
    return self.linkname

@linkpath.setter
def linkpath(self, linkname):
    # Writing to the alias stores the value in the "linkname" attribute.
    self.linkname = linkname
Guido van Rossumd8faa362007-04-27 19:54:29 +0000788
def __repr__(self):
    """Return "<ClassName 'name' at 0xADDRESS>" for this object."""
    class_name = self.__class__.__name__
    return "<%s %r at %#x>" % (class_name, self.name, id(self))
791
def get_info(self):
    """Return the header attributes of this TarInfo as a new dictionary.

    The mode is reduced to its lowest twelve bits (0o7777). If the
    member is a directory, its name in the dictionary is given a
    trailing slash unless it already ends with one.
    """
    keys = (
        "name", "mode", "uid", "gid", "size", "mtime", "chksum",
        "type", "linkname", "uname", "gname", "devmajor", "devminor",
    )
    info = {key: getattr(self, key) for key in keys}
    info["mode"] &= 0o7777

    is_directory = info["type"] == DIRTYPE
    if is_directory and not info["name"].endswith("/"):
        info["name"] += "/"

    return info
815
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
    """Return the tar header for this member in the requested format.

    format selects one of USTAR_FORMAT, GNU_FORMAT or PAX_FORMAT and
    the matching create_*_header() method is called with the result of
    get_info(). Any other value raises ValueError.
    """
    info = self.get_info()

    if format == USTAR_FORMAT:
        return self.create_ustar_header(info, encoding, errors)
    if format == GNU_FORMAT:
        return self.create_gnu_header(info, encoding, errors)
    if format == PAX_FORMAT:
        # The pax variant takes no error handler argument.
        return self.create_pax_header(info, encoding)

    raise ValueError("invalid format")
829
def create_ustar_header(self, info, encoding, errors):
    """Build a ustar header block from the info dictionary.

    Raises ValueError if the encoded linkname exceeds LENGTH_LINK. A
    name whose encoded form exceeds LENGTH_NAME is split into a
    "prefix" and a "name" entry by _posix_split_name().
    """
    info["magic"] = POSIX_MAGIC

    encoded_linkname = info["linkname"].encode(encoding, errors)
    if len(encoded_linkname) > LENGTH_LINK:
        raise ValueError("linkname is too long")

    encoded_name = info["name"].encode(encoding, errors)
    if len(encoded_name) > LENGTH_NAME:
        prefix, name = self._posix_split_name(info["name"], encoding, errors)
        info["prefix"] = prefix
        info["name"] = name

    return self._create_header(info, USTAR_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000842
def create_gnu_header(self, info, encoding, errors):
    """Build a GNU header block sequence from the info dictionary.

    A GNUTYPE_LONGLINK and/or GNUTYPE_LONGNAME sequence is emitted
    first when the encoded linkname or name exceeds LENGTH_LINK or
    LENGTH_NAME; the regular header block follows.
    """
    info["magic"] = GNU_MAGIC

    chunks = []

    # The long link sequence, if needed, precedes the long name sequence.
    if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
        chunks.append(self._create_gnu_long_header(
            info["linkname"], GNUTYPE_LONGLINK, encoding, errors))

    if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
        chunks.append(self._create_gnu_long_header(
            info["name"], GNUTYPE_LONGNAME, encoding, errors))

    chunks.append(self._create_header(info, GNU_FORMAT, encoding, errors))
    return b"".join(chunks)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000856
def create_pax_header(self, info, encoding):
    """Build a ustar header block, preceded by a pax extended header
    sequence when some fields cannot be stored in the ustar block.

    Keywords already present in self.pax_headers take priority over
    the values found in info.
    """
    info["magic"] = POSIX_MAGIC
    pax_headers = self.pax_headers.copy()

    # String fields move to the pax header when they are not pure
    # ASCII or when they are longer than the ustar field.
    string_fields = (
        ("name", "path", LENGTH_NAME),
        ("linkname", "linkpath", LENGTH_LINK),
        ("uname", "uname", 32),
        ("gname", "gname", 32),
    )
    for field, keyword, limit in string_fields:
        if keyword in pax_headers:
            # The pax header has priority.
            continue

        text = info[field]
        try:
            text.encode("ascii", "strict")
        except UnicodeEncodeError:
            pax_headers[keyword] = text
        else:
            if len(text) > limit:
                pax_headers[keyword] = text

    # Number fields move to the pax header when they are out of range
    # for the octal field or when they are floats.
    number_fields = (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12))
    for field, digits in number_fields:
        if field in pax_headers:
            # The pax header has priority. Avoid overflow.
            info[field] = 0
            continue

        number = info[field]
        in_range = 0 <= number < 8 ** (digits - 1)
        if not in_range or isinstance(number, float):
            pax_headers[field] = str(number)
            info[field] = 0

    # Only emit an extended header if there is something to put in it.
    if pax_headers:
        extended = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
    else:
        extended = b""

    return extended + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000905
@classmethod
def create_pax_global_header(cls, pax_headers):
    """Build a pax global header sequence (type XGLTYPE) from the
    pax_headers dictionary, using UTF-8 as the encoding.
    """
    sequence = cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
    return sequence
Guido van Rossumd8faa362007-04-27 19:54:29 +0000911
def _posix_split_name(self, name, encoding, errors):
    """Split name at a "/" into a (prefix, name) pair.

    The first split point is used at which the encoded prefix fits
    into LENGTH_PREFIX bytes and the encoded remainder into LENGTH_NAME
    bytes. ValueError is raised if no such split point exists.
    """
    parts = name.split("/")
    for cut in range(1, len(parts)):
        prefix = "/".join(parts[:cut])
        remainder = "/".join(parts[cut:])
        if len(prefix.encode(encoding, errors)) > LENGTH_PREFIX:
            continue
        if len(remainder.encode(encoding, errors)) > LENGTH_NAME:
            continue
        return prefix, remainder

    raise ValueError("name is too long")
927
@staticmethod
def _create_header(info, format, encoding, errors):
    """Return a header block of BLOCKSIZE bytes.

    info is a dictionary with file information; missing keys fall back
    to zero, the empty string, REGTYPE or POSIX_MAGIC. format must be
    one of the *_FORMAT constants and is passed to itn() for the
    numeric fields. encoding and errors are passed to stn() for the
    string fields.
    """
    # Only character and block devices carry device numbers; for all
    # other types the two fields are filled from an empty string.
    has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE)
    if has_device_fields:
        devmajor = itn(info.get("devmajor", 0), 8, format)
        devminor = itn(info.get("devminor", 0), 8, format)
    else:
        devmajor = stn("", 8, encoding, errors)
        devminor = stn("", 8, encoding, errors)

    parts = [
        stn(info.get("name", ""), 100, encoding, errors),
        itn(info.get("mode", 0) & 0o7777, 8, format),
        itn(info.get("uid", 0), 8, format),
        itn(info.get("gid", 0), 8, format),
        itn(info.get("size", 0), 12, format),
        itn(info.get("mtime", 0), 12, format),
        # Checksum placeholder. It must be exactly eight bytes wide so
        # that the checksum occupies bytes 148-155 and the type byte
        # lands at offset 156, which is where frombuf() reads them.
        b" " * 8,
        info.get("type", REGTYPE),
        stn(info.get("linkname", ""), 100, encoding, errors),
        info.get("magic", POSIX_MAGIC),
        stn(info.get("uname", ""), 32, encoding, errors),
        stn(info.get("gname", ""), 32, encoding, errors),
        devmajor,
        devminor,
        stn(info.get("prefix", ""), 155, encoding, errors)
    ]

    # struct.pack() pads the joined fields with NUL bytes up to BLOCKSIZE.
    buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
    chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
    # Overwrite the first seven placeholder bytes (offsets 148-154) with
    # six octal digits and a NUL; the eighth byte stays a space.
    buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
    return buf
963
@staticmethod
def _create_payload(payload):
    """Return payload padded with NUL up to the next multiple of
    BLOCKSIZE. A payload that already ends on a block border is
    returned without padding.
    """
    overhang = len(payload) % BLOCKSIZE
    if overhang > 0:
        payload += (BLOCKSIZE - overhang) * NUL
    return payload
973
@classmethod
def _create_gnu_long_header(cls, name, type, encoding, errors):
    """Build a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence for name.

    The sequence is a header block named "././@LongLink" followed by
    the encoded, NUL-terminated name padded to whole blocks.
    """
    data = name.encode(encoding, errors) + NUL

    info = {
        "name": "././@LongLink",
        "type": type,
        "size": len(data),
        "magic": GNU_MAGIC,
    }

    # Extended header block first, then the blocks holding the name.
    header = cls._create_header(info, USTAR_FORMAT, encoding, errors)
    return header + cls._create_payload(data)
990
@classmethod
def _create_pax_generic_header(cls, pax_headers, type, encoding):
    """Build a pax extended or global header sequence from the
    keyword/value pairs in pax_headers. The values must be strings.

    type is stored as the member type of the header block. encoding is
    only used for values when the header is written in binary mode.
    """
    # A value that cannot be encoded as strict UTF-8 (for example one
    # containing surrogates) forces hdrcharset=BINARY, see _proc_pax().
    binary = False
    for value in pax_headers.values():
        try:
            value.encode("utf-8", "strict")
        except UnicodeEncodeError:
            binary = True
            break

    chunks = []
    if binary:
        # The hdrcharset record goes at the beginning of the header.
        chunks.append(b"21 hdrcharset=BINARY\n")

    for keyword, value in pax_headers.items():
        key = keyword.encode("utf-8")
        if binary:
            # Try to restore the original byte representation of the
            # value; this requires encoding to match the string.
            val = value.encode(encoding, "surrogateescape")
        else:
            val = value.encode("utf-8")

        # The record length counts its own decimal digits, so iterate
        # until the total stops changing.
        base = len(key) + len(val) + 3   # ' ' + '=' + '\n'
        total = 0
        while True:
            candidate = base + len(str(total))
            if candidate == total:
                break
            total = candidate
        chunks.append(str(total).encode("ascii") + b" " + key + b"=" + val + b"\n")

    records = b"".join(chunks)

    # A hardcoded "././@PaxHeader" name is used like star does,
    # instead of the one that POSIX recommends.
    info = {
        "name": "././@PaxHeader",
        "type": type,
        "size": len(records),
        "magic": POSIX_MAGIC,
    }

    # Header block first, then the record blocks.
    header = cls._create_header(info, USTAR_FORMAT, "ascii", "replace")
    return header + cls._create_payload(records)
1041
@classmethod
def frombuf(cls, buf, encoding, errors):
    """Build a TarInfo object from a header block of BLOCKSIZE bytes.

    Raises EmptyHeaderError for an empty buffer, TruncatedHeaderError
    for a buffer of the wrong length, EOFHeaderError for a block that
    consists of NUL bytes only and InvalidHeaderError if the stored
    checksum matches none of the computed ones.
    """
    size = len(buf)
    if size == 0:
        raise EmptyHeaderError("empty header")
    if size != BLOCKSIZE:
        raise TruncatedHeaderError("truncated header")
    if buf.count(NUL) == BLOCKSIZE:
        raise EOFHeaderError("end of file header")

    chksum = nti(buf[148:156])
    if chksum not in calc_chksums(buf):
        raise InvalidHeaderError("bad checksum")

    def text(start, stop):
        # Decode the string field stored in buf[start:stop].
        return nts(buf[start:stop], encoding, errors)

    def number(start, stop):
        # Decode the number field stored in buf[start:stop].
        return nti(buf[start:stop])

    obj = cls()
    obj.name = text(0, 100)
    obj.mode = number(100, 108)
    obj.uid = number(108, 116)
    obj.gid = number(116, 124)
    obj.size = number(124, 136)
    obj.mtime = number(136, 148)
    obj.chksum = chksum
    obj.type = buf[156:157]
    obj.linkname = text(157, 257)
    obj.uname = text(265, 297)
    obj.gname = text(297, 329)
    obj.devmajor = number(329, 337)
    obj.devminor = number(337, 345)
    prefix = text(345, 500)

    # The old V7 tar format represents a directory as a regular file
    # with a trailing slash.
    if obj.type == AREGTYPE and obj.name.endswith("/"):
        obj.type = DIRTYPE

    # The old GNU sparse format stores up to 4 sparse structures of 24
    # bytes each starting at offset 386. They are kept for later
    # processing in _proc_sparse().
    if obj.type == GNUTYPE_SPARSE:
        structs = []
        for pos in range(386, 386 + 4 * 24, 24):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            structs.append((offset, numbytes))
        isextended = bool(buf[482])
        origsize = nti(buf[483:495])
        obj._sparse_structs = (structs, isextended, origsize)

    # Remove redundant slashes from directories.
    if obj.isdir():
        obj.name = obj.name.rstrip("/")

    # Reconstruct a ustar longname from prefix and name.
    if prefix and obj.type not in GNU_TYPES:
        obj.name = prefix + "/" + obj.name
    return obj
1104
@classmethod
def fromtarfile(cls, tarfile):
    """Read one header block from tarfile.fileobj and return the
    TarInfo object produced by processing it with _proc_member().
    """
    fileobj = tarfile.fileobj
    header = fileobj.read(BLOCKSIZE)
    obj = cls.frombuf(header, tarfile.encoding, tarfile.errors)
    # The header started one block before the current file position.
    obj.offset = fileobj.tell() - BLOCKSIZE
    return obj._proc_member(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001114
Guido van Rossumd8faa362007-04-27 19:54:29 +00001115 #--------------------------------------------------------------------------
1116 # The following are methods that are called depending on the type of a
1117 # member. The entry point is _proc_member() which can be overridden in a
1118 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1119 # implement the following
1120 # operations:
1121 # 1. Set self.offset_data to the position where the data blocks begin,
1122 # if there is data that follows.
1123 # 2. Set tarfile.offset to the position where the next member's header will
1124 # begin.
1125 # 3. Return self or another valid TarInfo object.
def _proc_member(self, tarfile):
    """Dispatch to the _proc_*() method that handles self.type and
    return its result. Types without a dedicated handler go to
    _proc_builtin().
    """
    member_type = self.type
    if member_type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self._proc_gnulong(tarfile)
    if member_type == GNUTYPE_SPARSE:
        return self._proc_sparse(tarfile)
    if member_type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
        return self._proc_pax(tarfile)
    return self._proc_builtin(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001138
def _proc_builtin(self, tarfile):
    """Process a builtin type, or an unknown type which is treated
    like a regular file.

    Sets self.offset_data and tarfile.offset, applies the pax headers
    saved on tarfile and returns self.
    """
    data_start = tarfile.fileobj.tell()
    self.offset_data = data_start

    # Regular files and unsupported types are followed by data blocks
    # that have to be skipped.
    if self.isreg() or self.type not in SUPPORTED_TYPES:
        tarfile.offset = data_start + self._block(self.size)
    else:
        tarfile.offset = data_start

    # Patch the TarInfo object with saved global header information.
    self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

    return self
1155
def _proc_gnulong(self, tarfile):
    """Process the blocks that hold a GNU longname or longlink member.

    The data blocks of this member contain the long name. The header
    that follows is read and processed, and the resulting TarInfo gets
    its name (GNUTYPE_LONGNAME) or linkname (GNUTYPE_LONGLINK) replaced
    by the long one. That TarInfo is returned.

    Raises SubsequentHeaderError if the following header is missing
    or invalid.
    """
    buf = tarfile.fileobj.read(self._block(self.size))

    # Fetch the next header and process it.
    try:
        next = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # Patch the TarInfo object from the next header with
    # the longname information.
    next.offset = self.offset
    if self.type == GNUTYPE_LONGNAME:
        next.name = nts(buf, tarfile.encoding, tarfile.errors)
    elif self.type == GNUTYPE_LONGLINK:
        next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

    # Remove redundant slashes from directories. The long name replaces
    # the name that frombuf() had already stripped, so strip again to
    # stay consistent with frombuf().
    if next.isdir():
        next.name = next.name.rstrip("/")

    return next
1177
def _proc_sparse(self, tarfile):
    """Process a GNU sparse header plus any extension header blocks.

    Stores the collected (offset, numbytes) pairs in self.sparse, sets
    self.offset_data and tarfile.offset, replaces self.size with the
    original size recorded in the header and returns self.
    """
    # Some sparse structures were already collected in frombuf().
    structs, isextended, origsize = self._sparse_structs
    del self._sparse_structs

    # Each extension block holds up to 21 structures of 24 bytes and a
    # flag at offset 504 telling whether another block follows.
    while isextended:
        buf = tarfile.fileobj.read(BLOCKSIZE)
        for pos in range(0, 21 * 24, 24):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset and numbytes:
                structs.append((offset, numbytes))
        isextended = bool(buf[504])
    self.sparse = structs

    data_start = tarfile.fileobj.tell()
    self.offset_data = data_start
    # The header's size field still describes the stored data here.
    tarfile.offset = data_start + self._block(self.size)
    self.size = origsize
    return self
1205
def _proc_pax(self, tarfile):
    """Process an extended or global header as described in
    POSIX.1-2008 and return the TarInfo of the member that follows.

    Raises InvalidHeaderError for a record of length zero and
    SubsequentHeaderError if the following header is missing or bad.
    """
    # Read the header information.
    buf = tarfile.fileobj.read(self._block(self.size))

    # A global header updates the headers stored on the TarFile for
    # all following files; an extended header only affects the next
    # file and therefore works on a copy.
    if self.type == XGLTYPE:
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # A hdrcharset record tells the encoding of the path, linkpath,
    # uname and gname fields. Normally these are UTF-8 encoded, but
    # since POSIX.1-2008 implementations may store them as raw binary
    # strings if the translation to UTF-8 fails.
    charset_match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
    if charset_match is not None:
        pax_headers["hdrcharset"] = charset_match.group(1).decode("utf-8")

    # Only "BINARY" is treated specially. The only other value allowed
    # by the standard is "ISO-IR 10646 2000 UTF-8", i.e. UTF-8.
    if pax_headers.get("hdrcharset") == "BINARY":
        encoding = tarfile.encoding
    else:
        encoding = "utf-8"

    # A record looks like "%d %s=%s\n" % (length, keyword, value).
    # length is the size of the complete record including the length
    # field itself and the newline.
    regex = re.compile(br"(\d+) ([^=]+)=")
    pos = 0
    record = regex.match(buf, pos)
    while record:
        length, keyword = record.groups()
        length = int(length)
        if length == 0:
            raise InvalidHeaderError("invalid header")
        raw_value = buf[record.end(2) + 1:record.start(1) + length - 1]

        # Strict UTF-8 is tried first, with a fall back on the user's
        # encoding and error handler. For example, GNU tar <= 1.23 is
        # known to store filenames it cannot translate to UTF-8 as raw
        # strings (without a hdrcharset=BINARY header).
        keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                                         tarfile.errors)
        if keyword in PAX_NAME_FIELDS:
            primary, fallback = encoding, tarfile.encoding
        else:
            primary = fallback = "utf-8"
        pax_headers[keyword] = self._decode_pax_field(raw_value, primary,
                                                      fallback, tarfile.errors)

        pos += length
        record = regex.match(buf, pos)

    # Fetch the next header.
    try:
        member = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # Process GNU sparse information.
    if "GNU.sparse.map" in pax_headers:
        # GNU extended sparse format version 0.1.
        self._proc_gnusparse_01(member, pax_headers)
    elif "GNU.sparse.size" in pax_headers:
        # GNU extended sparse format version 0.0.
        self._proc_gnusparse_00(member, pax_headers, buf)
    elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
        # GNU extended sparse format version 1.0.
        self._proc_gnusparse_10(member, pax_headers, tarfile)

    if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
        # Patch the TarInfo object with the extended header info.
        member._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
        member.offset = self.offset

        if "size" in pax_headers:
            # The extended header replaced the size field, so the
            # offset where the next header starts is recalculated.
            next_header = member.offset_data
            if member.isreg() or member.type not in SUPPORTED_TYPES:
                next_header += member._block(member.size)
            tarfile.offset = next_header

    return member
1309
def _proc_gnusparse_00(self, next, pax_headers, buf):
    """Process a GNU tar extended sparse header, version 0.0.

    All GNU.sparse.offset and GNU.sparse.numbytes records found in buf
    are paired up in order and stored as a list of tuples in
    next.sparse.
    """
    offsets = [
        int(found.group(1))
        for found in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf)
    ]
    sizes = [
        int(found.group(1))
        for found in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf)
    ]
    next.sparse = list(zip(offsets, sizes))
1320
def _proc_gnusparse_01(self, next, pax_headers):
    """Process a GNU tar extended sparse header, version 0.1.

    The comma-separated integers in pax_headers["GNU.sparse.map"] are
    grouped into pairs and stored as a list of tuples in next.sparse.
    """
    numbers = list(map(int, pax_headers["GNU.sparse.map"].split(",")))
    evens = numbers[::2]
    odds = numbers[1::2]
    next.sparse = list(zip(evens, odds))
1326
def _proc_gnusparse_10(self, next, pax_headers, tarfile):
    """Process a GNU tar extended sparse header, version 1.0.

    The sparse map is read from tarfile.fileobj as newline-terminated
    decimal numbers: first the number of entries, then two numbers per
    entry. The pairs are stored in next.sparse and next.offset_data is
    set to the file position after the last block that was read.
    """
    fileobj = tarfile.fileobj
    head, rest = fileobj.read(BLOCKSIZE).split(b"\n", 1)
    wanted = int(head) * 2

    numbers = []
    while len(numbers) < wanted:
        # Pull in another block whenever no complete line is buffered.
        if b"\n" not in rest:
            rest += fileobj.read(BLOCKSIZE)
        token, rest = rest.split(b"\n", 1)
        numbers.append(int(token))

    next.offset_data = fileobj.tell()
    next.sparse = list(zip(numbers[::2], numbers[1::2]))
1342
def _apply_pax_info(self, pax_headers, encoding, errors):
    """Overwrite attributes of this object with the values found in
    pax_headers and store a copy of pax_headers in self.pax_headers.

    GNU.sparse.name is written to path; GNU.sparse.size and
    GNU.sparse.realsize are written to size as integers. Keywords in
    PAX_FIELDS are written to the attribute of the same name, and all
    other keywords are ignored.
    """
    for keyword, value in pax_headers.items():
        if keyword == "GNU.sparse.name":
            self.path = value
        elif keyword in ("GNU.sparse.size", "GNU.sparse.realsize"):
            self.size = int(value)
        elif keyword in PAX_FIELDS:
            if keyword in PAX_NUMBER_FIELDS:
                # A value that cannot be converted becomes zero.
                try:
                    value = PAX_NUMBER_FIELDS[keyword](value)
                except ValueError:
                    value = 0
            if keyword == "path":
                value = value.rstrip("/")
            setattr(self, keyword, value)

    self.pax_headers = pax_headers.copy()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001365
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001366 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1367 """Decode a single field from a pax record.
1368 """
1369 try:
1370 return value.decode(encoding, "strict")
1371 except UnicodeDecodeError:
1372 return value.decode(fallback_encoding, fallback_errors)
1373
def _block(self, count):
    """Round up a byte count by BLOCKSIZE and return it,
    e.g. _block(834) => 1024.
    """
    # Ceiling division: negating twice turns floor into ceiling.
    blocks = -(-count // BLOCKSIZE)
    return blocks * BLOCKSIZE
Thomas Wouters89f507f2006-12-13 04:49:30 +00001382
def isreg(self):
    """Return True if the member's type is one of REGULAR_TYPES,
    i.e. if the TarInfo object is a regular file.
    """
    return self.type in REGULAR_TYPES
Raymond Hettingera694f232019-03-27 13:16:34 -07001386
def isfile(self):
    """Return True if the TarInfo object is a regular file.

    This simply delegates to isreg().
    """
    return self.isreg()
Raymond Hettingera694f232019-03-27 13:16:34 -07001390
def isdir(self):
    """Return True if the member is a directory (type DIRTYPE)."""
    return self.type == DIRTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001394
def issym(self):
    """Return True if the member is a symbolic link (type SYMTYPE)."""
    return self.type == SYMTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001398
def islnk(self):
    """Return True if the member is a hard link (type LNKTYPE)."""
    return self.type == LNKTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001402
def ischr(self):
    """Return True if the member is a character device (type CHRTYPE)."""
    return self.type == CHRTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001406
def isblk(self):
    """Return True if the member is a block device (type BLKTYPE)."""
    return self.type == BLKTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001410
def isfifo(self):
    """Return True if the member is a FIFO (type FIFOTYPE)."""
    return self.type == FIFOTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001414
def issparse(self):
    """Return True if a sparse map is attached to the member,
    i.e. if its `sparse' attribute is not None.
    """
    return self.sparse is not None
Raymond Hettingera694f232019-03-27 13:16:34 -07001417
def isdev(self):
    """Return True if the member is one of character device, block
    device or FIFO.
    """
    return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1421# class TarInfo
1422
1423class TarFile(object):
1424 """The TarFile Class provides an interface to tar archives.
1425 """
1426
# Class-level defaults. __init__() overrides them per instance for
# every corresponding argument that is not None (`errors' is always
# taken from the argument).

debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

dereference = False         # If true, add content of linked file to the
                            # tar file, else the link.

ignore_zeros = False        # If true, skips empty or invalid blocks and
                            # continues processing.

errorlevel = 1              # If 0, fatal errors only appear in debug
                            # messages (if debug >= 0). If > 0, errors
                            # are passed to the caller as exceptions.

format = DEFAULT_FORMAT     # The format to use when creating an archive.

encoding = ENCODING         # Encoding for 8-bit character strings.

errors = None               # Error handler for unicode conversion.

tarinfo = TarInfo           # The default TarInfo class to use.

fileobject = ExFileObject   # The file-object for extractfile().
Guido van Rossumd8faa362007-04-27 19:54:29 +00001448
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None,
        errorlevel=None, copybufsize=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file, 'w' to create a new file overwriting an existing one or 'x'
       to create a new file exclusively. `mode' defaults to 'r'; any
       other value raises ValueError.
       If `fileobj' is given, it is used for reading or writing data. If
       it has a `mode' attribute, that value replaces the low-level file
       mode. `fileobj' is not closed, when TarFile is closed.
       The remaining arguments override the class-level defaults of the
       same name when they are not None. `pax_headers' is only used if
       the format is PAX_FORMAT.
       If reading the existing archive (modes 'r' and 'a') or writing
       the pax global header fails, a file opened here is closed again
       and the exception is re-raised.
    """
    # Map the public mode to the mode used for opening the file.
    modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
    if mode not in modes:
        raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
    self.mode = mode
    self._mode = modes[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        # We opened the file ourselves, so we are responsible for
        # closing it.
        self._extfileobj = False
    else:
        # Take over the name of the file object if none was passed and
        # the object has a usable one.
        if (name is None and hasattr(fileobj, "name") and
            isinstance(fileobj.name, (str, bytes))):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        # The file object belongs to the caller and is left open.
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding
    # Unlike the other options, `errors' is always stored.
    self.errors = errors

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.copybufsize = copybufsize
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            # firstmember must exist as an attribute before next() is
            # called for the first time.
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    tarinfo = self.tarinfo.fromtarfile(self)
                    self.members.append(tarinfo)
                except EOFHeaderError:
                    # End of archive reached: position the file at
                    # self.offset again and stop scanning.
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in ("a", "w", "x"):
            # In the writing modes there is nothing left to read.
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Do not leak a file that was opened above, then let the
        # original exception propagate.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001548
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001549 #--------------------------------------------------------------------------
1550 # Below are the classmethods which act as alternate constructors to the
1551 # TarFile class. The open() method is the only one that is needed for
1552 # public use; it is the "super"-constructor and is able to select an
1553 # adequate "sub"-constructor for a particular compression using the mapping
1554 # from OPEN_METH.
1555 #
1556 # This concept allows one to subclass TarFile without losing the comfort of
1557 # the super-constructor. A sub-constructor is registered and made available
1558 # by adding it to the mapping in OPEN_METH.
1559
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'r:xz'       open for reading with lzma compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'w:xz'       open for writing with lzma compression

       'x' or 'x:'  create a tarfile exclusively without compression, raise
                    an exception if the file is already created
       'x:gz'       create a gzip compressed tarfile, raise an exception
                    if the file is already created
       'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                    if the file is already created
       'x:xz'       create an lzma compressed tarfile, raise an exception
                    if the file is already created

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'r|xz'       open an lzma compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing
       'w|xz'       open an lzma compressed stream for writing

       `bufsize' is only used for the stream modes ('|'). All other
       keyword arguments are passed on to the selected constructor.

       Raises ValueError if neither `name' nor `fileobj' is given or if
       the mode is not understood, CompressionError for an unknown
       compression type and ReadError if no registered method is able
       to open the archive in transparent mode. In the latter case the
       message lists the reason reported by every method that was tried.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        # The compressed methods are tried first, plain tar last.
        def not_compressed(comptype):
            return cls.OPEN_METH[comptype] == 'taropen'
        failures = []
        for comptype in sorted(cls.OPEN_METH, key=not_compressed):
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError) as e:
                # Remember why this method failed instead of silently
                # dropping the information.
                failures.append("- method %s: %r" % (comptype, e))
                if fileobj is not None:
                    # Rewind so that the next method sees the same data.
                    fileobj.seek(saved_pos)
                continue
        message = "file could not be opened successfully"
        if failures:
            message += ":\n" + "\n".join(failures)
        raise ReadError(message)

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream, **kwargs)
        except:
            # Do not leak the stream if the constructor fails.
            stream.close()
            raise
        # The stream was created here, so the TarFile has to close it.
        t._extfileobj = False
        return t

    elif mode in ("a", "w", "x"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001650
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be one of 'r', 'a', 'w' or 'x', otherwise ValueError
       is raised. All arguments are handed to the class constructor.
    """
    if mode in ("r", "a", "w", "x"):
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001658
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for an unsupported mode, CompressionError if
       the gzip module cannot be imported and, in mode 'r', ReadError
       if an OSError occurs while opening or reading the archive. The
       ReadError is chained to the original OSError.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        from gzip import GzipFile
    except ImportError:
        # The ImportError adds nothing to the message; hide it.
        raise CompressionError("gzip module is not available") from None

    try:
        fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
    except OSError as e:
        if fileobj is not None and mode == 'r':
            raise ReadError("not a gzip file") from e
        raise

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except OSError as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not a gzip file") from e
        raise
    except:
        # Close the GzipFile on any other error, too.
        fileobj.close()
        raise
    # The GzipFile was created here, so the TarFile has to close it.
    t._extfileobj = False
    return t
1691
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for an unsupported mode, CompressionError if
       the bz2 module cannot be imported and, in mode 'r', ReadError if
       an OSError or EOFError occurs while reading the archive. The
       ReadError is chained to the original exception.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        from bz2 import BZ2File
    except ImportError:
        # The ImportError adds nothing to the message; hide it.
        raise CompressionError("bz2 module is not available") from None

    fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (OSError, EOFError) as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not a bzip2 file") from e
        raise
    except:
        # Close the BZ2File on any other error, too.
        fileobj.close()
        raise
    # The BZ2File was created here, so the TarFile has to close it.
    t._extfileobj = False
    return t
1719
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open lzma compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for an unsupported mode, CompressionError if
       the lzma module cannot be imported and, in mode 'r', ReadError
       if an LZMAError or EOFError occurs while reading the archive.
       The ReadError is chained to the original exception.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        from lzma import LZMAFile, LZMAError
    except ImportError:
        # The ImportError adds nothing to the message; hide it.
        raise CompressionError("lzma module is not available") from None

    fileobj = LZMAFile(fileobj or name, mode, preset=preset)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (LZMAError, EOFError) as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not an lzma file") from e
        raise
    except:
        # Close the LZMAFile on any other error, too.
        fileobj.close()
        raise
    # The LZMAFile was created here, so the TarFile has to close it.
    t._extfileobj = False
    return t
1747
# All *open() methods are registered here. The keys are the compression
# names accepted in the mode string of open(), the values are the names
# of the classmethods that open() looks up with getattr().
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open",   # bzip2 compressed tar
    "xz":  "xzopen"     # lzma compressed tar
}
1755
1756 #--------------------------------------------------------------------------
1757 # The public methods which TarFile provides:
1758
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive and the archive is padded with zeros up
       to a multiple of RECORDSIZE. Closing an already closed TarFile
       does nothing. The underlying file object is only closed if it
       was not passed in by the caller.
    """
    if self.closed:
        return

    self.closed = True
    try:
        if self.mode in ("a", "w", "x"):
            trailer = BLOCKSIZE * 2
            self.fileobj.write(NUL * trailer)
            self.offset += trailer
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            leftover = self.offset % RECORDSIZE
            if leftover > 0:
                self.fileobj.write(NUL * (RECORDSIZE - leftover))
    finally:
        if not self._extfileobj:
            self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001779
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001790
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    # A complete list is only available after the whole archive has
    # been scanned.
    fully_scanned = self._loaded
    if not fully_scanned:
        self._load()
    return self.members
1800
def getnames(self):
    """Return the members of the archive as a list of their names. It has
       the same order as the list returned by getmembers().
    """
    members = self.getmembers()
    return [member.name for member in members]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001806
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object from the result of os.stat or equivalent
       on an existing file. The file is either named by `name', or
       specified as a file object `fileobj' with a file descriptor. If
       given, `arcname' specifies an alternative name for the file in the
       archive, otherwise, the name is taken from the 'name' attribute of
       'fileobj', or the 'name' argument. The name should be a text
       string. None is returned if the file type is not supported.
    """
    self._check("awx")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self  # Not needed

    # Use os.stat or os.lstat, depending on if symlinks shall be resolved.
    if fileobj is not None:
        st = os.fstat(fileobj.fileno())
    elif self.dereference:
        st = os.stat(name)
    else:
        st = os.lstat(name)

    file_mode = st.st_mode
    linkname = ""
    if stat.S_ISREG(file_mode):
        inode = (st.st_ino, st.st_dev)
        # Is it a hardlink to an already
        # archived file?
        is_hardlink = (not self.dereference and st.st_nlink > 1 and
                       inode in self.inodes and
                       arcname != self.inodes[inode])
        if is_hardlink:
            member_type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            member_type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(file_mode):
        member_type = DIRTYPE
    elif stat.S_ISFIFO(file_mode):
        member_type = FIFOTYPE
    elif stat.S_ISLNK(file_mode):
        member_type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(file_mode):
        member_type = CHRTYPE
    elif stat.S_ISBLK(file_mode):
        member_type = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = file_mode
    tarinfo.uid = st.st_uid
    tarinfo.gid = st.st_gid
    # Only regular files carry data in the archive.
    tarinfo.size = st.st_size if member_type == REGTYPE else 0
    tarinfo.mtime = st.st_mtime
    tarinfo.type = member_type
    tarinfo.linkname = linkname
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if (member_type in (CHRTYPE, BLKTYPE) and
            hasattr(os, "major") and hasattr(os, "minor")):
        tarinfo.devmajor = os.major(st.st_rdev)
        tarinfo.devminor = os.minor(st.st_rdev)
    return tarinfo
1905
def list(self, verbose=True, *, members=None):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced. `members' is optional and must be a subset of the
       list returned by getmembers().
    """
    self._check()

    if members is None:
        members = self
    for member in members:
        if verbose:
            _safe_print(stat.filemode(member.mode))
            owner = member.uname or member.uid
            group = member.gname or member.gid
            _safe_print("%s/%s" % (owner, group))
            if member.ischr() or member.isblk():
                # Devices show "major,minor" in place of the size.
                device = "%d,%d" % (member.devmajor, member.devminor)
                _safe_print("%10s" % device)
            else:
                _safe_print("%10d" % member.size)
            timestamp = time.localtime(member.mtime)[:6]
            _safe_print("%d-%02d-%02d %02d:%02d:%02d" % timestamp)

        suffix = "/" if member.isdir() else ""
        _safe_print(member.name + suffix)

        if verbose:
            if member.issym():
                _safe_print("-> " + member.linkname)
            if member.islnk():
                _safe_print("link to " + member.linkname)
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001937
def add(self, name, arcname=None, recursive=True, *, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `filter' is a function
       that expects a TarInfo object argument and returns the changed
       TarInfo object, if it returns None the TarInfo object will be
       excluded from the archive.
    """
    self._check("awx")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)
    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as source:
            self.addfile(tarinfo, source)
        return

    # Everything else is stored as a header only; directories are
    # descended into afterwards, in sorted order.
    descend = tarinfo.isdir() and recursive
    self.addfile(tarinfo)
    if descend:
        for entry in sorted(os.listdir(name)):
            self.add(os.path.join(name, entry),
                     os.path.join(arcname, entry),
                     recursive, filter=filter)
1988
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by `tarinfo' to the archive.

       When `fileobj' is given it has to be a binary file; tarinfo.size
       bytes are read from it and stored after the header. TarInfo
       objects can be built by hand or obtained from gettarinfo().
    """
    self._check("awx")

    # Work on a private copy so the caller's object is left alone.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)
    chunk_size = self.copybufsize

    if fileobj is not None:
        # Copy the payload and pad it with NULs up to a full block.
        copyfileobj(fileobj, self.fileobj, member.size, bufsize=chunk_size)
        block_count, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            block_count += 1
        self.offset += block_count * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002013
def extractall(self, path=".", members=None, *, numeric_owner=False):
    """Extract every member below `path' (the current working directory
       by default).

       `members', if given, has to be a subset of the list returned by
       getmembers(). Owner, modification time and permissions of
       directories are applied only after everything has been extracted.
       With `numeric_owner` set to True only the numeric user/group ids
       are used, never the names.
    """
    pending_dirs = []
    selection = self if members is None else members

    for entry in selection:
        is_directory = entry.isdir()
        if is_directory:
            # Create the directory with a safe mode first and remember
            # the original member so its attributes can be set later.
            pending_dirs.append(entry)
            entry = copy.copy(entry)
            entry.mode = 0o700
        # Directory attributes are deliberately not set here.
        self.extract(entry, path, set_attrs=not is_directory,
                     numeric_owner=numeric_owner)

    # Walk the directories in reverse name order so that children are
    # handled before their parents.
    for entry in reversed(sorted(pending_dirs, key=lambda d: d.name)):
        dirpath = os.path.join(path, entry.name)
        try:
            self.chown(entry, dirpath, numeric_owner=numeric_owner)
            self.utime(entry, dirpath)
            self.chmod(entry, dirpath)
        except ExtractError as exc:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % exc)
2053
def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
    """Extract a single member, using its full name, below `path' (the
       current working directory by default).

       `member' is either a filename or a TarInfo object. Owner, mtime
       and mode are applied unless `set_attrs' is false. With
       `numeric_owner` set to True only the numeric user/group ids are
       used, never the names. Whether errors are raised or only reported
       through the debug output depends on self.errorlevel.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # makelink() needs to know where the hard link target ends up.
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    destination = os.path.join(path, tarinfo.name)
    try:
        self._extract_member(tarinfo, destination,
                             set_attrs=set_attrs,
                             numeric_owner=numeric_owner)
    except OSError as exc:
        if self.errorlevel > 0:
            raise
        if exc.filename is None:
            self._dbg(1, "tarfile: %s" % exc.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (exc.strerror, exc.filename))
    except ExtractError as exc:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % exc)
2091
def extractfile(self, member):
    """Return a file object for reading the data of `member'.

       `member' is either a filename or a TarInfo object. For a regular
       file or a link an io.BufferedReader object is returned, for every
       other kind of member the result is None.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Members of an unknown type are handled like regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # Directories, devices etc. carry no data at all.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # Resolving a link means seeking to another member, which a
    # non-seekable stream of tar blocks cannot do.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # The file object of a (sym)link is the one of its target.
    return self.extractfile(self._find_link_target(tarinfo))
2122
def _extract_member(self, tarinfo, targetpath, set_attrs=True,
                    numeric_owner=False):
    """Create the physical file `targetpath' from the TarInfo object
       `tarinfo' and, unless `set_attrs' is false, apply owner, mode and
       modification time to it.
    """
    # Drop trailing slashes and switch to the platform's separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Parent directories that are not part of the archive are created
    # with default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    is_link = tarinfo.islnk() or tarinfo.issym()
    if is_link:
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Pick the make*() method that matches the member type.
    if tarinfo.isreg():
        create = self.makefile
    elif tarinfo.isdir():
        create = self.makedir
    elif tarinfo.isfifo():
        create = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        create = self.makedev
    elif is_link:
        create = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        create = self.makeunknown
    else:
        create = self.makefile
    create(tarinfo, targetpath)

    if not set_attrs:
        return

    self.chown(tarinfo, targetpath, numeric_owner)
    # Mode and mtime are not applied to symbolic links.
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002166
2167 #--------------------------------------------------------------------------
2168 # Below are the different file methods. They are called via
2169 # _extract_member() when extract() is called. They can be replaced in a
2170 # subclass to implement other functionality.
2171
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'; an already existing one is
       silently accepted.
    """
    # The directory gets a safe mode here, the real mode is applied
    # later on in _extract_member().
    safe_mode = 0o700
    try:
        os.mkdir(targetpath, safe_mode)
    except FileExistsError:
        return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002181
def makefile(self, tarinfo, targetpath):
    """Write the data of the member `tarinfo' to the file `targetpath'.
    """
    archive = self.fileobj
    archive.seek(tarinfo.offset_data)
    chunk_size = self.copybufsize

    with bltn_open(targetpath, "wb") as output:
        if tarinfo.sparse is None:
            copyfileobj(archive, output, tarinfo.size, ReadError, chunk_size)
        else:
            # Place every stored data run at its own offset, then fix
            # the length of the file to the member's size.
            for start, length in tarinfo.sparse:
                output.seek(start)
                copyfileobj(archive, output, length, ReadError, chunk_size)
            output.seek(tarinfo.size)
            output.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002197
def makeunknown(self, tarinfo, targetpath):
    """Extract a member whose type is not known as a regular file at
       `targetpath' and report that through the debug output.
    """
    self.makefile(tarinfo, targetpath)
    message = ("tarfile: Unknown file type %r, extracted as regular file."
               % tarinfo.type)
    self._dbg(1, message)
2205
def makefifo(self, tarinfo, targetpath):
    """Create the fifo `targetpath'. ExtractError is raised on platforms
       without os.mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002213
def makedev(self, tarinfo, targetpath):
    """Create the character or block device `targetpath'. ExtractError
       is raised on platforms without os.mknod() or os.makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the matching device type flag.
    type_flag = stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | type_flag, device)
2228
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link `targetpath'.

       When the platform is not able to create the link, the file the
       link refers to is extracted from the archive in its place.
    """
    try:
        # The regular case: the system supports both kinds of links.
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        elif os.path.exists(tarinfo._link_target):
            # _link_target has been prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            # The hard link target is not on disk, so extract the
            # member it refers to instead.
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
    except symlink_exception:
        try:
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002251
def chown(self, tarinfo, targetpath, numeric_owner):
    """Set owner of targetpath according to tarinfo. If numeric_owner
       is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
       is False, fall back to .gid/.uid when the search based on name
       fails.

       Nothing is done unless the process runs with an effective uid
       of 0. ExtractError is raised, chained to the underlying OSError,
       when the owner cannot be changed.
    """
    if hasattr(os, "geteuid") and os.geteuid() == 0:
        # We have to be root to do so.
        g = tarinfo.gid
        u = tarinfo.uid
        if not numeric_owner:
            # Prefer the ids the names map to on this system; the
            # numeric ids from the archive stay in place when a name
            # is not known here.
            try:
                if grp:
                    g = grp.getgrnam(tarinfo.gname)[2]
            except KeyError:
                pass
            try:
                if pwd:
                    u = pwd.getpwnam(tarinfo.uname)[2]
            except KeyError:
                pass
        try:
            # Change the owner of the link itself where possible.
            if tarinfo.issym() and hasattr(os, "lchown"):
                os.lchown(targetpath, u, g)
            else:
                os.chown(targetpath, u, g)
        except OSError as e:
            # Keep the OS level reason attached as the explicit cause.
            raise ExtractError("could not change owner") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002280
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.

       ExtractError is raised, chained to the underlying OSError, when
       the mode cannot be changed.
    """
    try:
        os.chmod(targetpath, tarinfo.mode)
    except OSError as e:
        # Keep the OS level reason attached as the explicit cause.
        raise ExtractError("could not change mode") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002288
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.

       Access time and modification time are both set to tarinfo.mtime.
       Nothing is done on platforms without os.utime(). ExtractError is
       raised, chained to the underlying OSError, when the time cannot
       be changed.
    """
    if not hasattr(os, 'utime'):
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except OSError as e:
        # Keep the OS level reason attached as the explicit cause.
        raise ExtractError("could not change modification time") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002298
2299 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. The result is None once no more
       members are available.
    """
    self._check("ra")

    # A member that was read ahead is handed out first.
    if self.firstmember is not None:
        first, self.firstmember = self.firstmember, None
        return first

    # Move the file pointer to the next header. Reading the byte right
    # before it makes sure that the data up to there really exists.
    if self.offset != self.fileobj.tell():
        self.fileobj.seek(self.offset - 1)
        if not self.fileobj.read(1):
            raise ReadError("unexpected end of data")

    # Read headers until one could be parsed or the end is reached.
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as exc:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as exc:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
            if self.offset == 0:
                raise ReadError(str(exc))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as exc:
            if self.offset == 0:
                raise ReadError(str(exc))
        except SubsequentHeaderError as exc:
            raise ReadError(str(exc))
        break

    if tarinfo is None:
        self._loaded = True
    else:
        self.members.append(tarinfo)

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002350
2351 #--------------------------------------------------------------------------
2352 # Little helper methods:
2353
def _getmember(self, name, tarinfo=None, normalize=False):
    """Look up the member called `name', searching from the end of the
       archive towards its start. If `tarinfo' is given, only the
       members in front of it are searched. With `normalize' set, names
       are compared after os.path.normpath(). None is returned when
       nothing matches.
    """
    # getmembers() makes sure the whole archive has been read.
    candidates = self.getmembers()

    # Cut the search list off right before the starting point.
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    wanted = os.path.normpath(name) if normalize else name

    for candidate in reversed(candidates):
        if normalize:
            candidate_name = os.path.normpath(candidate.name)
        else:
            candidate_name = candidate.name
        if wanted == candidate_name:
            return candidate
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002376
def _load(self):
    """Call next() until the archive is exhausted and mark the TarFile
       as completely loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2386
def _check(self, mode=None):
    """Raise OSError if the TarFile has been closed or, when `mode' is
       given, if the TarFile's own mode is not contained in `mode'.
    """
    if self.closed:
        raise OSError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise OSError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002395
def _find_link_target(self, tarinfo):
    """Return the member a symlink or hardlink member points to.
       KeyError is raised when the archive has no such member.
    """
    if tarinfo.issym():
        # A symlink target is relative to the directory of the link and
        # may be located anywhere in the archive.
        parts = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join([part for part in parts if part])
        limit = None
    else:
        # A hard link refers to a file that has been archived before
        # it, so only the members in front of the link are searched.
        linkname = tarinfo.linkname
        limit = tarinfo

    target = self._getmember(linkname, tarinfo=limit, normalize=True)
    if target is None:
        raise KeyError("linkname %r not found" % linkname)
    return target
2414
def __iter__(self):
    """Iterate over the members of the archive, reading further members
       with next() as long as the archive is not completely loaded.
    """
    if self._loaded:
        yield from self.members
        return

    # Members are handed out by position in self.members, which is
    # extended through next(). Once next() reports the end, the TarFile
    # is marked as _loaded.
    position = 0

    # Fix for SF #1100429: Under rare circumstances it can happen that
    # getmembers() is called during iteration, which will have already
    # exhausted the next() method.
    if self.firstmember is not None:
        first = self.next()
        position += 1
        yield first

    while True:
        if position < len(self.members):
            member = self.members[position]
        elif self._loaded:
            return
        else:
            member = self.next()
            if not member:
                self._loaded = True
                return
        position += 1
        yield member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002445
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr if `level' does not exceed self.debug.
    """
    enabled = level <= self.debug
    if enabled:
        print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002451
def __enter__(self):
    """Enter the runtime context: make sure the TarFile is still open
       and return it.
    """
    self._check()
    return self
2455
def __exit__(self, type, value, traceback):
    """Leave the runtime context: close the archive regularly, or just
       release the file if an exception is pending.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because it would
    # try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002465
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002466#--------------------
2467# exported functions
2468#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       'name' should be a string, file, or file-like object.

       If a file object is passed, its position is the same after the
       call as before it, so that the caller can go on and read the
       archive from the very same object.
    """
    try:
        if hasattr(name, "read"):
            # Probing the archive reads from the file object. Remember
            # where it was and go back there, whether or not the data
            # turned out to be a tar archive.
            pos = name.tell()
            try:
                t = open(fileobj=name)
            finally:
                name.seek(pos)
        else:
            t = open(name)
        t.close()
        return True
    except TarError:
        return False
2484
# Module-level shortcut for TarFile.open. From here on the name `open'
# in this module's namespace refers to it, not to the builtin open().
open = TarFile.open
Serhiy Storchakad27b4552013-11-24 01:53:29 +02002486
2487
def main():
    """Command-line interface of the module: list, extract, create or
       test a tar archive, depending on the option given.
    """
    import argparse

    description = 'A simple command-line interface for tarfile module.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    # Exactly one of the four actions has to be given.
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument('-l', '--list', metavar='<tarfile>',
                         help='Show listing of a tarfile')
    actions.add_argument('-e', '--extract', nargs='+',
                         metavar=('<tarfile>', '<output_dir>'),
                         help='Extract tarfile into target dir')
    actions.add_argument('-c', '--create', nargs='+',
                         metavar=('<name>', '<file>'),
                         help='Create tarfile from sources')
    actions.add_argument('-t', '--test', metavar='<tarfile>',
                         help='Test if a tarfile is valid')
    args = parser.parse_args()

    def reject(path):
        # Leave with exit status 1 and a message about `path'.
        parser.exit(1, '{!r} is not a tar archive.\n'.format(path))

    if args.test is not None:
        src = args.test
        if not is_tarfile(src):
            reject(src)
        else:
            # `open' is the module-level alias of TarFile.open here.
            with open(src, 'r') as tar:
                tar.getmembers()
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))

    elif args.list is not None:
        src = args.list
        if not is_tarfile(src):
            reject(src)
        else:
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)

    elif args.extract is not None:
        # One value is the archive alone, two are archive and target
        # directory; anything else prints the help text and exits.
        given = len(args.extract)
        if given == 1:
            src, curdir = args.extract[0], os.curdir
        elif given == 2:
            src, curdir = args.extract
        else:
            parser.exit(1, parser.format_help())

        if not is_tarfile(src):
            reject(src)
        else:
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} file is extracted.'.format(src)
                else:
                    msg = ('{!r} file is extracted '
                           'into {!r} directory.').format(src, curdir)
                print(msg)

    elif args.create is not None:
        # The first value is the archive, the rest are the sources.
        tar_name = args.create.pop(0)
        ext = os.path.splitext(tar_name)[1]
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        # The compression follows from the extension of the archive.
        method = compressions.get(ext)
        tar_mode = 'w' if method is None else 'w:' + method
        tar_files = args.create

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))
2574
# Run the command-line interface when the module is executed as a
# script.
if __name__ == '__main__':
    main()