blob: 90a2c95b315b3ca884992dd929fed4e63f57d744 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
# Module metadata.
version = "0.9.0"
# "\u00e4" is the escape sequence for the letter a-umlaut.
__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000035
36#---------
37# Imports
38#---------
Serhiy Storchakacf4a2f22015-03-11 17:18:03 +020039from builtins import open as bltn_open
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000040import sys
41import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020042import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000043import shutil
44import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000045import time
46import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000047import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000048import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000049
50try:
Xavier de Gayef44abda2016-12-09 09:33:09 +010051 import pwd
Brett Cannoncd171c82013-07-04 17:43:24 -040052except ImportError:
Xavier de Gayef44abda2016-12-09 09:33:09 +010053 pwd = None
54try:
55 import grp
56except ImportError:
57 grp = None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000058
# Exceptions that signal "a symbolic link cannot be created here":
#   * AttributeError / NotImplementedError -- os.symlink is missing or
#     unsupported (os.symlink on Windows prior to 6.0 raises
#     NotImplementedError);
#   * OSError (winerror=1314) -- raised if the caller does not hold the
#     SeCreateSymbolicLinkPrivilege privilege.
# OSError is a builtin, so it can be listed directly; no NameError guard
# is needed.
symlink_exception = (AttributeError, NotImplementedError, OSError)
67
# Names exported by "from tarfile import *".
__all__ = [
    "TarFile",
    "TarInfo",
    "is_tarfile",
    "TarError",
    "ReadError",
    "CompressionError",
    "StreamError",
    "ExtractError",
    "HeaderError",
    "ENCODING",
    "USTAR_FORMAT",
    "GNU_FORMAT",
    "PAX_FORMAT",
    "DEFAULT_FORMAT",
    "open",
]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000073
74#---------------------------------------------------------
75# tar constants
76#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records

# Both magic values are 8 bytes long.  The GNU value is "ustar" followed by
# TWO spaces and a NUL; the POSIX value is "ustar", a NUL and the version
# digits "00".
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Values of the one-byte type field of a member.
REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

# Archive formats selectable by the caller.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = PAX_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000109
110#---------------------------------------------------------
111# tarfile constants
112#---------------------------------------------------------
# Member types this module knows how to handle.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Member types that are handled like a regular file.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)

# Member types that only exist in the GNU tar format.
GNU_TYPES = (
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Pax header keywords whose value overrides a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Pax header keywords whose value is affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Pax header keywords that hold numbers, mapped to the type used to
# convert them; every keyword not listed here is treated as a string.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int,
}
145
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000146#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000147# initialization
148#---------------------------------------------------------
# Default encoding: always UTF-8 when os.name is "nt", otherwise whatever
# sys.getfilesystemencoding() reports.
ENCODING = "utf-8" if os.name == "nt" else sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000153
154#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000155# Some useful functions
156#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000157
def stn(s, length, encoding, errors):
    """Encode the string s and return it as a bytes field of exactly
       length bytes: cut off after length bytes if the encoded form is
       longer, padded with NUL bytes if it is shorter.
    """
    encoded = s.encode(encoding, errors)
    # A negative repeat count yields b"", so over-long input gets no padding.
    padding = NUL * (length - len(encoded))
    return encoded[:length] + padding
Thomas Wouters477c8d52006-05-27 19:21:47 +0000163
def nts(s, encoding, errors):
    """Decode the bytes object s into a string, ignoring everything from
       its first null byte onwards (the whole of s if it has none).
    """
    terminator = s.find(b"\0")
    field = s if terminator == -1 else s[:terminator]
    return field.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000171
def nti(s):
    """Convert a number field (bytes) to a python number.

       Raises InvalidHeaderError if the field is neither base-256
       encoded nor a valid octal string.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    marker = s[0]
    if marker in (0o200, 0o377):
        # Base-256: the bytes after the marker are a big-endian number.
        n = 0
        for byte in s[1:]:
            n = (n << 8) + byte
        if marker == 0o377:
            # A 0o377 marker means the value is negative.
            n = -(256 ** (len(s) - 1) - n)
        return n

    # Octal digits as ASCII text; a blank field counts as zero.
    try:
        text = nts(s, "ascii", "strict")
        return int(text.strip() or "0", 8)
    except ValueError:
        raise InvalidHeaderError("invalid header")
191
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field of digits bytes.

       Raises ValueError if n does not fit into the field for the
       given format.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.
    n = int(n)
    payload_len = digits - 1

    if 0 <= n < 8 ** payload_len:
        return bytes("%0*o" % (payload_len, n), "ascii") + NUL

    if format == GNU_FORMAT and -256 ** payload_len <= n < 256 ** payload_len:
        if n >= 0:
            field = bytearray([0o200])
        else:
            field = bytearray([0o377])
            n = 256 ** digits + n

        # Emit the low payload_len bytes of n, most significant first.
        for shift in range(8 * (payload_len - 1), -8, -8):
            field.append((n >> shift) & 0o377)
        return field

    raise ValueError("overflow in number field")
220
def calc_chksums(buf):
    """Return the pair (unsigned_chksum, signed_chksum) for the member
       header in buf.

       All bytes are summed up except for the chksum field, which is
       treated as if it was filled with spaces. According to the GNU tar
       sources, some tars (Sun and NeXT) calculate chksum with signed
       char, which gives a different result if bytes with the high bit
       set are present, so both variants are calculated.
    """
    # The "8x" in both formats skips the 8-byte chksum field at offset
    # 148; eight spaces contribute 8 * 32 == 256.
    chksum_field_as_spaces = 256
    unsigned_bytes = struct.unpack_from("148B8x356B", buf)
    signed_bytes = struct.unpack_from("148b8x356b", buf)
    unsigned_chksum = chksum_field_as_spaces + sum(unsigned_bytes)
    signed_chksum = chksum_field_as_spaces + sum(signed_bytes)
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000233
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Data is moved in pieces of at most bufsize bytes (16 KiB if
       bufsize is not given). If src runs out of data before length
       bytes were copied, exception("unexpected end of data") is raised.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    def transfer(count):
        # Move exactly count bytes; a short read means src is truncated.
        chunk = src.read(count)
        if len(chunk) < count:
            raise exception("unexpected end of data")
        dst.write(chunk)

    whole_chunks, tail = divmod(length, bufsize)
    for _ in range(whole_chunks):
        transfer(bufsize)
    if tail != 0:
        transfer(tail)
    return
258
def _safe_print(s):
    """Print s followed by a space instead of a newline. Characters that
       the encoding of sys.stdout cannot represent are written as
       backslash escapes.
    """
    stream_encoding = getattr(sys.stdout, 'encoding', None)
    if stream_encoding is not None:
        # Round-trip through the stream's encoding to escape anything
        # it cannot encode.
        escaped = s.encode(stream_encoding, 'backslashreplace')
        s = escaped.decode(stream_encoding)
    print(s, end=' ')
264
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000265
# Exception hierarchy: everything derives from TarError, and all
# header-related errors derive from HeaderError.

class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000299
300#---------------------------
301# internal stream interface
302#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access. It works directly on an os-level file descriptor.
    """

    def __init__(self, name, mode):
        """Open the file name. mode must be "r" (read only) or "w"
           (write only, created if missing, truncated if present);
           any other value raises KeyError.
        """
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # Not every platform defines O_BINARY; use it where it exists.
        if hasattr(os, "O_BINARY"):
            mode |= os.O_BINARY
        self.fd = os.open(name, mode, 0o666)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return up to size bytes read from the file descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write all of the bytes-like object s to the file descriptor."""
        # os.write() may write fewer bytes than requested and reports
        # how many it wrote, so keep going until nothing is left.
        remaining = memoryview(s).cast("B")
        while remaining:
            written = os.write(self.fd, remaining)
            remaining = remaining[written:]
326
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip, bzip2 or xz compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name     -- file name; opened via _LowLevelFile when fileobj
                       is None, and used for the gzip FNAME field.
           mode     -- "r" or "w".
           comptype -- "tar" (no compression), "gz", "bz2", "xz", or
                       "*" to detect the compression from the data.
           fileobj  -- object to read from / write to, or None.
           bufsize  -- number of bytes moved per read/write on fileobj.

           Raises CompressionError if the compression type is unknown
           or its module is not available.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = b""     # raw bytes not yet written / consumed
        self.pos      = 0       # position in the uncompressed stream
        self.closed   = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    self._init_read_gz()
                    self.exception = zlib.error
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except:
            # Deliberately catches everything: release a file we opened
            # ourselves, then re-raise unchanged.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" is missing if __init__ failed before setting it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: raw deflate data; the gzip header and trailer
        # are written by this class itself.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # gzip trailer: CRC32, then the uncompressed size
                    # masked to 32 bits.
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

           Consumes the gzip header. Raises ReadError if the magic
           number is wrong and CompressionError if the compression
           method is not deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            # Extra field: two little-endian length bytes, then that
            # many bytes. This is still header data, so it must be
            # skipped with the raw __read(); read() would push it
            # through the decompressor and advance self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # Skip the null-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # Skip the null-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # Skip the two-byte header CRC.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos >= 0:
            # Seek forward by reading and discarding the data.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size):
        """Return the next size number of bytes from the stream."""
        assert size is not None
        buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            # Skip underlying buffer to avoid unaligned double buffering.
            if self.buf:
                buf = self.buf
                self.buf = b""
            else:
                buf = self.fileobj.read(self.bufsize)
                if not buf:
                    break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                raise ReadError("invalid compressed data")
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        # Keep what was decompressed beyond size for the next call.
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
566
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').

       It reads the first block of the wrapped file object in advance
       so that its leading bytes can be inspected.
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block read in advance, whatever size is. Every
           later call goes straight to the wrapped file object.
        """
        # Shadow this method on the instance with fileobj.read.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2", "xz" or "tar" depending on the leading
           bytes of the data.
        """
        head = self.buf
        if head.startswith(b"\x1f\x8b\x08"):
            return "gz"
        if head[0:3] == b"BZh" and head[4:10] == b"1AY&SY":
            return "bz2"
        if head.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        return "tar"

    def close(self):
        self.fileobj.close()
# class StreamProxy
593
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000594#------------------------
595# Extraction file object
596#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object. Gaps between the blocks given in blockinfo
       read back as null bytes.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        """fileobj   -- the file object that holds the data.
           offset    -- position in fileobj where the data starts.
           size      -- size of the file this object presents.
           blockinfo -- list of (offset, size) pairs naming the parts
                        of the presented file that are stored in
                        fileobj, one after another starting at offset.
                        If None, all size bytes are stored.
        """
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks. Each entry is
        # (is_data, start, stop, position in fileobj or None).
        self.map_index = 0
        self.map = []
        logical_end = 0
        physical = self.offset
        for block_start, block_len in blockinfo:
            if block_start > logical_end:
                self.map.append((False, logical_end, block_start, None))
            block_stop = block_start + block_len
            self.map.append((True, block_start, block_stop, physical))
            physical += block_len
            logical_end = block_stop
        if logical_end < self.size:
            self.map.append((False, logical_end, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file and return the new position,
           which is always kept within 0..size. Raises ValueError for
           an unknown whence.
        """
        if whence == io.SEEK_SET:
            target = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                target = max(self.position + position, 0)
            else:
                target = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            target = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        self.position = target
        return target

    def read(self, size=None):
        """Read data from the file: at most size bytes, or everything
           up to the end if size is None. Raises ReadError if fileobj
           returns less data than the map says it holds.
        """
        available = self.size - self.position
        size = available if size is None else min(size, available)

        pieces = []
        while size > 0:
            # Find the map entry covering the current position,
            # wrapping around to the first entry after the last one.
            entry = self.map[self.map_index]
            while not (entry[1] <= self.position < entry[2]):
                self.map_index += 1
                if self.map_index == len(self.map):
                    self.map_index = 0
                entry = self.map[self.map_index]
            is_data, start, stop, realpos = entry

            length = min(size, stop - self.position)
            if is_data:
                self.fileobj.seek(realpos + (self.position - start))
                chunk = self.fileobj.read(length)
                if len(chunk) != length:
                    raise ReadError("unexpected end of data")
            else:
                chunk = NUL * length
            pieces.append(chunk)
            size -= length
            self.position += length
        return b"".join(pieces)

    def readinto(self, b):
        """Read up to len(b) bytes into b and return how many bytes
           were stored.
        """
        data = self.read(len(b))
        count = len(data)
        b[:count] = data
        return count

    def close(self):
        self.closed = True
#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000700
class ExFileObject(io.BufferedReader):
    """Buffered reader over the data of the archive member tarinfo,
       read from tarfile.fileobj.
    """

    def __init__(self, tarfile, tarinfo):
        member_data = _FileInFile(
            tarfile.fileobj,
            tarinfo.offset_data,
            tarinfo.size,
            tarinfo.sparse,
        )
        super().__init__(member_data)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000708
709#------------------
710# Exported Classes
711#------------------
712class TarInfo(object):
713 """Informational class which holds the details about an
714 archive member given by a tar header block.
715 TarInfo objects are returned by TarFile.getmember(),
716 TarFile.getmembers() and TarFile.gettarinfo() and are
717 usually created internally.
718 """
719
Raymond Hettingera694f232019-03-27 13:16:34 -0700720 __slots__ = dict(
721 name = 'Name of the archive member.',
722 mode = 'Permission bits.',
723 uid = 'User ID of the user who originally stored this member.',
724 gid = 'Group ID of the user who originally stored this member.',
725 size = 'Size in bytes.',
726 mtime = 'Time of last modification.',
727 chksum = 'Header checksum.',
728 type = ('File type. type is usually one of these constants: '
729 'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
730 'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
731 linkname = ('Name of the target file name, which is only present '
732 'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
733 uname = 'User name.',
734 gname = 'Group name.',
735 devmajor = 'Device major number.',
736 devminor = 'Device minor number.',
737 offset = 'The tar header starts here.',
738 offset_data = "The file's data starts here.",
739 pax_headers = ('A dictionary containing key-value pairs of an '
740 'associated pax extended header.'),
741 sparse = 'Sparse member information.',
742 tarfile = None,
743 _sparse_structs = None,
744 _link_target = None,
745 )
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +0000746
def __init__(self, name=""):
    """Create a TarInfo object. name is the optional member name;
       all remaining header fields start out with neutral defaults.
    """
    # What the member is called and what kind of member it is.
    self.name = name
    self.type = REGTYPE
    self.linkname = ""

    # Permissions, ownership and device numbers.
    self.mode = 0o644
    self.uid = 0
    self.gid = 0
    self.uname = ""
    self.gname = ""
    self.devmajor = 0
    self.devminor = 0

    # Size, modification time and header checksum.
    self.size = 0
    self.mtime = 0
    self.chksum = 0

    # Positions inside the archive: start of the header block and
    # start of the member's data.
    self.offset = 0
    self.offset_data = 0

    # Sparse member information and pax header information.
    self.sparse = None
    self.pax_headers = {}
770
@property
def path(self):
    'In pax headers, "name" is called "path".'
    # Plain alias: reading path reads the name attribute.
    member_name = self.name
    return member_name

@path.setter
def path(self, name):
    # Writing path stores the value in the name attribute.
    setattr(self, "name", name)
779
@property
def linkpath(self):
    'In pax headers, "linkname" is called "linkpath".'
    # Plain alias: reading linkpath reads the linkname attribute.
    target = self.linkname
    return target

@linkpath.setter
def linkpath(self, linkname):
    # Writing linkpath stores the value in the linkname attribute.
    setattr(self, "linkname", linkname)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000788
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000789 def __repr__(self):
790 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
791
def get_info(self):
    """Return a new dictionary holding the header attributes of this
       TarInfo object. The mode is masked with 0o7777 and the name of
       a DIRTYPE member always ends with a slash.
    """
    member_name = self.name
    if self.type == DIRTYPE and not member_name.endswith("/"):
        member_name += "/"

    return {
        "name":     member_name,
        "mode":     self.mode & 0o7777,
        "uid":      self.uid,
        "gid":      self.gid,
        "size":     self.size,
        "mtime":    self.mtime,
        "chksum":   self.chksum,
        "type":     self.type,
        "linkname": self.linkname,
        "uname":    self.uname,
        "gname":    self.gname,
        "devmajor": self.devmajor,
        "devminor": self.devminor,
    }
815
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
    """Build the header for this member in the requested format and
       return it. format selects one of the create_*_header() methods;
       any other value raises ValueError.
    """
    info = self.get_info()

    if format == USTAR_FORMAT:
        return self.create_ustar_header(info, encoding, errors)
    if format == GNU_FORMAT:
        return self.create_gnu_header(info, encoding, errors)
    if format == PAX_FORMAT:
        # The pax variant takes no error handler argument.
        return self.create_pax_header(info, encoding)

    raise ValueError("invalid format")
829
def create_ustar_header(self, info, encoding, errors):
    """Return the object as a ustar header block. A linkname whose
       encoded form exceeds LENGTH_LINK raises ValueError; a name whose
       encoded form exceeds LENGTH_NAME is split into prefix and name.
    """
    info["magic"] = POSIX_MAGIC

    encoded_link = info["linkname"].encode(encoding, errors)
    if len(encoded_link) > LENGTH_LINK:
        raise ValueError("linkname is too long")

    encoded_name = info["name"].encode(encoding, errors)
    if len(encoded_name) > LENGTH_NAME:
        prefix, short_name = self._posix_split_name(info["name"], encoding, errors)
        info["prefix"] = prefix
        info["name"] = short_name

    return self._create_header(info, USTAR_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000842
def create_gnu_header(self, info, encoding, errors):
    """Return the object as a GNU header block sequence: optional
       long-link and long-name sequences followed by the header block.
    """
    info["magic"] = GNU_MAGIC

    pieces = []

    # An over-long link target gets its own GNUTYPE_LONGLINK sequence.
    linkname = info["linkname"]
    if len(linkname.encode(encoding, errors)) > LENGTH_LINK:
        pieces.append(self._create_gnu_long_header(
            linkname, GNUTYPE_LONGLINK, encoding, errors))

    # An over-long member name gets its own GNUTYPE_LONGNAME sequence.
    name = info["name"]
    if len(name.encode(encoding, errors)) > LENGTH_NAME:
        pieces.append(self._create_gnu_long_header(
            name, GNUTYPE_LONGNAME, encoding, errors))

    pieces.append(self._create_header(info, GNU_FORMAT, encoding, errors))
    return b"".join(pieces)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000856
def create_pax_header(self, info, encoding):
    """Return the object as a ustar header block, preceded by a pax
       extended header sequence when some field cannot be stored in
       the ustar header alone.
    """
    info["magic"] = POSIX_MAGIC
    extended = self.pax_headers.copy()

    # String fields move to the extended header if they are not pure
    # ASCII or are longer than the ustar field. A keyword already
    # present in the pax headers wins and is left alone.
    string_fields = (
        ("name", "path", LENGTH_NAME),
        ("linkname", "linkpath", LENGTH_LINK),
        ("uname", "uname", 32),
        ("gname", "gname", 32),
    )
    for field, pax_key, limit in string_fields:
        if pax_key in extended:
            continue

        text = info[field]
        try:
            text.encode("ascii", "strict")
        except UnicodeEncodeError:
            extended[pax_key] = text
        else:
            if len(text) > limit:
                extended[pax_key] = text

    # Number fields move to the extended header if they do not fit
    # into the octal field or are floats; the ustar field is zeroed.
    number_fields = (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12))
    for field, digits in number_fields:
        if field in extended:
            # The pax header has priority. Avoid overflow.
            info[field] = 0
            continue

        number = info[field]
        out_of_range = not 0 <= number < 8 ** (digits - 1)
        if out_of_range or isinstance(number, float):
            extended[field] = str(number)
            info[field] = 0

    # Only emit an extended header when there is something to say.
    if extended:
        leading = self._create_pax_generic_header(extended, XHDTYPE, encoding)
    else:
        leading = b""

    return leading + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000905
@classmethod
def create_pax_global_header(cls, pax_headers):
    """Return a pax global header block sequence (type XGLTYPE) for
       the given pax_headers, always using UTF-8 as the encoding.
    """
    sequence = cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
    return sequence
Guido van Rossumd8faa362007-04-27 19:54:29 +0000911
def _posix_split_name(self, name, encoding, errors):
    """Split name at a slash into a (prefix, name) pair whose encoded
       parts fit into LENGTH_PREFIX and LENGTH_NAME bytes. The first
       split point that works is used; ValueError is raised if there
       is none.
    """
    pieces = name.split("/")
    for cut in range(1, len(pieces)):
        prefix = "/".join(pieces[:cut])
        rest = "/".join(pieces[cut:])
        prefix_fits = len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX
        if prefix_fits and len(rest.encode(encoding, errors)) <= LENGTH_NAME:
            return prefix, rest

    raise ValueError("name is too long")
927
@staticmethod
def _create_header(info, format, encoding, errors):
    """Return a header block of BLOCKSIZE bytes. info is a dictionary
       with file information (missing keys fall back to defaults),
       format must be one of the *_FORMAT constants and is passed on
       to itn(); encoding and errors are passed on to stn().
    """
    parts = [
        stn(info.get("name", ""), 100, encoding, errors),
        itn(info.get("mode", 0) & 0o7777, 8, format),
        itn(info.get("uid", 0), 8, format),
        itn(info.get("gid", 0), 8, format),
        itn(info.get("size", 0), 12, format),
        itn(info.get("mtime", 0), 12, format),
        # Checksum placeholder. It has to be exactly 8 bytes wide:
        # the fields before it add up to 148 bytes, frombuf() reads
        # the checksum from buf[148:156] and the type from buf[156],
        # and the splice below replaces bytes 148..154. A narrower
        # placeholder would shift every following field.
        b" " * 8,
        info.get("type", REGTYPE),
        stn(info.get("linkname", ""), 100, encoding, errors),
        info.get("magic", POSIX_MAGIC),
        stn(info.get("uname", ""), 32, encoding, errors),
        stn(info.get("gname", ""), 32, encoding, errors),
        itn(info.get("devmajor", 0), 8, format),
        itn(info.get("devminor", 0), 8, format),
        stn(info.get("prefix", ""), 155, encoding, errors)
    ]

    # struct.pack() pads the joined fields with NUL bytes up to
    # BLOCKSIZE.
    buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))

    # The checksum is calculated while the checksum field still holds
    # the placeholder spaces.
    chksum = calc_chksums(buf[-BLOCKSIZE:])[0]

    # Overwrite the first 7 placeholder bytes with six octal digits
    # and a NUL; the eighth placeholder byte stays a space.
    buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
    return buf
955
@staticmethod
def _create_payload(payload):
    """Return payload padded with NUL up to the next multiple of
       BLOCKSIZE. A payload that already ends on a block border is
       returned as it is.
    """
    remainder = divmod(len(payload), BLOCKSIZE)[1]
    if remainder > 0:
        missing = BLOCKSIZE - remainder
        payload += missing * NUL
    return payload
965
@classmethod
def _create_gnu_long_header(cls, name, type, encoding, errors):
    """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence for
       name: a header block followed by the NUL-terminated, encoded
       name padded to full blocks.
    """
    data = name.encode(encoding, errors) + NUL

    info = {
        "name": "././@LongLink",
        "type": type,
        "size": len(data),
        "magic": GNU_MAGIC,
    }

    # Extended header first, then the name blocks.
    header = cls._create_header(info, USTAR_FORMAT, encoding, errors)
    return header + cls._create_payload(data)
982
@classmethod
def _create_pax_generic_header(cls, pax_headers, type, encoding):
    """Return a POSIX.1-2008 extended or global header sequence that
       contains the keyword, value pairs of pax_headers. The values
       must be strings. type becomes the type of the header block.
    """
    # A value that cannot be encoded as strict UTF-8 (e.g. because it
    # contains surrogate characters) forces hdrcharset=BINARY for the
    # whole header.
    binary = False
    for candidate in pax_headers.values():
        try:
            candidate.encode("utf-8", "strict")
        except UnicodeEncodeError:
            binary = True
            break

    # The hdrcharset record, if needed, goes first.
    chunks = [b"21 hdrcharset=BINARY\n"] if binary else []

    for keyword, value in pax_headers.items():
        key = keyword.encode("utf-8")
        if binary:
            # Try to restore the original byte representation of the
            # value; this needs the encoding to match the string.
            raw = value.encode(encoding, "surrogateescape")
        else:
            raw = value.encode("utf-8")

        # The length prefix counts itself, so iterate until the total
        # record length stops changing.
        base = len(key) + len(raw) + 3   # ' ' + '=' + '\n'
        total = 0
        while True:
            candidate_total = base + len(str(total))
            if candidate_total == total:
                break
            total = candidate_total

        chunks.append(str(total).encode("ascii") + b" " + key + b"=" + raw + b"\n")

    records = b"".join(chunks)

    # A hardcoded "././@PaxHeader" name is used (as star does) instead
    # of the one that POSIX recommends.
    info = {
        "name": "././@PaxHeader",
        "type": type,
        "size": len(records),
        "magic": POSIX_MAGIC,
    }

    # Pax header block followed by the record blocks.
    header = cls._create_header(info, USTAR_FORMAT, "ascii", "replace")
    return header + cls._create_payload(records)
1033
@classmethod
def frombuf(cls, buf, encoding, errors):
    """Construct a TarInfo object from a bytes object of BLOCKSIZE
       bytes. Raises EmptyHeaderError, TruncatedHeaderError,
       EOFHeaderError or InvalidHeaderError if buf is empty, has the
       wrong length, consists of NUL only or fails the checksum test.
    """
    size = len(buf)
    if size == 0:
        raise EmptyHeaderError("empty header")
    if size != BLOCKSIZE:
        raise TruncatedHeaderError("truncated header")
    if buf.count(NUL) == BLOCKSIZE:
        raise EOFHeaderError("end of file header")

    chksum = nti(buf[148:156])
    if chksum not in calc_chksums(buf):
        raise InvalidHeaderError("bad checksum")

    def text(start, stop):
        # Decode one string field of the header.
        return nts(buf[start:stop], encoding, errors)

    obj = cls()
    obj.name = text(0, 100)
    obj.mode = nti(buf[100:108])
    obj.uid = nti(buf[108:116])
    obj.gid = nti(buf[116:124])
    obj.size = nti(buf[124:136])
    obj.mtime = nti(buf[136:148])
    obj.chksum = chksum
    obj.type = buf[156:157]
    obj.linkname = text(157, 257)
    obj.uname = text(265, 297)
    obj.gname = text(297, 329)
    obj.devmajor = nti(buf[329:337])
    obj.devminor = nti(buf[337:345])
    prefix = text(345, 500)

    # Old V7 tar format represents a directory as a regular file
    # with a trailing slash.
    if obj.type == AREGTYPE and obj.name.endswith("/"):
        obj.type = DIRTYPE

    # The old GNU sparse format keeps up to 4 sparse structures of
    # 24 bytes each in the header, starting at byte 386. They are
    # stored here and consumed later by _proc_sparse().
    if obj.type == GNUTYPE_SPARSE:
        structs = []
        for pos in range(386, 386 + 4 * 24, 24):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            structs.append((offset, numbytes))
        isextended = bool(buf[482])
        origsize = nti(buf[483:495])
        obj._sparse_structs = (structs, isextended, origsize)

    # Remove redundant slashes from directories.
    if obj.isdir():
        obj.name = obj.name.rstrip("/")

    # Reconstruct a ustar longname.
    if prefix and obj.type not in GNU_TYPES:
        obj.name = prefix + "/" + obj.name
    return obj
1096
@classmethod
def fromtarfile(cls, tarfile):
    """Read one header block from tarfile.fileobj, turn it into a
       TarInfo object and return the result of its _proc_member().
    """
    fileobj = tarfile.fileobj
    header = fileobj.read(BLOCKSIZE)
    member = cls.frombuf(header, tarfile.encoding, tarfile.errors)
    # The header started one block before the current position.
    member.offset = fileobj.tell() - BLOCKSIZE
    return member._proc_member(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001106
Guido van Rossumd8faa362007-04-27 19:54:29 +00001107 #--------------------------------------------------------------------------
1108 # The following are methods that are called depending on the type of a
1109 # member. The entry point is _proc_member() which can be overridden in a
1110 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1111 # implement the following
1112 # operations:
1113 # 1. Set self.offset_data to the position where the data blocks begin,
1114 # if there is data that follows.
1115 # 2. Set tarfile.offset to the position where the next member's header will
1116 # begin.
1117 # 3. Return self or another valid TarInfo object.
def _proc_member(self, tarfile):
    """Dispatch to the _proc_*() method that matches the type of
       this member and return its result.
    """
    member_type = self.type
    if member_type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        return self._proc_gnulong(tarfile)
    if member_type == GNUTYPE_SPARSE:
        return self._proc_sparse(tarfile)
    if member_type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
        return self._proc_pax(tarfile)
    # Everything else is handled like a builtin type.
    return self._proc_builtin(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001130
def _proc_builtin(self, tarfile):
    """Process a builtin type or an unknown type which will be
       treated as a regular file. Sets self.offset_data and
       tarfile.offset and returns self.
    """
    data_start = tarfile.fileobj.tell()
    self.offset_data = data_start

    # Only regular files and unsupported types are followed by data
    # blocks that have to be skipped.
    if self.isreg() or self.type not in SUPPORTED_TYPES:
        tarfile.offset = data_start + self._block(self.size)
    else:
        tarfile.offset = data_start

    # Patch the TarInfo object with saved global header information.
    self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

    return self
1147
def _proc_gnulong(self, tarfile):
    """Process the blocks that hold a GNU longname or longlink
       member and return the following member with the long value
       applied. Raises SubsequentHeaderError if that member cannot
       be read.
    """
    payload = tarfile.fileobj.read(self._block(self.size))

    # Fetch the next header and process it.
    try:
        member = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # The following member takes over the offset of this header and
    # receives the long name or long link name.
    member.offset = self.offset
    if self.type == GNUTYPE_LONGNAME:
        member.name = nts(payload, tarfile.encoding, tarfile.errors)
    elif self.type == GNUTYPE_LONGLINK:
        member.linkname = nts(payload, tarfile.encoding, tarfile.errors)

    return member
1169
def _proc_sparse(self, tarfile):
    """Process a GNU sparse header plus extra headers. Stores the
       collected (offset, numbytes) pairs in self.sparse, sets the
       offsets, replaces self.size with the original size and
       returns self.
    """
    # frombuf() already collected the structures of the main header.
    structs, isextended, origsize = self._sparse_structs
    del self._sparse_structs

    # Each extended header block holds up to 21 structures of 24
    # bytes; byte 504 tells whether another block follows.
    while isextended:
        block = tarfile.fileobj.read(BLOCKSIZE)
        for pos in range(0, 21 * 24, 24):
            try:
                offset = nti(block[pos:pos + 12])
                numbytes = nti(block[pos + 12:pos + 24])
            except ValueError:
                break
            if offset and numbytes:
                structs.append((offset, numbytes))
        isextended = bool(block[504])
    self.sparse = structs

    data_start = tarfile.fileobj.tell()
    self.offset_data = data_start
    # The stored size (not the original one) determines where the
    # next header begins.
    tarfile.offset = data_start + self._block(self.size)
    self.size = origsize
    return self
1197
def _proc_pax(self, tarfile):
    """Process an extended or global header as described in
       POSIX.1-2008 and return the member that follows it.

       Raises InvalidHeaderError if a record declares a length of
       zero, and SubsequentHeaderError if the following header is
       missing or bad.
    """
    # Read the header information.
    buf = tarfile.fileobj.read(self._block(self.size))

    # A pax header stores supplemental information for either
    # the following file (extended) or all following files
    # (global). Global headers update the archive-wide dictionary
    # in place, extended headers work on a copy.
    if self.type == XGLTYPE:
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # Check if the pax header contains a hdrcharset field. This tells us
    # the encoding of the path, linkpath, uname and gname fields. Normally,
    # these fields are UTF-8 encoded but since POSIX.1-2008 tar
    # implementations are allowed to store them as raw binary strings if
    # the translation to UTF-8 fails.
    match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
    if match is not None:
        pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

    # For the time being, we don't care about anything other than "BINARY".
    # The only other value that is currently allowed by the standard is
    # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
    hdrcharset = pax_headers.get("hdrcharset")
    if hdrcharset == "BINARY":
        encoding = tarfile.encoding
    else:
        encoding = "utf-8"

    # Parse pax header information. A record looks like that:
    # "%d %s=%s\n" % (length, keyword, value). length is the size
    # of the complete record including the length field itself and
    # the newline. keyword and value are both UTF-8 encoded strings.
    regex = re.compile(br"(\d+) ([^=]+)=")
    pos = 0
    while True:
        match = regex.match(buf, pos)
        if not match:
            break

        length, keyword = match.groups()
        length = int(length)
        if length == 0:
            # A zero length would leave pos unchanged, so the same
            # record would be matched again and again without end.
            raise InvalidHeaderError("invalid header")
        value = buf[match.end(2) + 1:match.start(1) + length - 1]

        # Normally, we could just use "utf-8" as the encoding and "strict"
        # as the error handler, but we better not take the risk. For
        # example, GNU tar <= 1.23 is known to store filenames it cannot
        # translate to UTF-8 as raw strings (unfortunately without a
        # hdrcharset=BINARY header).
        # We first try the strict standard encoding, and if that fails we
        # fall back on the user's encoding and error handler.
        keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                tarfile.errors)
        if keyword in PAX_NAME_FIELDS:
            value = self._decode_pax_field(value, encoding, tarfile.encoding,
                    tarfile.errors)
        else:
            value = self._decode_pax_field(value, "utf-8", "utf-8",
                    tarfile.errors)

        pax_headers[keyword] = value
        pos += length

    # Fetch the next header.
    try:
        next = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # Process GNU sparse information.
    if "GNU.sparse.map" in pax_headers:
        # GNU extended sparse format version 0.1.
        self._proc_gnusparse_01(next, pax_headers)

    elif "GNU.sparse.size" in pax_headers:
        # GNU extended sparse format version 0.0.
        self._proc_gnusparse_00(next, pax_headers, buf)

    elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
        # GNU extended sparse format version 1.0.
        self._proc_gnusparse_10(next, pax_headers, tarfile)

    if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
        # Patch the TarInfo object with the extended header info.
        next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
        next.offset = self.offset

        if "size" in pax_headers:
            # If the extended header replaces the size field,
            # we need to recalculate the offset where the next
            # header starts.
            offset = next.offset_data
            if next.isreg() or next.type not in SUPPORTED_TYPES:
                offset += next._block(next.size)
            tarfile.offset = offset

    return next
1299
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001300 def _proc_gnusparse_00(self, next, pax_headers, buf):
1301 """Process a GNU tar extended sparse header, version 0.0.
1302 """
1303 offsets = []
1304 for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1305 offsets.append(int(match.group(1)))
1306 numbytes = []
1307 for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1308 numbytes.append(int(match.group(1)))
1309 next.sparse = list(zip(offsets, numbytes))
1310
1311 def _proc_gnusparse_01(self, next, pax_headers):
1312 """Process a GNU tar extended sparse header, version 0.1.
1313 """
1314 sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1315 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1316
def _proc_gnusparse_10(self, next, pax_headers, tarfile):
    """Process a GNU tar extended sparse header, version 1.0. The
       sparse map is read from tarfile.fileobj as newline separated
       decimal numbers: first the number of entries, then an offset
       and a size for every entry.
    """
    fileobj = tarfile.fileobj
    numbers = []

    pending = fileobj.read(BLOCKSIZE)
    count, pending = pending.split(b"\n", 1)
    wanted = int(count) * 2

    while len(numbers) < wanted:
        # Fetch another block when no complete number is left.
        if b"\n" not in pending:
            pending += fileobj.read(BLOCKSIZE)
        number, pending = pending.split(b"\n", 1)
        numbers.append(int(number))

    # The member's data begins after the blocks consumed by the map.
    next.offset_data = fileobj.tell()
    next.sparse = list(zip(numbers[::2], numbers[1::2]))
1332
def _apply_pax_info(self, pax_headers, encoding, errors):
    """Overwrite attributes of this object with the values found in
       pax_headers and keep a copy of pax_headers in self.pax_headers.
    """
    for keyword, value in pax_headers.items():
        if keyword == "GNU.sparse.name":
            self.path = value
        elif keyword in ("GNU.sparse.size", "GNU.sparse.realsize"):
            self.size = int(value)
        elif keyword in PAX_FIELDS:
            if keyword in PAX_NUMBER_FIELDS:
                # A number that cannot be converted counts as 0.
                convert = PAX_NUMBER_FIELDS[keyword]
                try:
                    value = convert(value)
                except ValueError:
                    value = 0
            if keyword == "path":
                value = value.rstrip("/")
            setattr(self, keyword, value)

    self.pax_headers = pax_headers.copy()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001355
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001356 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1357 """Decode a single field from a pax record.
1358 """
1359 try:
1360 return value.decode(encoding, "strict")
1361 except UnicodeDecodeError:
1362 return value.decode(fallback_encoding, fallback_errors)
1363
Guido van Rossumd8faa362007-04-27 19:54:29 +00001364 def _block(self, count):
1365 """Round up a byte count by BLOCKSIZE and return it,
1366 e.g. _block(834) => 1024.
1367 """
1368 blocks, remainder = divmod(count, BLOCKSIZE)
1369 if remainder:
1370 blocks += 1
1371 return blocks * BLOCKSIZE
Thomas Wouters89f507f2006-12-13 04:49:30 +00001372
def isreg(self):
    """Return True if the member's type is one of REGULAR_TYPES."""
    return self.type in REGULAR_TYPES
Raymond Hettingera694f232019-03-27 13:16:34 -07001376
def isfile(self):
    """Return True if the member is a regular file (same as isreg())."""
    result = self.isreg()
    return result
Raymond Hettingera694f232019-03-27 13:16:34 -07001380
def isdir(self):
    """Return True if the member's type is DIRTYPE (a directory)."""
    return self.type == DIRTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001384
def issym(self):
    """Return True if the member's type is SYMTYPE (a symbolic link)."""
    return self.type == SYMTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001388
def islnk(self):
    """Return True if the member's type is LNKTYPE (a hard link)."""
    return self.type == LNKTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001392
def ischr(self):
    """Return True if the member's type is CHRTYPE (a character device)."""
    return self.type == CHRTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001396
def isblk(self):
    """Return True if the member's type is BLKTYPE (a block device)."""
    return self.type == BLKTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001400
def isfifo(self):
    """Return True if the member's type is FIFOTYPE (a FIFO)."""
    return self.type == FIFOTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001404
def issparse(self):
    """Return True if a sparse map has been set on the member,
       i.e. its `sparse' attribute is not None.
    """
    has_map = self.sparse is not None
    return has_map
Raymond Hettingera694f232019-03-27 13:16:34 -07001407
def isdev(self):
    """Return True if the member is a character device, a block
       device or a FIFO.
    """
    device_types = (CHRTYPE, BLKTYPE, FIFOTYPE)
    return self.type in device_types
1411# class TarInfo
1412
1413class TarFile(object):
1414 """The TarFile Class provides an interface to tar archives.
1415 """
1416
# Class-level defaults.  __init__ stores an instance-level value for
# format, tarinfo, dereference, ignore_zeros, encoding, debug and
# errorlevel only when the matching argument is not None; `errors'
# is always assigned from the argument.

debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

dereference = False         # If true, add content of linked file to the
                            # tar file, else the link.

ignore_zeros = False        # If true, skips empty or invalid blocks and
                            # continues processing.

errorlevel = 1              # If 0, fatal errors only appear in debug
                            # messages (if debug >= 0). If > 0, errors
                            # are passed to the caller as exceptions.

format = DEFAULT_FORMAT     # The format to use when creating an archive.

encoding = ENCODING         # Encoding for 8-bit character strings.

errors = None               # Error handler for unicode conversion.

tarinfo = TarInfo           # The default TarInfo class to use.

fileobject = ExFileObject   # The file-object for extractfile().
Guido van Rossumd8faa362007-04-27 19:54:29 +00001438
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None,
        errorlevel=None, copybufsize=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file, 'w' to create a new file overwriting an existing one, or
       'x' to create a new file exclusively. `mode' defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       The remaining arguments override the class-level defaults of the
       same name when they are not None. `pax_headers' is only kept if
       the resulting format is PAX_FORMAT.

       Raises ValueError for an unknown `mode' and ReadError if an
       invalid header is met while scanning an archive opened for
       appending. If setup fails, a file opened here is closed again
       before the exception propagates.
    """
    modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
    if mode not in modes:
        raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
    self.mode = mode
    self._mode = modes[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        # We opened the file, so close() is responsible for closing it.
        self._extfileobj = False
    else:
        # Take over the name from the file object if it has a usable one.
        if (name is None and hasattr(fileobj, "name") and
            isinstance(fileobj.name, (str, bytes))):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding
    self.errors = errors

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.copybufsize = copybufsize
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            # firstmember must exist as an attribute before next() runs.
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            # NOTE(review): fromtarfile() presumably advances
            # self.offset past each member it reads -- confirm.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    tarinfo = self.tarinfo.fromtarfile(self)
                    self.members.append(tarinfo)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    # Keep the header error as the explicit cause.
                    raise ReadError(str(e)) from e

        if self.mode in ("a", "w", "x"):
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Deliberately bare: clean up on any failure, then re-raise.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001538
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001539 #--------------------------------------------------------------------------
1540 # Below are the classmethods which act as alternate constructors to the
1541 # TarFile class. The open() method is the only one that is needed for
1542 # public use; it is the "super"-constructor and is able to select an
1543 # adequate "sub"-constructor for a particular compression using the mapping
1544 # from OPEN_METH.
1545 #
1546 # This concept allows one to subclass TarFile without losing the comfort of
1547 # the super-constructor. A sub-constructor is registered and made available
1548 # by adding it to the mapping in OPEN_METH.
1549
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending and return
       an appropriate TarFile object.

       `mode' is either a plain file mode, or `filemode:compression'
       for seekable access, or `filemode|compression' for a stream of
       tar blocks (`bufsize' is handed to the stream wrapper):

       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'r:xz'       open for reading with lzma compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'w:xz'       open for writing with lzma compression

       'x' or 'x:'  create a tarfile exclusively without compression, raise
                    an exception if the file is already created
       'x:gz'       create a gzip compressed tarfile, raise an exception
                    if the file is already created
       'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                    if the file is already created
       'x:xz'       create an lzma compressed tarfile, raise an exception
                    if the file is already created

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'r|xz'       open an lzma compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing
       'w|xz'       open an lzma compressed stream for writing
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Try every registered opener in turn.  The sort key is False
        # for compressed formats, so plain 'taropen' is tried last.
        def not_compressed(comptype):
            return cls.OPEN_METH[comptype] == 'taropen'
        for comptype in sorted(cls.OPEN_METH, key=not_compressed):
            opener = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return opener(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so the next opener sees the same data.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
        raise ReadError("file could not be opened successfully")

    if ":" in mode:
        filemode, _, comptype = mode.partition(":")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Pick the *open() method registered for the compression.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        opener = getattr(cls, cls.OPEN_METH[comptype])
        return opener(name, filemode, fileobj, **kwargs)

    if "|" in mode:
        filemode, _, comptype = mode.partition("|")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            t = cls(name, filemode, stream, **kwargs)
        except:
            stream.close()
            raise
        # The stream wrapper was created here, so the TarFile owns it.
        t._extfileobj = False
        return t

    if mode in ("a", "w", "x"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001640
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

       Validates `mode' and hands all arguments to the class
       constructor.  Raises ValueError for any other mode.
    """
    if mode in ("r", "a", "w", "x"):
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001648
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       `fileobj', if given, is wrapped in a GzipFile using
       `compresslevel'; the remaining keyword arguments go to
       taropen().  The returned TarFile owns the GzipFile wrapper.

       Raises ValueError for an unsupported mode, CompressionError if
       the gzip module cannot be imported, and ReadError if an OSError
       occurs while opening for reading.  Translated exceptions keep
       the original one as their explicit cause.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        from gzip import GzipFile
    except ImportError as e:
        raise CompressionError("gzip module is not available") from e

    try:
        fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)
    except OSError as e:
        if fileobj is not None and mode == 'r':
            raise ReadError("not a gzip file") from e
        raise

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except OSError as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not a gzip file") from e
        raise
    except:
        # Any other failure: release the wrapper and propagate.
        fileobj.close()
        raise
    t._extfileobj = False
    return t
1681
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       A BZ2File is created on `fileobj' if it is given (and truthy),
       otherwise on `name', using `compresslevel'; the remaining
       keyword arguments go to taropen().  The returned TarFile owns
       the BZ2File wrapper.

       Raises ValueError for an unsupported mode, CompressionError if
       the bz2 module cannot be imported, and ReadError if OSError or
       EOFError occurs while opening for reading.  Translated
       exceptions keep the original one as their explicit cause.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        from bz2 import BZ2File
    except ImportError as e:
        raise CompressionError("bz2 module is not available") from e

    fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (OSError, EOFError) as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not a bzip2 file") from e
        raise
    except:
        # Any other failure: release the wrapper and propagate.
        fileobj.close()
        raise
    t._extfileobj = False
    return t
1709
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open lzma compressed tar archive name for reading or writing.
       Appending is not allowed.

       An LZMAFile is created on `fileobj' if it is given (and truthy),
       otherwise on `name', using `preset'; the remaining keyword
       arguments go to taropen().  The returned TarFile owns the
       LZMAFile wrapper.

       Raises ValueError for an unsupported mode, CompressionError if
       the lzma module cannot be imported, and ReadError if LZMAError
       or EOFError occurs while opening for reading.  Translated
       exceptions keep the original one as their explicit cause.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        from lzma import LZMAFile, LZMAError
    except ImportError as e:
        raise CompressionError("lzma module is not available") from e

    fileobj = LZMAFile(fileobj or name, mode, preset=preset)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (LZMAError, EOFError) as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not an lzma file") from e
        raise
    except:
        # Any other failure: release the wrapper and propagate.
        fileobj.close()
        raise
    t._extfileobj = False
    return t
1737
# All *open() methods are registered here.
# Maps the compression part of a mode string to the name of the
# classmethod that opens it; open() resolves the name with getattr().
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open",   # bzip2 compressed tar
    "xz":  "xzopen"     # lzma compressed tar
}
1745
1746 #--------------------------------------------------------------------------
1747 # The public methods which TarFile provides:
1748
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on an already closed TarFile does nothing. The
       underlying file object is closed unless it was passed in by the
       caller, even if writing the trailer fails.
    """
    if self.closed:
        return

    self.closed = True
    try:
        if self.mode in ("a", "w", "x"):
            trailer_size = BLOCKSIZE * 2
            self.fileobj.write(NUL * trailer_size)
            self.offset += trailer_size
            # Pad the archive to a multiple of RECORDSIZE with
            # zero-blocks (like option -b20 for tar does).
            leftover = self.offset % RECORDSIZE
            if leftover > 0:
                self.fileobj.write(NUL * (RECORDSIZE - leftover))
    finally:
        if not self._extfileobj:
            self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001769
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001780
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    # A complete list is only available once the whole archive has
    # been scanned, so load it first if that has not happened yet.
    if not self._loaded:
        self._load()
    return self.members
1790
def getnames(self):
    """Return the members of the archive as a list of their names. It has
       the same order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001796
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object from the result of os.stat or equivalent
       on an existing file. The file is either named by `name', or
       specified as a file object `fileobj' with a file descriptor. If
       given, `arcname' specifies an alternative name for the file in the
       archive, otherwise, the name is taken from the 'name' attribute of
       'fileobj', or the 'name' argument. The name should be a text
       string.

       Returns None if the file is of a type that is not handled here.
    """
    self._check("awx")

    # A file object, when given, supplies the real file name.
    if fileobj is not None:
        name = fileobj.name

    # Build the member name: drop the drive, use forward slashes
    # only and make absolute paths relative.
    if arcname is None:
        arcname = name
    _, arcname = os.path.splitdrive(arcname)
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    # Start from an empty TarInfo object of the configured class.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self  # Not needed

    # Use os.stat or os.lstat, depending on if symlinks shall be resolved.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif self.dereference:
        statres = os.stat(name)
    else:
        statres = os.lstat(name)
    linkname = ""

    st_mode = statres.st_mode
    if stat.S_ISREG(st_mode):
        inode = (statres.st_ino, statres.st_dev)
        is_known_hardlink = (
            not self.dereference and statres.st_nlink > 1 and
            inode in self.inodes and arcname != self.inodes[inode])
        if is_known_hardlink:
            # A hardlink to a file that was archived before.
            member_type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # Remember the inode only if it is valid;
            # on win32 it is always 0.
            member_type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(st_mode):
        member_type = DIRTYPE
    elif stat.S_ISFIFO(st_mode):
        member_type = FIFOTYPE
    elif stat.S_ISLNK(st_mode):
        member_type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(st_mode):
        member_type = CHRTYPE
    elif stat.S_ISBLK(st_mode):
        member_type = BLKTYPE
    else:
        return None

    # Copy everything the stat result tells us into the TarInfo.
    tarinfo.name = arcname
    tarinfo.mode = st_mode
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry data in the archive.
    tarinfo.size = statres.st_size if member_type == REGTYPE else 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = member_type
    tarinfo.linkname = linkname
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if member_type in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1895
def list(self, verbose=True, *, members=None):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced. `members' is optional and must be a subset of the
       list returned by getmembers().
    """
    self._check()

    # Without an explicit selection, iterate over the archive itself.
    entries = self if members is None else members
    for tarinfo in entries:
        if verbose:
            _safe_print(stat.filemode(tarinfo.mode))
            owner = tarinfo.uname or tarinfo.uid
            group = tarinfo.gname or tarinfo.gid
            _safe_print("%s/%s" % (owner, group))
            if tarinfo.ischr() or tarinfo.isblk():
                # Devices show "major,minor" in place of a size.
                device = "%d,%d" % (tarinfo.devmajor, tarinfo.devminor)
                _safe_print("%10s" % device)
            else:
                _safe_print("%10d" % tarinfo.size)
            timestamp = time.localtime(tarinfo.mtime)[:6]
            _safe_print("%d-%02d-%02d %02d:%02d:%02d" % timestamp)

        suffix = "/" if tarinfo.isdir() else ""
        _safe_print(tarinfo.name + suffix)

        if verbose:
            if tarinfo.issym():
                _safe_print("-> " + tarinfo.linkname)
            if tarinfo.islnk():
                _safe_print("link to " + tarinfo.linkname)
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001927
def add(self, name, arcname=None, recursive=True, *, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `filter' is a function
       that expects a TarInfo object argument and returns the changed
       TarInfo object, if it returns None the TarInfo object will be
       excluded from the archive.
    """
    self._check("awx")

    if arcname is None:
        arcname = name

    # Never put the archive into itself.
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Describe the file with a TarInfo object.
    tarinfo = self.gettarinfo(name, arcname)
    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Let the filter change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Regular files are written together with their data.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as source:
            self.addfile(tarinfo, source)
        return

    # Everything else is header-only; directories may then recurse.
    is_directory = tarinfo.isdir()
    self.addfile(tarinfo)
    if is_directory and recursive:
        for entry in sorted(os.listdir(name)):
            self.add(os.path.join(name, entry), os.path.join(arcname, entry),
                    recursive, filter=filter)
1978
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive.

    A copy of `tarinfo' is serialized as the member header and
    recorded in self.members. If `fileobj' is given, it should be a
    binary file; tarinfo.size bytes are read from it and written after
    the header, padded with NUL bytes up to a full block. TarInfo
    objects can be created directly or by using gettarinfo().
    """
    self._check("awx")

    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)
    bufsize = self.copybufsize

    # If there's data to follow, append it and pad to the block size.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size, bufsize=bufsize)
        block_count, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            block_count += 1
        self.offset += block_count * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002003
def extractall(self, path=".", members=None, *, numeric_owner=False):
    """Extract all members from the archive to the current working
    directory, or to `path' if given.

    `members' is optional and must be a subset of the list returned
    by getmembers(); by default every member of the archive is
    extracted. Directories are first created with mode 0o700 and
    receive their owner, modification time and permissions only after
    everything has been extracted. If `numeric_owner` is True, only
    the numbers for user/group names are used and not the names.
    """
    deferred_dirs = []

    for entry in (self if members is None else members):
        is_dir = entry.isdir()
        if is_dir:
            # Remember the real attributes, but extract a copy that
            # uses a safe mode for now.
            deferred_dirs.append(entry)
            entry = copy.copy(entry)
            entry.mode = 0o700
        # Directory attributes are applied in the second pass below.
        self.extract(entry, path, set_attrs=not is_dir,
                     numeric_owner=numeric_owner)

    # Walk the directories in reverse name order, so that nested
    # directories are finished before their parents.
    for entry in reversed(sorted(deferred_dirs, key=lambda d: d.name)):
        dirpath = os.path.join(path, entry.name)
        try:
            self.chown(entry, dirpath, numeric_owner=numeric_owner)
            self.utime(entry, dirpath)
            self.chmod(entry, dirpath)
        except ExtractError as exc:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % exc)
2043
def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
    """Extract a member from the archive to the current working
    directory, using its full name.

    `member' may be a filename or a TarInfo object. `path' selects a
    different target directory. File attributes (owner, mtime, mode)
    are set unless `set_attrs' is False. If `numeric_owner` is True,
    only the numbers for user/group names are used and not the names.

    An OSError is re-raised when self.errorlevel > 0 and an
    ExtractError when self.errorlevel > 1; otherwise the error is
    only reported through _dbg().
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
                             set_attrs=set_attrs,
                             numeric_owner=numeric_owner)
    except OSError as exc:
        if self.errorlevel > 0:
            raise
        if exc.filename is None:
            self._dbg(1, "tarfile: %s" % exc.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (exc.strerror, exc.filename))
    except ExtractError as exc:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % exc)
2081
def extractfile(self, member):
    """Extract a member from the archive as a file object.

    `member' may be a filename or a TarInfo object. For a regular
    file (or a member of unknown type, which is treated like one) the
    object created by self.fileobject is returned. For a link, the
    file object of the link's target is returned. For any other
    member None is returned.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Members with unknown types are treated as regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # No data is associated with the member (directory, chrdev,
    # blkdev, etc.): there is nothing to read from.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2112
Eric V. Smith7a803892015-04-15 10:27:58 -04002113 def _extract_member(self, tarinfo, targetpath, set_attrs=True,
2114 numeric_owner=False):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002115 """Extract the TarInfo object tarinfo to a physical
2116 file called targetpath.
2117 """
2118 # Fetch the TarInfo object for the given name
2119 # and build the destination pathname, replacing
2120 # forward slashes to platform specific separators.
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +00002121 targetpath = targetpath.rstrip("/")
2122 targetpath = targetpath.replace("/", os.sep)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002123
2124 # Create all upper directories.
2125 upperdirs = os.path.dirname(targetpath)
2126 if upperdirs and not os.path.exists(upperdirs):
Christian Heimes2202f872008-02-06 14:31:34 +00002127 # Create directories that are not part of the archive with
2128 # default permissions.
Thomas Woutersb2137042007-02-01 18:02:27 +00002129 os.makedirs(upperdirs)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002130
2131 if tarinfo.islnk() or tarinfo.issym():
2132 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2133 else:
2134 self._dbg(1, tarinfo.name)
2135
2136 if tarinfo.isreg():
2137 self.makefile(tarinfo, targetpath)
2138 elif tarinfo.isdir():
2139 self.makedir(tarinfo, targetpath)
2140 elif tarinfo.isfifo():
2141 self.makefifo(tarinfo, targetpath)
2142 elif tarinfo.ischr() or tarinfo.isblk():
2143 self.makedev(tarinfo, targetpath)
2144 elif tarinfo.islnk() or tarinfo.issym():
2145 self.makelink(tarinfo, targetpath)
2146 elif tarinfo.type not in SUPPORTED_TYPES:
2147 self.makeunknown(tarinfo, targetpath)
2148 else:
2149 self.makefile(tarinfo, targetpath)
2150
Martin v. Löwis16f344d2010-11-01 21:39:13 +00002151 if set_attrs:
Eric V. Smith7a803892015-04-15 10:27:58 -04002152 self.chown(tarinfo, targetpath, numeric_owner)
Martin v. Löwis16f344d2010-11-01 21:39:13 +00002153 if not tarinfo.issym():
2154 self.chmod(tarinfo, targetpath)
2155 self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002156
#--------------------------------------------------------------------------
# Below are the different file methods. They are called via
# _extract_member() when extract() is called. They can be replaced in a
# subclass to implement other functionality.
2161
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath.

    An already existing directory is reused silently. If targetpath
    exists but is not a directory, the FileExistsError raised by
    os.mkdir() is propagated instead of being swallowed, so the
    caller learns that the member could not be created.
    """
    try:
        # Use a safe mode for the directory, the real mode is set
        # later in _extract_member().
        os.mkdir(targetpath, 0o700)
    except FileExistsError:
        # Only an existing directory counts as success; anything else
        # occupying the path is a real extraction failure.
        if not os.path.isdir(targetpath):
            raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002171
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

    The member's data is copied from the archive, starting at
    tarinfo.offset_data. For a sparse member only the
    (offset, size) chunks listed in tarinfo.sparse are written and
    the file is then cut or extended to tarinfo.size.
    """
    archive = self.fileobj
    archive.seek(tarinfo.offset_data)
    bufsize = self.copybufsize
    with bltn_open(targetpath, "wb") as out:
        if tarinfo.sparse is None:
            copyfileobj(archive, out, tarinfo.size, ReadError, bufsize)
        else:
            for chunk_offset, chunk_size in tarinfo.sparse:
                out.seek(chunk_offset)
                copyfileobj(archive, out, chunk_size, ReadError, bufsize)
            out.seek(tarinfo.size)
            out.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002187
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type
       at targetpath.

       The member is written with makefile() and a level 1 debug
       message reports the unknown type.
    """
    self.makefile(tarinfo, targetpath)
    message = ("tarfile: Unknown file type %r, "
               "extracted as regular file." % tarinfo.type)
    self._dbg(1, message)
2195
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath.

    Raises ExtractError if the os module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002203
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.

    Raises ExtractError if the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    type_flag = stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR
    mode = tarinfo.mode | type_flag

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2218
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.

       Raises ExtractError, chained to the underlying KeyError, if the
       fallback copy is needed but the link target cannot be found in
       the archive.
    """
    try:
        # For systems that support symbolic and hard links.
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        else:
            # See extract(), which sets _link_target for hard links.
            if os.path.exists(tarinfo._link_target):
                os.link(tarinfo._link_target, targetpath)
            else:
                # The link target is not on disk: extract the
                # archived target under the link's name instead.
                self._extract_member(self._find_link_target(tarinfo),
                                     targetpath)
    except symlink_exception:
        try:
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
        except KeyError as err:
            # Chain explicitly so the traceback names the missing
            # linkname as the direct cause.
            raise ExtractError(
                "unable to resolve link inside archive") from err
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002241
def chown(self, tarinfo, targetpath, numeric_owner):
    """Set owner of targetpath according to tarinfo. If numeric_owner
       is True, use .gid/.uid instead of .gname/.uname. If numeric_owner
       is False, fall back to .gid/.uid when the search based on name
       fails.

       Nothing is done unless os.geteuid() exists and returns 0.
       Raises ExtractError, chained to the underlying OSError, if the
       owner cannot be changed.
    """
    if hasattr(os, "geteuid") and os.geteuid() == 0:
        # We have to be root to do so.
        g = tarinfo.gid
        u = tarinfo.uid
        if not numeric_owner:
            # Prefer the ids that the stored names map to on this
            # system; an unknown name keeps the numeric id (deliberate
            # best-effort fallback).
            try:
                if grp:
                    g = grp.getgrnam(tarinfo.gname)[2]
            except KeyError:
                pass
            try:
                if pwd:
                    u = pwd.getpwnam(tarinfo.uname)[2]
            except KeyError:
                pass
        try:
            # Change the link itself, not its target, where possible.
            if tarinfo.issym() and hasattr(os, "lchown"):
                os.lchown(targetpath, u, g)
            else:
                os.chown(targetpath, u, g)
        except OSError as e:
            raise ExtractError("could not change owner") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002270
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.

    Raises ExtractError, chained to the underlying OSError, if the
    mode cannot be changed.
    """
    try:
        os.chmod(targetpath, tarinfo.mode)
    except OSError as e:
        raise ExtractError("could not change mode") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002278
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.

    tarinfo.mtime is used for both the access and the modification
    time. Does nothing if the os module has no utime(). Raises
    ExtractError, chained to the underlying OSError, if the time
    cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except OSError as e:
        raise ExtractError("could not change modification time") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002288
2289 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.
    """
    self._check("ra")

    # A member that was read ahead of time is handed out first.
    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Advance the file pointer. Reading the byte just before the
    # expected offset detects data that ends too early.
    if self.offset != self.fileobj.tell():
        self.fileobj.seek(self.offset - 1)
        if not self.fileobj.read(1):
            raise ReadError("unexpected end of data")

    # Read the next block.
    member = None
    while True:
        try:
            member = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as exc:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as exc:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
            if self.offset == 0:
                raise ReadError(str(exc))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as exc:
            if self.offset == 0:
                raise ReadError(str(exc))
        except SubsequentHeaderError as exc:
            raise ReadError(str(exc))
        break

    if member is None:
        # Nothing more to read: the member list is complete.
        self._loaded = True
    else:
        self.members.append(member)

    return member
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002340
2341 #--------------------------------------------------------------------------
2342 # Little helper methods:
2343
Lars Gustäbel1b512722010-06-03 12:45:16 +00002344 def _getmember(self, name, tarinfo=None, normalize=False):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002345 """Find an archive member by name from bottom to top.
2346 If tarinfo is given, it is used as the starting point.
2347 """
Martin v. Löwisf3c56112004-09-18 09:08:52 +00002348 # Ensure that all members have been loaded.
2349 members = self.getmembers()
2350
Lars Gustäbel1b512722010-06-03 12:45:16 +00002351 # Limit the member search list up to tarinfo.
2352 if tarinfo is not None:
2353 members = members[:members.index(tarinfo)]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002354
Lars Gustäbel1b512722010-06-03 12:45:16 +00002355 if normalize:
2356 name = os.path.normpath(name)
2357
2358 for member in reversed(members):
2359 if normalize:
2360 member_name = os.path.normpath(member.name)
2361 else:
2362 member_name = member.name
2363
2364 if name == member_name:
2365 return member
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002366
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002367 def _load(self):
2368 """Read through the entire archive file and look for readable
2369 members.
2370 """
2371 while True:
2372 tarinfo = self.next()
2373 if tarinfo is None:
2374 break
2375 self._loaded = True
2376
2377 def _check(self, mode=None):
2378 """Check if TarFile is still open, and if the operation's mode
2379 corresponds to TarFile's mode.
2380 """
2381 if self.closed:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02002382 raise OSError("%s is closed" % self.__class__.__name__)
Guido van Rossumd8faa362007-04-27 19:54:29 +00002383 if mode is not None and self.mode not in mode:
Andrew Svetlovf7a17b42012-12-25 16:47:37 +02002384 raise OSError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002385
Lars Gustäbel1b512722010-06-03 12:45:16 +00002386 def _find_link_target(self, tarinfo):
2387 """Find the target member of a symlink or hardlink member in the
2388 archive.
2389 """
2390 if tarinfo.issym():
2391 # Always search the entire archive.
Lars Gustäbel1ef9eda2012-04-24 21:04:40 +02002392 linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
Lars Gustäbel1b512722010-06-03 12:45:16 +00002393 limit = None
2394 else:
2395 # Search the archive before the link, because a hard link is
2396 # just a reference to an already archived file.
2397 linkname = tarinfo.linkname
2398 limit = tarinfo
2399
2400 member = self._getmember(linkname, tarinfo=limit, normalize=True)
2401 if member is None:
2402 raise KeyError("linkname %r not found" % linkname)
2403 return member
2404
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002405 def __iter__(self):
2406 """Provide an iterator object.
2407 """
2408 if self._loaded:
Serhiy Storchakaa2549212015-12-19 09:43:14 +02002409 yield from self.members
2410 return
2411
2412 # Yield items using TarFile's next() method.
2413 # When all members have been read, set TarFile as _loaded.
2414 index = 0
2415 # Fix for SF #1100429: Under rare circumstances it can
2416 # happen that getmembers() is called during iteration,
2417 # which will have already exhausted the next() method.
2418 if self.firstmember is not None:
2419 tarinfo = self.next()
2420 index += 1
2421 yield tarinfo
2422
2423 while True:
2424 if index < len(self.members):
2425 tarinfo = self.members[index]
2426 elif not self._loaded:
2427 tarinfo = self.next()
2428 if not tarinfo:
2429 self._loaded = True
2430 return
2431 else:
2432 return
2433 index += 1
2434 yield tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002435
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002436 def _dbg(self, level, msg):
2437 """Write debugging output to sys.stderr.
2438 """
2439 if level <= self.debug:
Guido van Rossumbe19ed72007-02-09 05:37:30 +00002440 print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002441
2442 def __enter__(self):
2443 self._check()
2444 return self
2445
2446 def __exit__(self, type, value, traceback):
2447 if type is None:
2448 self.close()
2449 else:
2450 # An exception occurred. We must not call close() because
2451 # it would try to write end-of-archive blocks and padding.
2452 if not self._extfileobj:
2453 self.fileobj.close()
2454 self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002455
#--------------------
# exported functions
#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       'name' should be a string, file, or file-like object.

       When a file or file-like object is given, its position is
       saved before probing and restored afterwards, so the caller
       can keep using the object from where it was.
    """
    try:
        if hasattr(name, "read"):
            pos = name.tell()
            try:
                t = open(fileobj=name)
            finally:
                # Probing reads from the object; put it back where
                # the caller left it, whether or not it is a tar file.
                name.seek(pos)
        else:
            t = open(name)
        t.close()
        return True
    except TarError:
        return False
2474
# Expose TarFile.open under the module-level name `open'. Inside this
# module the name therefore refers to TarFile.open, not to the builtin.
open = TarFile.open
Serhiy Storchakad27b4552013-11-24 01:53:29 +02002476
2477
def main():
    """Command-line interface of the module.

    Exactly one of -l/--list, -e/--extract, -c/--create or -t/--test
    must be given; -v/--verbose enables the status messages. The
    process exits with status 1 if the named file is not a tar
    archive.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for tarfile module.')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument('-l', '--list', metavar='<tarfile>',
                         help='Show listing of a tarfile')
    actions.add_argument('-e', '--extract', nargs='+',
                         metavar=('<tarfile>', '<output_dir>'),
                         help='Extract tarfile into target dir')
    actions.add_argument('-c', '--create', nargs='+',
                         metavar=('<name>', '<file>'),
                         help='Create tarfile from sources')
    actions.add_argument('-t', '--test', metavar='<tarfile>',
                         help='Test if a tarfile is valid')
    args = parser.parse_args()

    # parser.exit() terminates the process, so each branch can bail
    # out early when the source is not a tar archive.
    not_a_tar = '{!r} is not a tar archive.\n'

    if args.test is not None:
        src = args.test
        if not is_tarfile(src):
            parser.exit(1, not_a_tar.format(src))
        with open(src, 'r') as tar:
            tar.getmembers()
            print(tar.getmembers(), file=sys.stderr)
        if args.verbose:
            print('{!r} is a tar archive.'.format(src))

    elif args.list is not None:
        src = args.list
        if not is_tarfile(src):
            parser.exit(1, not_a_tar.format(src))
        with TarFile.open(src, 'r:*') as tf:
            tf.list(verbose=args.verbose)

    elif args.extract is not None:
        # One argument: extract here; two: extract into the given dir.
        given = len(args.extract)
        if given == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif given == 2:
            src, curdir = args.extract
        else:
            parser.exit(1, parser.format_help())

        if not is_tarfile(src):
            parser.exit(1, not_a_tar.format(src))
        with TarFile.open(src, 'r:*') as tf:
            tf.extractall(path=curdir)
        if args.verbose:
            if curdir == '.':
                msg = '{!r} file is extracted.'.format(src)
            else:
                msg = ('{!r} file is extracted '
                       'into {!r} directory.').format(src, curdir)
            print(msg)

    elif args.create is not None:
        tar_name, *tar_files = args.create
        _, ext = os.path.splitext(tar_name)
        # Compression is chosen from the archive name's extension.
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        compression = compressions.get(ext)
        tar_mode = 'w' if compression is None else 'w:' + compression

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))
2564
# Run the command-line interface when the module is executed as a script.
if __name__ == '__main__':
    main()