#!/usr/bin/env python3
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
# All rights reserved.
#
# Permission  is  hereby granted,  free  of charge,  to  any person
# obtaining a  copy of  this software  and associated documentation
# files  (the  "Software"),  to   deal  in  the  Software   without
# restriction,  including  without limitation  the  rights to  use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies  of  the  Software,  and to  permit  persons  to  whom the
# Software  is  furnished  to  do  so,  subject  to  the  following
# conditions:
#
# The above copyright  notice and this  permission notice shall  be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
"""Read from and write to tar format archives.
"""

# Module metadata.
version     = "0.9.0"
__author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000035
#---------
# Imports
#---------
from builtins import open as bltn_open
import sys
import os
import io
import shutil
import stat
import time
import struct
import copy
import re

# pwd and grp are only available on Unix-like platforms.
try:
    import pwd
except ImportError:
    pwd = None
try:
    import grp
except ImportError:
    grp = None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000058
# Exceptions that signal "symlinks cannot be created here":
#  - AttributeError / NotImplementedError: os.symlink is missing or not
#    implemented (e.g. Windows prior to 6.0).
#  - OSError (winerror=1314): the caller does not hold the
#    SeCreateSymbolicLinkPrivilege privilege.
# OSError is a builtin and always defined, so no NameError guard is needed.
symlink_exception = (AttributeError, NotImplementedError, OSError)
67
# Names exported by "from tarfile import *".
__all__ = [
    "TarFile",
    "TarInfo",
    "is_tarfile",
    "TarError",
    "ReadError",
    "CompressionError",
    "StreamError",
    "ExtractError",
    "HeaderError",
    "ENCODING",
    "USTAR_FORMAT",
    "GNU_FORMAT",
    "PAX_FORMAT",
    "DEFAULT_FORMAT",
    "open",
]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000073
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# The magic+version field of a tar header is 8 bytes wide.  The GNU
# variant is "ustar" followed by TWO spaces and a NUL; the POSIX variant
# is "ustar", a NUL and the version digits "00".
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = PAX_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000109
#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# Every member type this module knows how to handle.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Member types that are handled like a regular file.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)

# Member types that only exist in the GNU tar format.
GNU_TYPES = (
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)
127
# Pax header keywords whose value replaces a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Pax header keywords whose decoding depends on hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Pax header keywords that hold numbers, mapped to the converter to use;
# every other keyword is kept as a string.
PAX_NUMBER_FIELDS = dict(
    atime=float,
    ctime=float,
    mtime=float,
    uid=int,
    gid=int,
    size=int,
)
145
#---------------------------------------------------------
# initialization
#---------------------------------------------------------
# Default encoding for member names: always UTF-8 on Windows, otherwise
# whatever the interpreter reports as the filesystem encoding.
ENCODING = "utf-8" if os.name == "nt" else sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000153
#---------------------------------------------------------
# Some useful functions
#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000157
def stn(s, length, encoding, errors):
    """Encode the string s and return it as a bytes field.

    The encoded value is cut off after length bytes; a shorter value is
    filled up with NUL bytes until it is length bytes long.
    """
    encoded = s.encode(encoding, errors)
    # A negative repeat count yields b"", so over-long values get no padding.
    padding = NUL * (length - len(encoded))
    return encoded[:length] + padding
Thomas Wouters477c8d52006-05-27 19:21:47 +0000163
def nts(s, encoding, errors):
    """Decode a bytes field to a string, stopping at the first NUL byte.

    Everything from the first NUL byte on is discarded; a field without
    a NUL byte is decoded in full.
    """
    head, _sep, _tail = s.partition(b"\0")
    return head.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000171
def nti(s):
    """Decode a tar header number field into a python number.

    Raises InvalidHeaderError if an octal field cannot be parsed.
    """
    # A number field uses one of two encodings, see itn() below.
    marker = s[0]
    if marker in (0o200, 0o377):
        # GNU base-256: the bytes after the marker are a big-endian number.
        n = 0
        for byte in s[1:]:
            n = (n << 8) + byte
        if marker == 0o377:
            # A 0o377 marker flags a negative (two's complement) value.
            n = -(256 ** (len(s) - 1) - n)
        return n

    # Classic encoding: NUL-terminated octal digits; blank means zero.
    try:
        text = nts(s, "ascii", "strict")
        return int(text.strip() or "0", 8)
    except ValueError:
        raise InvalidHeaderError("invalid header")
191
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Encode a python number as a tar header number field.

    Raises ValueError if n does not fit into a field of the given size
    for the selected format.
    """
    # POSIX 1003.1-1988 stores numbers as a string of octal digits
    # followed by a null-byte, which covers values up to
    # (8**(digits-1))-1.  GNU tar can store larger values when needed:
    # a leading 0o200 (positive) or 0o377 (negative) byte announces that
    # the remaining digits-1 bytes are a big-endian base-256 number,
    # which covers values up to (256**(digits-1))-1.
    n = int(n)
    width = digits - 1

    if 0 <= n < 8 ** width:
        return bytes("%0*o" % (width, n), "ascii") + NUL

    if format == GNU_FORMAT and -256 ** width <= n < 256 ** width:
        if n >= 0:
            field = bytearray([0o200])
        else:
            field = bytearray([0o377])
            # Shift into the non-negative range; only the low bytes are kept.
            n = 256 ** digits + n
        # Append the low `width` bytes, most significant byte first.
        field.extend((n >> shift) & 0o377
                     for shift in range(8 * (width - 1), -1, -8))
        return field

    raise ValueError("overflow in number field")
220
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

    All header bytes are summed up, with the 8-byte chksum field counted
    as if it was filled with spaces.  According to the GNU tar sources,
    some tars (Sun and NeXT) sum the bytes as signed chars, which gives
    a different result when bytes with the high bit set are present, so
    both variants are computed.
    """
    # 148 bytes before the chksum field, 8 skipped, 356 after it.  The
    # constant 256 is the value of the eight spaces (8 * 0x20).
    unsigned_bytes = struct.unpack_from("148B8x356B", buf)
    signed_bytes = struct.unpack_from("148b8x356b", buf)
    return 256 + sum(unsigned_bytes), 256 + sum(signed_bytes)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000233
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.

    With length=None everything up to the end of src is copied.  If src
    ends before length bytes were read, `exception` is raised with the
    message "unexpected end of data".  bufsize is the chunk size used
    for reading (16 KiB when not given).
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    def transfer(nbytes):
        # Move exactly nbytes from src to dst or fail.
        chunk = src.read(nbytes)
        if len(chunk) < nbytes:
            raise exception("unexpected end of data")
        dst.write(chunk)

    full_chunks, tail = divmod(length, bufsize)
    for _ in range(full_chunks):
        transfer(bufsize)
    if tail != 0:
        transfer(tail)
258
def _safe_print(s):
    """Print s followed by a space, without failing on unencodable text.

    If sys.stdout reports an encoding, characters it cannot represent
    are replaced by backslash escapes before printing.
    """
    stdout_encoding = getattr(sys.stdout, 'encoding', None)
    if stdout_encoding is None:
        text = s
    else:
        text = s.encode(stdout_encoding, 'backslashreplace').decode(stdout_encoding)
    print(text, end=' ')
264
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000265
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000299
#---------------------------
# internal stream interface
#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.

       The file is accessed through a raw OS-level file descriptor
       (os.open / os.read / os.write), without any Python-level
       buffering.
    """

    def __init__(self, name, mode):
        """Open the file `name`.

        mode is "r" (read-only) or "w" (write-only; the file is created
        if missing and truncated otherwise).  Any other mode raises
        KeyError.
        """
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # O_BINARY only exists on platforms that distinguish text and
        # binary files (Windows); everywhere else there is nothing to add.
        if hasattr(os, "O_BINARY"):
            mode |= os.O_BINARY
        self.fd = os.open(name, mode, 0o666)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Read and return up to size bytes from the file descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write all of the bytes-like object s to the file descriptor.

        os.write() may accept only part of the data (pipes, nearly full
        devices, ...), so writing is repeated with the unwritten rest
        until everything has been handed to the OS.
        """
        while True:
            written = os.write(self.fd, s)
            if written >= len(s):
                break
            # Short write: retry with the part that was not accepted.
            s = s[written:]
326
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip, bzip2 or xz (lzma) compression is
       possible.  A stream-like object could be for example:
       sys.stdin, sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

        name     -- file name; opened via _LowLevelFile when fileobj is
                    None, and used for the gzip FNAME field on writing.
        mode     -- "r" for reading, anything else is treated as writing.
        comptype -- "tar" (uncompressed), "gz", "bz2", "xz", or "*" to
                    detect the compression from the first block.
        fileobj  -- an already open stream-like object, or None.
        bufsize  -- size of the blocks exchanged with fileobj.

        Raises CompressionError if the compression type is unknown or
        its module is not available, and ReadError if a gzip stream does
        not start with a gzip header.
        """
        # Only close the file object later if it was opened here.
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = b""     # raw (possibly compressed) bytes
        self.pos      = 0       # position in the uncompressed stream
        self.closed   = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    # Set before parsing the header so that _read() can
                    # always rely on self.exception being defined.
                    self.exception = zlib.error
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except BaseException:
            # Initialization failed: release a file opened here, mark the
            # stream closed (so __del__ does nothing) and re-raise.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" is missing if __init__ failed before setting it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits produce a raw deflate stream; the gzip header
        # and trailer are written by hand (here and in close()).
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", int(time.time()))
        # magic, CM=deflate, FLG=FNAME, mtime, XFL=2, OS=255 (unknown)
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            # The gzip trailer needs the CRC of the uncompressed data.
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # gzip trailer: CRC32 and size modulo 2**32.
                    self.fileobj.write(struct.pack("<L", self.crc))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

        Consumes the gzip member header so that the raw buffer starts at
        the deflate data.  Raises ReadError if the magic bytes are
        missing and CompressionError for a method other than deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)          # skip mtime, extra flags and OS byte

        if flag & 4:
            # FEXTRA: a 2-byte little-endian length followed by that many
            # bytes.  The field belongs to the uncompressed header, so it
            # must be skipped with the raw __read(); read() would push it
            # through the decompressor and advance self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # FNAME: NUL-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: NUL-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: 2-byte header checksum (not verified).
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos >= 0:
            # Seeking forward is emulated by reading and discarding.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size):
        """Return the next size number of bytes from the stream."""
        assert size is not None
        buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            # Skip underlying buffer to avoid unaligned double buffering.
            if self.buf:
                buf = self.buf
                self.buf = b""
            else:
                buf = self.fileobj.read(self.bufsize)
                if not buf:
                    break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                raise ReadError("invalid compressed data")
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        # Keep decompressed bytes beyond size for the next call.
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = b"".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
566
class _StreamProxy:
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').

       The first block of the wrapped file object is read up front so
       that its magic bytes can be inspected; the first read() call
       hands that block out again.
    """

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        # One-shot: return the block read in __init__ (size is ignored)
        # and route every later read() directly to the file object.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Guess the compression type from the first block's magic bytes."""
        head = self.buf
        if head.startswith(b"\x1f\x8b\x08"):
            return "gz"
        if head[0:3] == b"BZh" and head[4:10] == b"1AY&SY":
            return "bz2"
        if head.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        return "tar"

    def close(self):
        self.fileobj.close()
# class StreamProxy
593
#------------------------
# Extraction file object
#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.

       fileobj   -- the underlying file object; it must support seek()
                    and read().
       offset    -- position in fileobj where the stored data starts.
       size      -- logical size of the file that is presented.
       blockinfo -- optional list of (offset, size) pairs describing
                    which parts of the logical file are actually stored
                    (sparse files); everything in between reads as NUL
                    bytes.  None means one block covering the whole file.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.  Each entry is
        # (is_data, start, stop, realpos): start/stop are logical
        # positions, realpos is where a data block lives in fileobj.
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for offset, size in blockinfo:
            if offset > lastpos:
                self.map.append((False, lastpos, offset, None))
            self.map.append((True, offset, offset + size, realpos))
            realpos += size
            lastpos = offset + size
        if lastpos < self.size:
            self.map.append((False, lastpos, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.

        The resulting position is clamped to the range 0..size and
        returned.  Raises ValueError for an unknown whence value.
        """
        if whence == io.SEEK_SET:
            self.position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                self.position = max(self.position + position, 0)
            else:
                self.position = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            self.position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        return self.position

    def read(self, size=None):
        """Read data from the file.

        Return up to size bytes starting at the current position.  If
        size is None or negative, everything up to the end of the file
        is returned (the usual file object convention).  Raises
        ReadError if the underlying file object ends prematurely.
        """
        remaining = self.size - self.position
        if size is None or size < 0:
            size = remaining
        else:
            size = min(size, remaining)

        # Collect the pieces and join once instead of repeated +=.
        chunks = []
        while size > 0:
            # Find the map entry that contains the current position,
            # starting at the entry used last and wrapping around.
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                self.fileobj.seek(offset + (self.position - start))
                b = self.fileobj.read(length)
                if len(b) != length:
                    raise ReadError("unexpected end of data")
                chunks.append(b)
            else:
                # Hole of a sparse file: reads as zero bytes.
                chunks.append(NUL * length)
            size -= length
            self.position += length
        return b"".join(chunks)

    def readinto(self, b):
        """Read into the writable buffer b; return the number of bytes read."""
        buf = self.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def close(self):
        self.closed = True
#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000700
class ExFileObject(io.BufferedReader):
    """Buffered, read-only file object for the data of one archive member.

    The member's data region inside the archive's file object is
    wrapped in a _FileInFile, which serves as the raw stream.
    """

    def __init__(self, tarfile, tarinfo):
        raw = _FileInFile(
            tarfile.fileobj,
            tarinfo.offset_data,
            tarinfo.size,
            tarinfo.sparse,
        )
        super().__init__(raw)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000708
#------------------
# Exported Classes
#------------------
712class TarInfo(object):
713 """Informational class which holds the details about an
714 archive member given by a tar header block.
715 TarInfo objects are returned by TarFile.getmember(),
716 TarFile.getmembers() and TarFile.gettarinfo() and are
717 usually created internally.
718 """
719
Raymond Hettingera694f232019-03-27 13:16:34 -0700720 __slots__ = dict(
721 name = 'Name of the archive member.',
722 mode = 'Permission bits.',
723 uid = 'User ID of the user who originally stored this member.',
724 gid = 'Group ID of the user who originally stored this member.',
725 size = 'Size in bytes.',
726 mtime = 'Time of last modification.',
727 chksum = 'Header checksum.',
728 type = ('File type. type is usually one of these constants: '
729 'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '
730 'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),
731 linkname = ('Name of the target file name, which is only present '
732 'in TarInfo objects of type LNKTYPE and SYMTYPE.'),
733 uname = 'User name.',
734 gname = 'Group name.',
735 devmajor = 'Device major number.',
736 devminor = 'Device minor number.',
737 offset = 'The tar header starts here.',
738 offset_data = "The file's data starts here.",
739 pax_headers = ('A dictionary containing key-value pairs of an '
740 'associated pax extended header.'),
741 sparse = 'Sparse member information.',
742 tarfile = None,
743 _sparse_structs = None,
744 _link_target = None,
745 )
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +0000746
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000747 def __init__(self, name=""):
748 """Construct a TarInfo object. name is the optional name
749 of the member.
750 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000751 self.name = name # member name
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000752 self.mode = 0o644 # file permissions
Thomas Wouters477c8d52006-05-27 19:21:47 +0000753 self.uid = 0 # user id
754 self.gid = 0 # group id
755 self.size = 0 # file size
756 self.mtime = 0 # modification time
757 self.chksum = 0 # header checksum
758 self.type = REGTYPE # member type
759 self.linkname = "" # link name
Lars Gustäbel331b8002010-10-04 15:18:47 +0000760 self.uname = "" # user name
761 self.gname = "" # group name
Thomas Wouters477c8d52006-05-27 19:21:47 +0000762 self.devmajor = 0 # device major number
763 self.devminor = 0 # device minor number
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000764
Thomas Wouters477c8d52006-05-27 19:21:47 +0000765 self.offset = 0 # the tar header starts here
766 self.offset_data = 0 # the file's data starts here
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000767
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +0000768 self.sparse = None # sparse member information
Guido van Rossumd8faa362007-04-27 19:54:29 +0000769 self.pax_headers = {} # pax header information
770
Serhiy Storchakabdf6b912017-03-19 08:40:32 +0200771 @property
772 def path(self):
Raymond Hettingera694f232019-03-27 13:16:34 -0700773 'In pax headers, "name" is called "path".'
Guido van Rossumd8faa362007-04-27 19:54:29 +0000774 return self.name
Guido van Rossumd8faa362007-04-27 19:54:29 +0000775
Serhiy Storchakabdf6b912017-03-19 08:40:32 +0200776 @path.setter
777 def path(self, name):
778 self.name = name
779
780 @property
781 def linkpath(self):
Raymond Hettingera694f232019-03-27 13:16:34 -0700782 'In pax headers, "linkname" is called "linkpath".'
Guido van Rossumd8faa362007-04-27 19:54:29 +0000783 return self.linkname
Serhiy Storchakabdf6b912017-03-19 08:40:32 +0200784
785 @linkpath.setter
786 def linkpath(self, linkname):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000787 self.linkname = linkname
Guido van Rossumd8faa362007-04-27 19:54:29 +0000788
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000789 def __repr__(self):
790 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
791
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000792 def get_info(self):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000793 """Return the TarInfo's attributes as a dictionary.
794 """
795 info = {
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +0000796 "name": self.name,
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000797 "mode": self.mode & 0o7777,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000798 "uid": self.uid,
799 "gid": self.gid,
800 "size": self.size,
801 "mtime": self.mtime,
802 "chksum": self.chksum,
803 "type": self.type,
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +0000804 "linkname": self.linkname,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000805 "uname": self.uname,
806 "gname": self.gname,
807 "devmajor": self.devmajor,
808 "devminor": self.devminor
809 }
810
811 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
812 info["name"] += "/"
813
814 return info
815
Victor Stinnerde629d42010-05-05 21:43:57 +0000816 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000817 """Return a tar header as a string of 512 byte blocks.
818 """
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000819 info = self.get_info()
Guido van Rossume7ba4952007-06-06 23:52:48 +0000820
Guido van Rossumd8faa362007-04-27 19:54:29 +0000821 if format == USTAR_FORMAT:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000822 return self.create_ustar_header(info, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000823 elif format == GNU_FORMAT:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000824 return self.create_gnu_header(info, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000825 elif format == PAX_FORMAT:
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000826 return self.create_pax_header(info, encoding)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000827 else:
828 raise ValueError("invalid format")
829
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000830 def create_ustar_header(self, info, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000831 """Return the object as a ustar header block.
832 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000833 info["magic"] = POSIX_MAGIC
834
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200835 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000836 raise ValueError("linkname is too long")
837
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200838 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
839 info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000840
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000841 return self._create_header(info, USTAR_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000842
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000843 def create_gnu_header(self, info, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000844 """Return the object as a GNU header block sequence.
845 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000846 info["magic"] = GNU_MAGIC
847
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000848 buf = b""
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200849 if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000850 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000851
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200852 if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000853 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000854
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000855 return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000856
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000857 def create_pax_header(self, info, encoding):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000858 """Return the object as a ustar header block. If it cannot be
859 represented this way, prepend a pax extended header sequence
860 with supplement information.
861 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000862 info["magic"] = POSIX_MAGIC
863 pax_headers = self.pax_headers.copy()
864
865 # Test string fields for values that exceed the field length or cannot
866 # be represented in ASCII encoding.
867 for name, hname, length in (
868 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
869 ("uname", "uname", 32), ("gname", "gname", 32)):
870
Guido van Rossume7ba4952007-06-06 23:52:48 +0000871 if hname in pax_headers:
872 # The pax header has priority.
873 continue
874
Guido van Rossumd8faa362007-04-27 19:54:29 +0000875 # Try to encode the string as ASCII.
876 try:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000877 info[name].encode("ascii", "strict")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000878 except UnicodeEncodeError:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000879 pax_headers[hname] = info[name]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000880 continue
881
Guido van Rossume7ba4952007-06-06 23:52:48 +0000882 if len(info[name]) > length:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000883 pax_headers[hname] = info[name]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000884
885 # Test number fields for values that exceed the field limit or values
886 # that like to be stored as float.
887 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
Guido van Rossume7ba4952007-06-06 23:52:48 +0000888 if name in pax_headers:
889 # The pax header has priority. Avoid overflow.
890 info[name] = 0
891 continue
892
Guido van Rossumd8faa362007-04-27 19:54:29 +0000893 val = info[name]
894 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000895 pax_headers[name] = str(val)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000896 info[name] = 0
897
Guido van Rossume7ba4952007-06-06 23:52:48 +0000898 # Create a pax extended header if necessary.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000899 if pax_headers:
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000900 buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000901 else:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000902 buf = b""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000903
Lars Gustäbel3741eff2007-08-21 12:17:05 +0000904 return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000905
906 @classmethod
Lars Gustäbel3741eff2007-08-21 12:17:05 +0000907 def create_pax_global_header(cls, pax_headers):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000908 """Return the object as a pax global header block sequence.
909 """
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000910 return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000911
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200912 def _posix_split_name(self, name, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000913 """Split a name longer than 100 chars into a prefix
914 and a name part.
915 """
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200916 components = name.split("/")
917 for i in range(1, len(components)):
918 prefix = "/".join(components[:i])
919 name = "/".join(components[i:])
920 if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \
921 len(name.encode(encoding, errors)) <= LENGTH_NAME:
922 break
923 else:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000924 raise ValueError("name is too long")
Lars Gustäbel0f450ab2016-04-19 08:43:17 +0200925
Guido van Rossumd8faa362007-04-27 19:54:29 +0000926 return prefix, name
927
928 @staticmethod
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000929 def _create_header(info, format, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000930 """Return a header block. info is a dictionary with file
931 information, format must be one of the *_FORMAT constants.
932 """
933 parts = [
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000934 stn(info.get("name", ""), 100, encoding, errors),
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000935 itn(info.get("mode", 0) & 0o7777, 8, format),
Guido van Rossumd8faa362007-04-27 19:54:29 +0000936 itn(info.get("uid", 0), 8, format),
937 itn(info.get("gid", 0), 8, format),
938 itn(info.get("size", 0), 12, format),
939 itn(info.get("mtime", 0), 12, format),
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000940 b" ", # checksum field
Guido van Rossumd8faa362007-04-27 19:54:29 +0000941 info.get("type", REGTYPE),
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000942 stn(info.get("linkname", ""), 100, encoding, errors),
943 info.get("magic", POSIX_MAGIC),
Lars Gustäbel331b8002010-10-04 15:18:47 +0000944 stn(info.get("uname", ""), 32, encoding, errors),
945 stn(info.get("gname", ""), 32, encoding, errors),
Guido van Rossumd8faa362007-04-27 19:54:29 +0000946 itn(info.get("devmajor", 0), 8, format),
947 itn(info.get("devminor", 0), 8, format),
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000948 stn(info.get("prefix", ""), 155, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000949 ]
950
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000951 buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000952 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
Lars Gustäbela280ca752007-08-28 07:34:33 +0000953 buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000954 return buf
955
956 @staticmethod
957 def _create_payload(payload):
958 """Return the string payload filled with zero bytes
959 up to the next 512 byte border.
960 """
961 blocks, remainder = divmod(len(payload), BLOCKSIZE)
962 if remainder > 0:
963 payload += (BLOCKSIZE - remainder) * NUL
964 return payload
965
966 @classmethod
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000967 def _create_gnu_long_header(cls, name, type, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000968 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
969 for name.
970 """
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000971 name = name.encode(encoding, errors) + NUL
Guido van Rossumd8faa362007-04-27 19:54:29 +0000972
973 info = {}
974 info["name"] = "././@LongLink"
975 info["type"] = type
976 info["size"] = len(name)
977 info["magic"] = GNU_MAGIC
978
979 # create extended header + name blocks.
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000980 return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
Guido van Rossumd8faa362007-04-27 19:54:29 +0000981 cls._create_payload(name)
982
983 @classmethod
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000984 def _create_pax_generic_header(cls, pax_headers, type, encoding):
985 """Return a POSIX.1-2008 extended or global header sequence
Guido van Rossumd8faa362007-04-27 19:54:29 +0000986 that contains a list of keyword, value pairs. The values
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000987 must be strings.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000988 """
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000989 # Check if one of the fields contains surrogate characters and thereby
990 # forces hdrcharset=BINARY, see _proc_pax() for more information.
991 binary = False
992 for keyword, value in pax_headers.items():
993 try:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000994 value.encode("utf-8", "strict")
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000995 except UnicodeEncodeError:
996 binary = True
997 break
998
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000999 records = b""
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001000 if binary:
1001 # Put the hdrcharset field at the beginning of the header.
1002 records += b"21 hdrcharset=BINARY\n"
1003
Guido van Rossumd8faa362007-04-27 19:54:29 +00001004 for keyword, value in pax_headers.items():
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001005 keyword = keyword.encode("utf-8")
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001006 if binary:
1007 # Try to restore the original byte representation of `value'.
1008 # Needless to say, that the encoding must match the string.
1009 value = value.encode(encoding, "surrogateescape")
1010 else:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001011 value = value.encode("utf-8")
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001012
Guido van Rossumd8faa362007-04-27 19:54:29 +00001013 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
1014 n = p = 0
1015 while True:
1016 n = l + len(str(p))
1017 if n == p:
1018 break
1019 p = n
Lars Gustäbela280ca752007-08-28 07:34:33 +00001020 records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
Guido van Rossumd8faa362007-04-27 19:54:29 +00001021
1022 # We use a hardcoded "././@PaxHeader" name like star does
1023 # instead of the one that POSIX recommends.
1024 info = {}
1025 info["name"] = "././@PaxHeader"
1026 info["type"] = type
1027 info["size"] = len(records)
1028 info["magic"] = POSIX_MAGIC
1029
1030 # Create pax header + record blocks.
Lars Gustäbel3741eff2007-08-21 12:17:05 +00001031 return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
Guido van Rossumd8faa362007-04-27 19:54:29 +00001032 cls._create_payload(records)
1033
    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

           encoding and errors are passed to nts() to decode the string
           fields. Raises EmptyHeaderError for an empty buf,
           TruncatedHeaderError if its length is not BLOCKSIZE,
           EOFHeaderError if it consists of null bytes only and
           InvalidHeaderError if the stored checksum is not among the
           values returned by calc_chksums().
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        # Verify the checksum before any other field is looked at.
        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        # Decode the fields from their fixed positions in the block.
        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    # Stop at the first structure that nti() rejects.
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj
1096
1097 @classmethod
1098 def fromtarfile(cls, tarfile):
1099 """Return the next TarInfo object from TarFile object
1100 tarfile.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001101 """
Guido van Rossumd8faa362007-04-27 19:54:29 +00001102 buf = tarfile.fileobj.read(BLOCKSIZE)
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001103 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001104 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1105 return obj._proc_member(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001106
Guido van Rossumd8faa362007-04-27 19:54:29 +00001107 #--------------------------------------------------------------------------
1108 # The following are methods that are called depending on the type of a
1109 # member. The entry point is _proc_member() which can be overridden in a
1110 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1111 # implement the following
1112 # operations:
1113 # 1. Set self.offset_data to the position where the data blocks begin,
1114 # if there is data that follows.
1115 # 2. Set tarfile.offset to the position where the next member's header will
1116 # begin.
1117 # 3. Return self or another valid TarInfo object.
1118 def _proc_member(self, tarfile):
1119 """Choose the right processing method depending on
1120 the type and call it.
Thomas Wouters89f507f2006-12-13 04:49:30 +00001121 """
Guido van Rossumd8faa362007-04-27 19:54:29 +00001122 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1123 return self._proc_gnulong(tarfile)
1124 elif self.type == GNUTYPE_SPARSE:
1125 return self._proc_sparse(tarfile)
1126 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1127 return self._proc_pax(tarfile)
1128 else:
1129 return self._proc_builtin(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001130
Guido van Rossumd8faa362007-04-27 19:54:29 +00001131 def _proc_builtin(self, tarfile):
1132 """Process a builtin type or an unknown type which
1133 will be treated as a regular file.
1134 """
1135 self.offset_data = tarfile.fileobj.tell()
1136 offset = self.offset_data
1137 if self.isreg() or self.type not in SUPPORTED_TYPES:
1138 # Skip the following data blocks.
1139 offset += self._block(self.size)
1140 tarfile.offset = offset
Thomas Wouters89f507f2006-12-13 04:49:30 +00001141
Guido van Rossume7ba4952007-06-06 23:52:48 +00001142 # Patch the TarInfo object with saved global
Guido van Rossumd8faa362007-04-27 19:54:29 +00001143 # header information.
Guido van Rossume7ba4952007-06-06 23:52:48 +00001144 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001145
1146 return self
1147
1148 def _proc_gnulong(self, tarfile):
1149 """Process the blocks that hold a GNU longname
1150 or longlink member.
1151 """
1152 buf = tarfile.fileobj.read(self._block(self.size))
1153
1154 # Fetch the next header and process it.
Lars Gustäbel9520a432009-11-22 18:48:49 +00001155 try:
1156 next = self.fromtarfile(tarfile)
1157 except HeaderError:
1158 raise SubsequentHeaderError("missing or bad subsequent header")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001159
1160 # Patch the TarInfo object from the next header with
1161 # the longname information.
1162 next.offset = self.offset
1163 if self.type == GNUTYPE_LONGNAME:
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001164 next.name = nts(buf, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001165 elif self.type == GNUTYPE_LONGLINK:
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001166 next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001167
1168 return next
1169
1170 def _proc_sparse(self, tarfile):
1171 """Process a GNU sparse header plus extra headers.
1172 """
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +00001173 # We already collected some sparse structures in frombuf().
1174 structs, isextended, origsize = self._sparse_structs
1175 del self._sparse_structs
Guido van Rossumd8faa362007-04-27 19:54:29 +00001176
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +00001177 # Collect sparse structures from extended header blocks.
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001178 while isextended:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001179 buf = tarfile.fileobj.read(BLOCKSIZE)
1180 pos = 0
Guido van Rossum805365e2007-05-07 22:24:25 +00001181 for i in range(21):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001182 try:
1183 offset = nti(buf[pos:pos + 12])
1184 numbytes = nti(buf[pos + 12:pos + 24])
1185 except ValueError:
1186 break
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001187 if offset and numbytes:
1188 structs.append((offset, numbytes))
Guido van Rossumd8faa362007-04-27 19:54:29 +00001189 pos += 24
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001190 isextended = bool(buf[504])
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001191 self.sparse = structs
Guido van Rossumd8faa362007-04-27 19:54:29 +00001192
1193 self.offset_data = tarfile.fileobj.tell()
1194 tarfile.offset = self.offset_data + self._block(self.size)
1195 self.size = origsize
Guido van Rossumd8faa362007-04-27 19:54:29 +00001196 return self
1197
1198 def _proc_pax(self, tarfile):
1199 """Process an extended or global header as described in
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001200 POSIX.1-2008.
Guido van Rossumd8faa362007-04-27 19:54:29 +00001201 """
1202 # Read the header information.
1203 buf = tarfile.fileobj.read(self._block(self.size))
1204
1205 # A pax header stores supplemental information for either
1206 # the following file (extended) or all following files
1207 # (global).
1208 if self.type == XGLTYPE:
1209 pax_headers = tarfile.pax_headers
1210 else:
1211 pax_headers = tarfile.pax_headers.copy()
1212
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001213 # Check if the pax header contains a hdrcharset field. This tells us
1214 # the encoding of the path, linkpath, uname and gname fields. Normally,
1215 # these fields are UTF-8 encoded but since POSIX.1-2008 tar
1216 # implementations are allowed to store them as raw binary strings if
1217 # the translation to UTF-8 fails.
1218 match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
1219 if match is not None:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001220 pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001221
1222 # For the time being, we don't care about anything other than "BINARY".
1223 # The only other value that is currently allowed by the standard is
1224 # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
1225 hdrcharset = pax_headers.get("hdrcharset")
1226 if hdrcharset == "BINARY":
1227 encoding = tarfile.encoding
1228 else:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001229 encoding = "utf-8"
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001230
Guido van Rossumd8faa362007-04-27 19:54:29 +00001231 # Parse pax header information. A record looks like that:
1232 # "%d %s=%s\n" % (length, keyword, value). length is the size
1233 # of the complete record including the length field itself and
Guido van Rossume7ba4952007-06-06 23:52:48 +00001234 # the newline. keyword and value are both UTF-8 encoded strings.
Antoine Pitroufd036452008-08-19 17:56:33 +00001235 regex = re.compile(br"(\d+) ([^=]+)=")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001236 pos = 0
1237 while True:
1238 match = regex.match(buf, pos)
1239 if not match:
1240 break
1241
1242 length, keyword = match.groups()
1243 length = int(length)
1244 value = buf[match.end(2) + 1:match.start(1) + length - 1]
1245
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001246 # Normally, we could just use "utf-8" as the encoding and "strict"
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001247 # as the error handler, but we better not take the risk. For
1248 # example, GNU tar <= 1.23 is known to store filenames it cannot
1249 # translate to UTF-8 as raw strings (unfortunately without a
1250 # hdrcharset=BINARY header).
1251 # We first try the strict standard encoding, and if that fails we
1252 # fall back on the user's encoding and error handler.
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001253 keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001254 tarfile.errors)
1255 if keyword in PAX_NAME_FIELDS:
1256 value = self._decode_pax_field(value, encoding, tarfile.encoding,
1257 tarfile.errors)
1258 else:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001259 value = self._decode_pax_field(value, "utf-8", "utf-8",
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001260 tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001261
1262 pax_headers[keyword] = value
1263 pos += length
1264
Guido van Rossume7ba4952007-06-06 23:52:48 +00001265 # Fetch the next header.
Lars Gustäbel9520a432009-11-22 18:48:49 +00001266 try:
1267 next = self.fromtarfile(tarfile)
1268 except HeaderError:
1269 raise SubsequentHeaderError("missing or bad subsequent header")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001270
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001271 # Process GNU sparse information.
1272 if "GNU.sparse.map" in pax_headers:
1273 # GNU extended sparse format version 0.1.
1274 self._proc_gnusparse_01(next, pax_headers)
1275
1276 elif "GNU.sparse.size" in pax_headers:
1277 # GNU extended sparse format version 0.0.
1278 self._proc_gnusparse_00(next, pax_headers, buf)
1279
1280 elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
1281 # GNU extended sparse format version 1.0.
1282 self._proc_gnusparse_10(next, pax_headers, tarfile)
1283
Guido van Rossume7ba4952007-06-06 23:52:48 +00001284 if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
Guido van Rossume7ba4952007-06-06 23:52:48 +00001285 # Patch the TarInfo object with the extended header info.
1286 next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1287 next.offset = self.offset
1288
1289 if "size" in pax_headers:
1290 # If the extended header replaces the size field,
1291 # we need to recalculate the offset where the next
1292 # header starts.
1293 offset = next.offset_data
1294 if next.isreg() or next.type not in SUPPORTED_TYPES:
1295 offset += next._block(next.size)
1296 tarfile.offset = offset
1297
1298 return next
1299
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001300 def _proc_gnusparse_00(self, next, pax_headers, buf):
1301 """Process a GNU tar extended sparse header, version 0.0.
1302 """
1303 offsets = []
1304 for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1305 offsets.append(int(match.group(1)))
1306 numbytes = []
1307 for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1308 numbytes.append(int(match.group(1)))
1309 next.sparse = list(zip(offsets, numbytes))
1310
1311 def _proc_gnusparse_01(self, next, pax_headers):
1312 """Process a GNU tar extended sparse header, version 0.1.
1313 """
1314 sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1315 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1316
1317 def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1318 """Process a GNU tar extended sparse header, version 1.0.
1319 """
1320 fields = None
1321 sparse = []
1322 buf = tarfile.fileobj.read(BLOCKSIZE)
1323 fields, buf = buf.split(b"\n", 1)
1324 fields = int(fields)
1325 while len(sparse) < fields * 2:
1326 if b"\n" not in buf:
1327 buf += tarfile.fileobj.read(BLOCKSIZE)
1328 number, buf = buf.split(b"\n", 1)
1329 sparse.append(int(number))
1330 next.offset_data = tarfile.fileobj.tell()
1331 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1332
Guido van Rossume7ba4952007-06-06 23:52:48 +00001333 def _apply_pax_info(self, pax_headers, encoding, errors):
1334 """Replace fields with supplemental information from a previous
1335 pax extended or global header.
1336 """
1337 for keyword, value in pax_headers.items():
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001338 if keyword == "GNU.sparse.name":
1339 setattr(self, "path", value)
1340 elif keyword == "GNU.sparse.size":
1341 setattr(self, "size", int(value))
1342 elif keyword == "GNU.sparse.realsize":
1343 setattr(self, "size", int(value))
1344 elif keyword in PAX_FIELDS:
1345 if keyword in PAX_NUMBER_FIELDS:
1346 try:
1347 value = PAX_NUMBER_FIELDS[keyword](value)
1348 except ValueError:
1349 value = 0
1350 if keyword == "path":
1351 value = value.rstrip("/")
1352 setattr(self, keyword, value)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001353
1354 self.pax_headers = pax_headers.copy()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001355
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001356 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1357 """Decode a single field from a pax record.
1358 """
1359 try:
1360 return value.decode(encoding, "strict")
1361 except UnicodeDecodeError:
1362 return value.decode(fallback_encoding, fallback_errors)
1363
def _block(self, count):
    """Return `count' rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    quotient, rest = divmod(count, BLOCKSIZE)
    # A partially filled block still occupies a whole block.
    if rest:
        return (quotient + 1) * BLOCKSIZE
    return quotient * BLOCKSIZE
Thomas Wouters89f507f2006-12-13 04:49:30 +00001372
def isreg(self):
    """Return True if the Tarinfo object is a regular file."""
    member_type = self.type
    return member_type in REGULAR_TYPES
Raymond Hettingera694f232019-03-27 13:16:34 -07001376
def isfile(self):
    """Return True if the Tarinfo object is a regular file.

       This simply returns the result of isreg().
    """
    regular = self.isreg()
    return regular
Raymond Hettingera694f232019-03-27 13:16:34 -07001380
def isdir(self):
    """Return True if it is a directory."""
    member_type = self.type
    return member_type == DIRTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001384
def issym(self):
    """Return True if it is a symbolic link."""
    member_type = self.type
    return member_type == SYMTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001388
def islnk(self):
    """Return True if it is a hard link."""
    member_type = self.type
    return member_type == LNKTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001392
def ischr(self):
    """Return True if it is a character device."""
    member_type = self.type
    return member_type == CHRTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001396
def isblk(self):
    """Return True if it is a block device."""
    member_type = self.type
    return member_type == BLKTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001400
def isfifo(self):
    """Return True if it is a FIFO."""
    member_type = self.type
    return member_type == FIFOTYPE
Raymond Hettingera694f232019-03-27 13:16:34 -07001404
def issparse(self):
    """Return True if a sparse map has been set (self.sparse is not None)."""
    return not (self.sparse is None)
Raymond Hettingera694f232019-03-27 13:16:34 -07001407
def isdev(self):
    """Return True if it is one of character device, block device or FIFO."""
    device_types = (CHRTYPE, BLKTYPE, FIFOTYPE)
    return self.type in device_types
1411# class TarInfo
1412
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.
    """

    # Verbosity of debug messages: may be set from 0 (no msgs)
    # to 3 (all msgs).
    debug = 0

    # If true, add content of linked file to the tar file,
    # else the link.
    dereference = False

    # If true, skips empty or invalid blocks and continues processing.
    ignore_zeros = False

    # If 0, fatal errors only appear in debug messages (if debug >= 0).
    # If > 0, errors are passed to the caller as exceptions.
    errorlevel = 1

    # The format to use when creating an archive.
    format = DEFAULT_FORMAT

    # Encoding for 8-bit character strings.
    encoding = ENCODING

    # Error handler for unicode conversion.
    errors = None

    # The default TarInfo class to use.
    tarinfo = TarInfo

    # The file-object for extractfile().
    fileobject = ExFileObject
Guido van Rossumd8faa362007-04-27 19:54:29 +00001438
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None,
        errorlevel=None, copybufsize=None):
    """Open an (uncompressed) tar archive `name'.

       `mode' is 'r' to read from an existing archive, 'a' to append
       data to an existing file, 'w' to create a new file overwriting
       an existing one, or 'x' to open the file with mode "xb".
       `mode' defaults to 'r'; any other value raises ValueError.

       If `fileobj' is given, it is used for reading or writing data
       and, if it has a `mode' attribute, that replaces the low-level
       file mode. `fileobj' is not closed when the TarFile is closed.

       The remaining arguments, when not None, override the class
       attributes of the same name on this instance.
    """
    binary_modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
    if mode not in binary_modes:
        raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
    self.mode = mode
    self._mode = binary_modes[mode]

    if fileobj:
        # Use the caller's file object and adopt its name and mode
        # where they are available.
        if (name is None and hasattr(fileobj, "name")
                and isinstance(fileobj.name, (str, bytes))):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    else:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Per-instance overrides of the class-level defaults.
    overrides = (
        ("format", format),
        ("tarinfo", tarinfo),
        ("dereference", dereference),
        ("ignore_zeros", ignore_zeros),
        ("encoding", encoding),
    )
    for attribute, given in overrides:
        if given is not None:
            setattr(self, attribute, given)
    self.errors = errors

    # Global pax headers are only kept for the pax format.
    use_pax_headers = pax_headers is not None and self.format == PAX_FORMAT
    self.pax_headers = pax_headers if use_pax_headers else {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Bookkeeping state.
    self.copybufsize = copybufsize
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    member = self.tarinfo.fromtarfile(self)
                    self.members.append(member)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in ("a", "w", "x"):
            self._loaded = True

            if self.pax_headers:
                header = self.tarinfo.create_pax_global_header(
                    self.pax_headers.copy())
                self.fileobj.write(header)
                self.offset += len(header)
    except BaseException:
        # Do not leak a file we opened ourselves, then re-raise.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001538
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001539 #--------------------------------------------------------------------------
1540 # Below are the classmethods which act as alternate constructors to the
1541 # TarFile class. The open() method is the only one that is needed for
1542 # public use; it is the "super"-constructor and is able to select an
1543 # adequate "sub"-constructor for a particular compression using the mapping
1544 # from OPEN_METH.
1545 #
1546 # This concept allows one to subclass TarFile without losing the comfort of
1547 # the super-constructor. A sub-constructor is registered and made available
1548 # by adding it to the mapping in OPEN_METH.
1549
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'r:xz'       open for reading with lzma compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'w:xz'       open for writing with lzma compression

       'x' or 'x:'  create a tarfile exclusively without compression, raise
                    an exception if the file is already created
       'x:gz'       create a gzip compressed tarfile, raise an exception
                    if the file is already created
       'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                    if the file is already created
       'x:xz'       create an lzma compressed tarfile, raise an exception
                    if the file is already created

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'r|xz'       open an lzma compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing
       'w|xz'       open an lzma compressed stream for writing
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Try every registered *open() method, the compressed ones
        # first and plain 'taropen' last, until one succeeds.
        candidates = sorted(
            cls.OPEN_METH,
            key=lambda kind: cls.OPEN_METH[kind] == 'taropen')
        for kind in candidates:
            opener = getattr(cls, cls.OPEN_METH[kind])
            if fileobj is not None:
                start = fileobj.tell()
            try:
                return opener(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so the next candidate sees the same data.
                if fileobj is not None:
                    fileobj.seek(start)
        raise ReadError("file could not be opened successfully")

    if ":" in mode:
        filemode, _, comptype = mode.partition(":")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        opener = getattr(cls, cls.OPEN_METH[comptype])
        return opener(name, filemode, fileobj, **kwargs)

    if "|" in mode:
        filemode, _, comptype = mode.partition("|")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            archive = cls(name, filemode, stream, **kwargs)
        except BaseException:
            stream.close()
            raise
        # The stream was created here, so the archive owns it.
        archive._extfileobj = False
        return archive

    if mode in ("a", "w", "x"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001640
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be one of 'r', 'a', 'w' or 'x', otherwise
       ValueError is raised.
    """
    if mode in ("r", "a", "w", "x"):
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001648
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       `mode' must be 'r', 'w' or 'x', otherwise ValueError is raised.
       CompressionError is raised if the gzip module is not available.
       In mode 'r', an OSError raised while opening or reading the
       archive is reported as ReadError with the OSError as its cause.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        # The import failure itself carries no extra information.
        raise CompressionError("gzip module is not available") from None

    try:
        fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
    except OSError as e:
        if fileobj is not None and mode == 'r':
            raise ReadError("not a gzip file") from e
        raise

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except OSError as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not a gzip file") from e
        raise
    except:
        # Close the GzipFile on any other failure, then re-raise.
        fileobj.close()
        raise
    # The GzipFile was created here, so the archive closes it.
    t._extfileobj = False
    return t
1682
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       `mode' must be 'r', 'w' or 'x', otherwise ValueError is raised.
       CompressionError is raised if the bz2 module is not available.
       In mode 'r', an OSError or EOFError raised while reading the
       archive is reported as ReadError with the original exception
       as its cause.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import bz2
    except ImportError:
        # The import failure itself carries no extra information.
        raise CompressionError("bz2 module is not available") from None

    fileobj = bz2.BZ2File(fileobj or name, mode,
                          compresslevel=compresslevel)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (OSError, EOFError) as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not a bzip2 file") from e
        raise
    except:
        # Close the BZ2File on any other failure, then re-raise.
        fileobj.close()
        raise
    # The BZ2File was created here, so the archive closes it.
    t._extfileobj = False
    return t
1711
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open lzma compressed tar archive name for reading or writing.
       Appending is not allowed.

       `mode' must be 'r', 'w' or 'x', otherwise ValueError is raised.
       CompressionError is raised if the lzma module is not available.
       In mode 'r', an lzma.LZMAError or EOFError raised while reading
       the archive is reported as ReadError with the original exception
       as its cause.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import lzma
    except ImportError:
        # The import failure itself carries no extra information.
        raise CompressionError("lzma module is not available") from None

    fileobj = lzma.LZMAFile(fileobj or name, mode, preset=preset)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (lzma.LZMAError, EOFError) as e:
        fileobj.close()
        if mode == 'r':
            raise ReadError("not an lzma file") from e
        raise
    except:
        # Close the LZMAFile on any other failure, then re-raise.
        fileobj.close()
        raise
    # The LZMAFile was created here, so the archive closes it.
    t._extfileobj = False
    return t
1739
# Registry of all *open() methods: maps a compression type to the
# name of the classmethod that opens it.
OPEN_METH = dict(
    tar="taropen",   # uncompressed tar
    gz="gzopen",     # gzip compressed tar
    bz2="bz2open",   # bzip2 compressed tar
    xz="xzopen",     # lzma compressed tar
)
1747
1748 #--------------------------------------------------------------------------
1749 # The public methods which TarFile provides:
1750
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on an already closed TarFile does nothing. The
       underlying file object is only closed if it was not supplied
       by the caller.
    """
    if not self.closed:
        self.closed = True
        try:
            if self.mode in ("a", "w", "x"):
                trailer = BLOCKSIZE * 2
                self.fileobj.write(NUL * trailer)
                self.offset += trailer
                # fill up the end with zero-blocks
                # (like option -b20 for tar does)
                overhang = self.offset % RECORDSIZE
                if overhang > 0:
                    self.fileobj.write(NUL * (RECORDSIZE - overhang))
        finally:
            if not self._extfileobj:
                self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001771
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.

       Trailing slashes in `name' are ignored, so a directory can be
       looked up as "dir" or "dir/".
    """
    # Member paths are stored without a trailing slash, so strip it
    # from the requested name before the lookup.
    tarinfo = self._getmember(name.rstrip('/'))
    if tarinfo is None:
        raise KeyError("filename %r not found" % name)
    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001782
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    # A complete list requires that the whole archive has been
    # scanned once.
    already_scanned = self._loaded
    if not already_scanned:
        self._load()
    return self.members
1792
def getnames(self):
    """Return the names of the archive members as a list, in the
       same order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001798
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object from the result of os.stat or equivalent
       on an existing file. The file is either named by `name', or
       specified as a file object `fileobj' with a file descriptor. If
       given, `arcname' specifies an alternative name for the file in the
       archive, otherwise, the name is taken from the 'name' attribute of
       'fileobj', or the 'name' argument. The name should be a text
       string.

       None is returned if the file is of a type that is not handled
       here.
    """
    self._check("awx")

    # When fileobj is given, replace name by
    # fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Building the name of the member in the archive.
    # Backward slashes are converted to forward slashes,
    # Absolute paths are turned to relative paths.
    member_path = name if arcname is None else arcname
    member_path = os.path.splitdrive(member_path)[1]
    member_path = member_path.replace(os.sep, "/").lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self  # Not needed

    # Use os.stat or os.lstat, depending on if symlinks shall be resolved.
    if fileobj is not None:
        st = os.fstat(fileobj.fileno())
    elif self.dereference:
        st = os.stat(name)
    else:
        st = os.lstat(name)

    target = ""
    st_mode = st.st_mode
    if stat.S_ISREG(st_mode):
        inode = (st.st_ino, st.st_dev)
        if (not self.dereference and st.st_nlink > 1
                and inode in self.inodes
                and member_path != self.inodes[inode]):
            # Is it a hardlink to an already
            # archived file?
            kind = LNKTYPE
            target = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            kind = REGTYPE
            if inode[0]:
                self.inodes[inode] = member_path
    elif stat.S_ISDIR(st_mode):
        kind = DIRTYPE
    elif stat.S_ISFIFO(st_mode):
        kind = FIFOTYPE
    elif stat.S_ISLNK(st_mode):
        kind = SYMTYPE
        target = os.readlink(name)
    elif stat.S_ISCHR(st_mode):
        kind = CHRTYPE
    elif stat.S_ISBLK(st_mode):
        kind = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = member_path
    tarinfo.mode = st_mode
    tarinfo.uid = st.st_uid
    tarinfo.gid = st.st_gid
    # Only regular files carry data in the archive.
    tarinfo.size = st.st_size if kind == REGTYPE else 0
    tarinfo.mtime = st.st_mtime
    tarinfo.type = kind
    tarinfo.linkname = target

    # Symbolic owner names are optional; ids without an entry
    # keep the default name.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    is_device = kind in (CHRTYPE, BLKTYPE)
    if is_device and hasattr(os, "major") and hasattr(os, "minor"):
        tarinfo.devmajor = os.major(st.st_rdev)
        tarinfo.devminor = os.minor(st.st_rdev)
    return tarinfo
1897
def list(self, verbose=True, *, members=None):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced. `members' is optional and must be a subset of the
       list returned by getmembers().
    """
    self._check()

    # Without an explicit selection, iterate over the archive itself.
    entries = self if members is None else members
    for entry in entries:
        if verbose:
            _safe_print(stat.filemode(entry.mode))
            owner = entry.uname or entry.uid
            group = entry.gname or entry.gid
            _safe_print("%s/%s" % (owner, group))
            if entry.ischr() or entry.isblk():
                # Devices show "major,minor" instead of a size.
                device = "%d,%d" % (entry.devmajor, entry.devminor)
                _safe_print("%10s" % device)
            else:
                _safe_print("%10d" % entry.size)
            stamp = time.localtime(entry.mtime)[:6]
            _safe_print("%d-%02d-%02d %02d:%02d:%02d" % stamp)

        suffix = "/" if entry.isdir() else ""
        _safe_print(entry.name + suffix)

        if verbose:
            if entry.issym():
                _safe_print("-> " + entry.linkname)
            if entry.islnk():
                _safe_print("link to " + entry.linkname)
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001929
def add(self, name, arcname=None, recursive=True, *, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `filter' is a function
       that expects a TarInfo object argument and returns the changed
       TarInfo object, if it returns None the TarInfo object will be
       excluded from the archive.
    """
    self._check("awx")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)
    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as source:
            self.addfile(tarinfo, source)
        return

    # Everything else is stored as a header only; directories may
    # additionally be descended into.
    is_directory = tarinfo.isdir()
    self.addfile(tarinfo)
    if is_directory and recursive:
        for entry in sorted(os.listdir(name)):
            self.add(os.path.join(name, entry),
                     os.path.join(arcname, entry),
                     recursive, filter=filter)
1980
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by `tarinfo' to the archive.

    The header built from `tarinfo' is always written. When `fileobj'
    is given it should be a binary file; tarinfo.size bytes are copied
    from it after the header and padded to a whole number of blocks.
    TarInfo objects can be created directly or with gettarinfo().
    """
    self._check("awx")

    # A shallow copy is what gets serialized and recorded in
    # self.members, not the object the caller passed in.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)
    chunk_size = self.copybufsize

    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size, bufsize=chunk_size)
        block_count, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            # Fill the last, partial block with NULs.
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            block_count += 1
        self.offset += block_count * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002005
def extractall(self, path=".", members=None, *, numeric_owner=False):
    """Extract every member of the archive below `path'.

    `path' defaults to the current working directory. `members' is
    optional and must be a subset of the list returned by getmembers();
    when omitted the archive itself is iterated. Directories are first
    created with a restrictive mode and receive their owner,
    modification time and permissions only after all members have been
    extracted. If `numeric_owner' is True, only the numbers for
    user/group names are used and not the names.
    """
    pending_dirs = []

    for member in (self if members is None else members):
        if member.isdir():
            # Remember the real directory entry, but extract a copy
            # carrying a safe mode for now.
            pending_dirs.append(member)
            member = copy.copy(member)
            member.mode = 0o700
        # Directory attributes are applied in the second pass below.
        self.extract(member, path, set_attrs=not member.isdir(),
                     numeric_owner=numeric_owner)

    # Visit directories in reverse name order.
    by_name = sorted(pending_dirs, key=lambda entry: entry.name)
    for directory in reversed(by_name):
        dirpath = os.path.join(path, directory.name)
        try:
            self.chown(directory, dirpath, numeric_owner=numeric_owner)
            self.utime(directory, dirpath)
            self.chmod(directory, dirpath)
        except ExtractError as exc:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % exc)
2045
def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
    """Extract a single member of the archive below `path', using its
    full name.

    `member' may be a filename or a TarInfo object. `path' defaults to
    the empty string, so the member name is then used as given. File
    attributes (owner, mtime, mode) are set unless `set_attrs' is
    False. If `numeric_owner' is True, only the numbers for user/group
    names are used and not the names.

    An OSError is re-raised when self.errorlevel > 0, an ExtractError
    when self.errorlevel > 1; otherwise the error is only reported
    through _dbg().
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # makelink() needs to know where a hard link's target ends up.
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
                             set_attrs=set_attrs,
                             numeric_owner=numeric_owner)
    except OSError as exc:
        if self.errorlevel > 0:
            raise
        if exc.filename is None:
            report = "tarfile: %s" % exc.strerror
        else:
            report = "tarfile: %s %r" % (exc.strerror, exc.filename)
        self._dbg(1, report)
    except ExtractError as exc:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % exc)
2083
def extractfile(self, member):
    """Return a file object for reading the data of `member'.

    `member' may be a filename or a TarInfo object. For a regular file
    (or a member of an unsupported type, which is treated like one) the
    result of self.fileobject(self, tarinfo) is returned. For a hard or
    symbolic link the file object of the link's target is returned.
    For every other kind of member None is returned.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Members with unknown types are treated as regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # No data is associated with the member (directory, chrdev,
    # blkdev, etc.): there is nothing to read.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # Resolving a link needs to look the target up in the archive,
    # which is refused when reading from a _Stream.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2114
def _extract_member(self, tarinfo, targetpath, set_attrs=True,
                    numeric_owner=False):
    """Create the file system object described by `tarinfo' at
    `targetpath'.

    Missing parent directories are created first. The member is then
    handed to the make*() method matching its type and, unless
    `set_attrs' is false, owner, mode and modification time are applied
    (mode and time are skipped for symbolic links).
    """
    # Drop trailing slashes and switch to the platform's separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Directories that are not part of the archive are created with
    # default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Pick the handler for the member type, then call it once.
    if tarinfo.isreg():
        handler = self.makefile
    elif tarinfo.isdir():
        handler = self.makedir
    elif tarinfo.isfifo():
        handler = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        handler = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        handler = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        handler = self.makeunknown
    else:
        handler = self.makefile
    handler(tarinfo, targetpath)

    if not set_attrs:
        return

    self.chown(tarinfo, targetpath, numeric_owner)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002158
2159 #--------------------------------------------------------------------------
2160 # Below are the different file methods. They are called via
2161 # _extract_member() when extract() is called. They can be replaced in a
2162 # subclass to implement other functionality.
2163
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'; an already existing one is
    silently accepted.
    """
    # Only the owner may access the new directory at first; this
    # method does not apply tarinfo's own mode.
    safe_mode = 0o700
    try:
        os.mkdir(targetpath, safe_mode)
    except FileExistsError:
        pass
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002173
def makefile(self, tarinfo, targetpath):
    """Write the data of member `tarinfo' to a new file `targetpath'.

    The archive's file object is positioned at tarinfo.offset_data and
    the data is copied from there. For a member with a sparse map each
    (offset, size) chunk is written at its offset and the file is then
    sized to tarinfo.size.
    """
    archive = self.fileobj
    archive.seek(tarinfo.offset_data)
    chunk_size = self.copybufsize
    with bltn_open(targetpath, "wb") as out:
        if tarinfo.sparse is None:
            copyfileobj(archive, out, tarinfo.size, ReadError, chunk_size)
        else:
            for offset, size in tarinfo.sparse:
                out.seek(offset)
                copyfileobj(archive, out, size, ReadError, chunk_size)
            # Make the file exactly tarinfo.size bytes long.
            out.seek(tarinfo.size)
            out.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002189
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' as if it were
    a regular file, and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    notice = ("tarfile: Unknown file type %r, "
              "extracted as regular file." % tarinfo.type)
    self._dbg(1, notice)
2197
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at `targetpath'.

    Raises ExtractError when the os module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002205
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node at `targetpath'.

    The node gets tarinfo.mode combined with the block or character
    device bit and the device number built from tarinfo.devmajor and
    tarinfo.devminor. Raises ExtractError when the os module lacks
    mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    permissions = tarinfo.mode
    device_bit = stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR
    device_number = os.makedev(tarinfo.devmajor, tarinfo.devminor)

    os.mknod(targetpath, permissions | device_bit, device_number)
2220
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link at `targetpath'.

    A symbolic link is created pointing at tarinfo.linkname. A hard
    link is created to tarinfo._link_target (prepared by extract())
    when that path exists; otherwise the linked member is extracted to
    `targetpath' instead. If link creation fails with
    symlink_exception, the referenced member is extracted as a copy;
    ExtractError is raised when that member cannot be found.
    """
    try:
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        elif os.path.exists(tarinfo._link_target):
            os.link(tarinfo._link_target, targetpath)
        else:
            # The hard link's target is not on disk: extract the
            # member it refers to under the link's name.
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
    except symlink_exception:
        # Links could not be created; fall back to a copy.
        try:
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002243
def chown(self, tarinfo, targetpath, numeric_owner):
    """Set the owner of `targetpath' according to `tarinfo'.

    Nothing is done unless os.geteuid() exists and returns 0. If
    `numeric_owner' is True, tarinfo.uid/.gid are used as they are. If
    it is False, tarinfo.uname/.gname are looked up first and the
    numeric ids serve as fallback when a lookup fails. Raises
    ExtractError when the ownership change itself fails.
    """
    # We have to be root to do so.
    if not (hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    gid = tarinfo.gid
    uid = tarinfo.uid
    if not numeric_owner:
        # Prefer the ids the names map to; keep the numeric ids when
        # the grp/pwd module is unavailable or the name is unknown.
        try:
            if grp:
                gid = grp.getgrnam(tarinfo.gname)[2]
        except KeyError:
            pass
        try:
            if pwd:
                uid = pwd.getpwnam(tarinfo.uname)[2]
        except KeyError:
            pass

    try:
        # Use lchown() for symbolic links where it is available.
        if tarinfo.issym() and hasattr(os, "lchown"):
            change_owner = os.lchown
        else:
            change_owner = os.chown
        change_owner(targetpath, uid, gid)
    except OSError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002272
def chmod(self, tarinfo, targetpath):
    """Apply tarinfo.mode to `targetpath'.

    Raises ExtractError when os.chmod() fails with an OSError.
    """
    permissions = tarinfo.mode
    try:
        os.chmod(targetpath, permissions)
    except OSError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002280
def utime(self, tarinfo, targetpath):
    """Set both timestamps passed to os.utime() for `targetpath' to
    tarinfo.mtime.

    Does nothing when the os module has no utime(). Raises ExtractError
    when os.utime() fails with an OSError.
    """
    if hasattr(os, 'utime'):
        stamp = (tarinfo.mtime, tarinfo.mtime)
        try:
            os.utime(targetpath, stamp)
        except OSError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002290
2291 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
    TarFile is opened for reading. Return None if there is no more
    available.

    A pending self.firstmember is handed out first. Otherwise the file
    object is positioned at self.offset and the next header is parsed
    with self.tarinfo.fromtarfile(). The member found is appended to
    self.members; when none is found self._loaded is set.

    Raises ReadError if the data ends before self.offset, if the very
    first header (offset 0) is invalid, empty or truncated, or if a
    subsequent header is reported as broken.
    """
    self._check("ra")
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Advance the file pointer.
    if self.offset != self.fileobj.tell():
        if self.offset == 0:
            # There is no byte in front of offset 0 to probe, and
            # seeking to -1 would fail; simply rewind.
            self.fileobj.seek(0)
        else:
            # Seek to the byte just before the target and read it, so
            # that an offset beyond the end of the data is detected.
            self.fileobj.seek(self.offset - 1)
            if not self.fileobj.read(1):
                raise ReadError("unexpected end of data")

    # Read the next block.
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as e:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as e:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                # The header exception is an implementation detail;
                # do not show it as context of the ReadError.
                raise ReadError(str(e)) from None
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file") from None
        except TruncatedHeaderError as e:
            if self.offset == 0:
                raise ReadError(str(e)) from None
        except SubsequentHeaderError as e:
            raise ReadError(str(e)) from None
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        self._loaded = True

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002342
2343 #--------------------------------------------------------------------------
2344 # Little helper methods:
2345
def _getmember(self, name, tarinfo=None, normalize=False):
    """Return the last member called `name', or None if there is none.

    The members returned by getmembers() are searched from the end
    towards the start. When `tarinfo' is given only the members in
    front of it are searched. With `normalize' true, both `name' and
    the member names are passed through os.path.normpath() before
    being compared.
    """
    # getmembers() makes sure that all members have been loaded.
    candidates = self.getmembers()

    # Limit the search list to the members in front of tarinfo.
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    if normalize:
        name = os.path.normpath(name)

    for candidate in reversed(candidates):
        candidate_name = candidate.name
        if normalize:
            candidate_name = os.path.normpath(candidate_name)
        if name == candidate_name:
            return candidate

    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002368
def _load(self):
    """Call next() until it returns None, then mark the archive as
    completely loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2378
def _check(self, mode=None):
    """Raise OSError if the archive is closed, or if `mode' is given
    and the archive's own mode is not contained in it.
    """
    if self.closed:
        raise OSError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise OSError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002387
def _find_link_target(self, tarinfo):
    """Return the archive member that the symbolic or hard link
    `tarinfo' refers to.

    Raises KeyError when no such member is found.
    """
    if tarinfo.issym():
        # A symlink's target is taken relative to the directory of
        # the link itself and may be anywhere in the archive.
        pieces = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join([piece for piece in pieces if piece])
        search_limit = None
    else:
        # A hard link is just a reference to an already archived
        # file, so only members in front of the link are searched.
        linkname = tarinfo.linkname
        search_limit = tarinfo

    target = self._getmember(linkname, tarinfo=search_limit, normalize=True)
    if target is None:
        raise KeyError("linkname %r not found" % linkname)
    return target
2406
def __iter__(self):
    """Iterate over the members of the archive.

    When the archive is already completely loaded, the members list is
    simply replayed. Otherwise members already present in self.members
    are yielded by position and further ones are fetched with next();
    once next() is exhausted the archive is marked as loaded.
    """
    if self._loaded:
        yield from self.members
        return

    position = 0

    # Fix for SF #1100429: Under rare circumstances it can happen that
    # getmembers() is called during iteration, which will have already
    # exhausted the next() method.
    if self.firstmember is not None:
        current = self.next()
        position += 1
        yield current

    while True:
        if position < len(self.members):
            current = self.members[position]
        else:
            if self._loaded:
                return
            current = self.next()
            if not current:
                self._loaded = True
                return
        position += 1
        yield current
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002437
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr when self.debug is at least `level'.
    """
    if self.debug >= level:
        print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002443
def __enter__(self):
    """Enter the runtime context: verify through _check() that the
    archive is usable and return the archive itself.
    """
    self._check()
    return self
2447
def __exit__(self, type, value, traceback):
    """Leave the runtime context.

    Without a pending exception the archive is closed normally. With
    one, close() is deliberately not called; only the underlying file
    object is closed (unless self._extfileobj is set) and the archive
    is flagged as closed.
    """
    if type is not None:
        # An exception occurred. We must not call close() because
        # it would try to write end-of-archive blocks and padding.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        return
    self.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002457
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002458#--------------------
2459# exported functions
2460#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
    are able to handle, else return False.

    `name' may be a path, or a file object (anything with a read()
    method). A file object is handed to open() as fileobj= and is put
    back to the position it had on entry, whether or not it turned out
    to be a tar archive. Only TarError is turned into a False result;
    other exceptions raised while opening propagate.
    """
    try:
        if hasattr(name, "read"):
            # Remember where the caller's file object was, so probing
            # it does not consume its data.
            position = name.tell()
            try:
                t = open(fileobj=name)
            finally:
                name.seek(position)
        else:
            t = open(name)
        t.close()
        return True
    except TarError:
        return False
2471
# Module-level shortcut for TarFile.open. From this assignment on, the
# global name `open' in this module refers to TarFile.open instead of the
# builtin; functions in this module that call open(...) at run time (such
# as is_tarfile() and main()) therefore get TarFile.open.
open = TarFile.open
Serhiy Storchakad27b4552013-11-24 01:53:29 +02002473
2474
def main():
    """Command-line interface of the module.

    Exactly one of the following actions must be given:

      -l/--list <tarfile>                   list the archive's members
      -e/--extract <tarfile> [<output_dir>] extract the archive
      -c/--create <name> [<file> ...]       create an archive from files
      -t/--test <tarfile>                   check that the file is an archive

    -v/--verbose adds a confirmation message (or, for --list, is passed
    on as the verbose flag). A file that is not a tar archive makes the
    program exit with status 1.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command-line interface for tarfile module.')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    actions = parser.add_mutually_exclusive_group(required=True)
    actions.add_argument('-l', '--list', metavar='<tarfile>',
                         help='Show listing of a tarfile')
    actions.add_argument('-e', '--extract', nargs='+',
                         metavar=('<tarfile>', '<output_dir>'),
                         help='Extract tarfile into target dir')
    actions.add_argument('-c', '--create', nargs='+',
                         metavar=('<name>', '<file>'),
                         help='Create tarfile from sources')
    actions.add_argument('-t', '--test', metavar='<tarfile>',
                         help='Test if a tarfile is valid')
    args = parser.parse_args()

    if args.test is not None:
        src = args.test
        if not is_tarfile(src):
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
        else:
            with open(src, 'r') as tar:
                tar.getmembers()
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))

    elif args.list is not None:
        src = args.list
        if not is_tarfile(src):
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
        else:
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)

    elif args.extract is not None:
        # One argument: extract into the current directory; two: into
        # the given directory; anything else is a usage error.
        given = len(args.extract)
        if given == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif given == 2:
            src, curdir = args.extract
        else:
            parser.exit(1, parser.format_help())

        if not is_tarfile(src):
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))
        else:
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} file is extracted.'.format(src)
                else:
                    msg = ('{!r} file is extracted '
                           'into {!r} directory.').format(src, curdir)
                print(msg)

    elif args.create is not None:
        # The first argument names the archive, the rest are its sources.
        tar_name, *tar_files = args.create
        ext = os.path.splitext(tar_name)[1]
        # The archive's extension selects the compression.
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        compression = compressions.get(ext)
        tar_mode = 'w' if compression is None else 'w:' + compression

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))
2561
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()