blob: ca45126ff1318935c2d19a1641a479cbf46ea102 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
Guido van Rossumd8faa362007-04-27 19:54:29 +000032version = "0.9.0"
Guido van Rossum98297ee2007-11-06 21:34:58 +000033__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
Senthil Kumaran7c9719c2011-07-28 22:32:49 +080034__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
35__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $"
Guido van Rossum98297ee2007-11-06 21:34:58 +000036__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037
38#---------
39# Imports
40#---------
Serhiy Storchakacf4a2f22015-03-11 17:18:03 +020041from builtins import open as bltn_open
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000042import sys
43import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020044import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000045import shutil
46import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000047import time
48import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000049import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000050import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000051
52try:
53 import grp, pwd
Brett Cannoncd171c82013-07-04 17:43:24 -040054except ImportError:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000055 grp = pwd = None
56
# Exceptions grouped under the name symlink_exception.
# os.symlink on Windows prior to 6.0 raises NotImplementedError; OSError
# (winerror=1314) will be raised if the caller does not hold the
# SeCreateSymbolicLinkPrivilege privilege.
# OSError is a builtin and therefore always defined, so the tuple can be
# written out directly instead of being extended inside a
# try/except NameError guard that can never trigger.
symlink_exception = (AttributeError, NotImplementedError, OSError)
65
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000066# from tarfile import *
67__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68
69#---------------------------------------------------------
70# tar constants
71#---------------------------------------------------------
NUL = b"\0"                         # the null character
BLOCKSIZE = 512                     # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20         # length of records
# The GNU magic is "ustar" followed by TWO spaces and a NUL, i.e. 8 bytes,
# the same length as POSIX_MAGIC below.  The spaces are spelled \x20 so that
# whitespace normalisation of this file cannot silently drop one of them.
GNU_MAGIC = b"ustar\x20\x20\0"      # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"        # magic posix tar string

LENGTH_NAME = 100                   # maximum length of a filename
LENGTH_LINK = 100                   # maximum length of a linkname
LENGTH_PREFIX = 155                 # maximum length of the prefix field

REGTYPE = b"0"                      # regular file
AREGTYPE = b"\0"                    # regular file
LNKTYPE = b"1"                      # link (inside tarfile)
SYMTYPE = b"2"                      # symbolic link
CHRTYPE = b"3"                      # character special device
BLKTYPE = b"4"                      # block special device
DIRTYPE = b"5"                      # directory
FIFOTYPE = b"6"                     # fifo special device
CONTTYPE = b"7"                     # contiguous file

GNUTYPE_LONGNAME = b"L"             # GNU tar longname
GNUTYPE_LONGLINK = b"K"             # GNU tar longlink
GNUTYPE_SPARSE = b"S"               # GNU tar sparse file

XHDTYPE = b"x"                      # POSIX.1-2001 extended header
XGLTYPE = b"g"                      # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"              # Solaris extended header

USTAR_FORMAT = 0                    # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                      # GNU tar format
PAX_FORMAT = 2                      # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000104
105#---------------------------------------------------------
106# tarfile constants
107#---------------------------------------------------------
# Every member type that tarfile knows how to handle.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Member types that are handled like a regular file.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)

# Member types that belong to the GNU tar format.
GNU_TYPES = (
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)
122
# Pax header fields that override the TarInfo attribute of the same name.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Pax header fields whose value is affected by hdrcharset.
PAX_NAME_FIELDS = {
    "path",
    "linkpath",
    "uname",
    "gname",
}

# Pax header fields that hold numbers, mapped to the type used to convert
# them; every field not listed here is treated as a string.
PAX_NUMBER_FIELDS = dict(
    atime=float,
    ctime=float,
    mtime=float,
    uid=int,
    gid=int,
    size=int,
)
140
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000141#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000142# initialization
143#---------------------------------------------------------
# Default encoding: fixed to UTF-8 when os.name is "nt" or "ce", otherwise
# whatever the interpreter reports as the filesystem encoding.
ENCODING = "utf-8" if os.name in ("nt", "ce") else sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000148
149#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000150# Some useful functions
151#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000152
def stn(s, length, encoding, errors):
    """Encode the string s and fit it into a field of length bytes.

    The encoded value is cut off after length bytes; a shorter value is
    padded with NUL bytes up to length.
    """
    raw = s.encode(encoding, errors)
    # A negative repeat count yields b"", so over-long values get no padding.
    padding = NUL * (length - len(raw))
    return raw[:length] + padding
Thomas Wouters477c8d52006-05-27 19:21:47 +0000158
def nts(s, encoding, errors):
    """Decode a NUL-terminated bytes object into a string.

    Only the bytes in front of the first NUL byte are decoded; without a
    NUL byte the whole object is decoded.
    """
    end = s.find(b"\0")
    if end == -1:
        end = len(s)
    return s[:end].decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000166
def nti(s):
    """Convert a number field to a python number.

    Raises InvalidHeaderError if the field is neither base-256 encoded
    nor a valid octal number.
    """
    # A number field comes in one of two encodings, see itn() below.
    marker = s[0]
    if marker not in (0o200, 0o377):
        # Octal digits, NUL-terminated; a blank field counts as zero.
        try:
            text = nts(s, "ascii", "strict")
            return int(text.strip() or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")

    # Base-256: the bytes after the marker are a big-endian number.
    n = 0
    for byte in s[1:]:
        n = (n << 8) + byte
    if marker == 0o377:
        # A 0o377 marker denotes a negative number.
        n = -(256 ** (len(s) - 1) - n)
    return n
186
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

    n is the number to encode (a float is truncated to an int), digits
    is the width of the field in bytes and format selects whether the
    GNU base-256 encoding may be used for values that do not fit the
    octal representation.

    Returns a bytes object for the octal encoding and a bytearray for
    the base-256 encoding.  Raises ValueError if n does not fit into
    the field.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.

    # Coerce once, up front: the base-256 branch uses bit operations that
    # are undefined for floats, so a float outside the octal range (for
    # example a negative one) used to fail with a TypeError.
    n = int(n)

    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        if n >= 0:
            s = bytearray([0o200])
        else:
            s = bytearray([0o377])
            # Shift into the non-negative range; only the low digits-1
            # bytes are emitted below.
            n = 256 ** digits + n

        for i in range(digits - 1):
            # Insert right after the marker so that the least significant
            # byte, emitted first, ends up last (big-endian).
            s.insert(1, n & 0o377)
            n >>= 8
    else:
        raise ValueError("overflow in number field")

    return s
214
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

    All bytes of the header are summed up except for the 8-byte chksum
    field, which is counted as if it was filled with spaces. According
    to the GNU tar sources, some tars (Sun and NeXT) calculate chksum
    with signed char, which gives a different result when the buffer
    contains bytes with the high bit set, so both variants are computed.
    """
    sums = []
    # "B" reads the header bytes as unsigned, "b" as signed chars.
    for code in ("B", "b"):
        # 148 bytes, 8 skipped chksum bytes, then the remaining 356 bytes.
        layout = "148" + code + "8x356" + code
        # 256 accounts for the chksum field: eight spaces of value 32.
        sums.append(256 + sum(struct.unpack_from(layout, buf)))
    unsigned_chksum, signed_chksum = sums
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000227
def copyfileobj(src, dst, length=None, exception=OSError):
    """Copy length bytes from fileobj src to fileobj dst.

    With length None everything up to the end of src is copied. If src
    delivers fewer than length bytes, exception is raised with the
    message "unexpected end of data".
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    def transfer(count):
        # Move exactly count bytes; a short read means src is truncated.
        chunk = src.read(count)
        if len(chunk) < count:
            raise exception("unexpected end of data")
        dst.write(chunk)

    bufsize = 16 * 1024
    full_chunks, tail = divmod(length, bufsize)
    for _ in range(full_chunks):
        transfer(bufsize)
    if tail != 0:
        transfer(tail)
252
def filemode(mode):
    """Deprecated in this location; use stat.filemode.

    Emits a DeprecationWarning attributed to the caller and returns
    the result of stat.filemode(mode).
    """
    from warnings import warn
    warn("deprecated in favor of stat.filemode", DeprecationWarning, 2)
    return stat.filemode(mode)
259
Serhiy Storchaka3b4f1592014-02-05 20:53:36 +0200260def _safe_print(s):
261 encoding = getattr(sys.stdout, 'encoding', None)
262 if encoding is not None:
263 s = s.encode(encoding, 'backslashreplace').decode(encoding)
264 print(s, end=' ')
265
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000266
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000300
301#---------------------------
302# internal stream interface
303#---------------------------
304class _LowLevelFile:
305 """Low-level file object. Supports reading and writing.
306 It is used instead of a regular file object for streaming
307 access.
308 """
309
310 def __init__(self, name, mode):
311 mode = {
312 "r": os.O_RDONLY,
313 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
314 }[mode]
315 if hasattr(os, "O_BINARY"):
316 mode |= os.O_BINARY
Lars Gustäbeld6eb70b2010-04-29 15:37:02 +0000317 self.fd = os.open(name, mode, 0o666)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000318
319 def close(self):
320 os.close(self.fd)
321
322 def read(self, size):
323 return os.read(self.fd, size)
324
325 def write(self, s):
326 os.write(self.fd, s)
327
328class _Stream:
329 """Class that serves as an adapter between TarFile and
330 a stream-like object. The stream-like object only
331 needs to have a read() or write() method and is accessed
332 blockwise. Use of gzip or bzip2 compression is possible.
333 A stream-like object could be for example: sys.stdin,
334 sys.stdout, a socket, a tape device etc.
335
336 _Stream is intended to be used only internally.
337 """
338
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000339 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000340 """Construct a _Stream object.
341 """
342 self._extfileobj = True
343 if fileobj is None:
344 fileobj = _LowLevelFile(name, mode)
345 self._extfileobj = False
346
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000347 if comptype == '*':
348 # Enable transparent compression detection for the
349 # stream interface
350 fileobj = _StreamProxy(fileobj)
351 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000352
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000353 self.name = name or ""
354 self.mode = mode
355 self.comptype = comptype
356 self.fileobj = fileobj
357 self.bufsize = bufsize
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000358 self.buf = b""
Guido van Rossume2a383d2007-01-15 16:59:06 +0000359 self.pos = 0
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000360 self.closed = False
361
Antoine Pitrou605c2932010-09-23 20:15:14 +0000362 try:
363 if comptype == "gz":
364 try:
365 import zlib
Brett Cannoncd171c82013-07-04 17:43:24 -0400366 except ImportError:
Antoine Pitrou605c2932010-09-23 20:15:14 +0000367 raise CompressionError("zlib module is not available")
368 self.zlib = zlib
369 self.crc = zlib.crc32(b"")
370 if mode == "r":
371 self._init_read_gz()
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100372 self.exception = zlib.error
Antoine Pitrou605c2932010-09-23 20:15:14 +0000373 else:
374 self._init_write_gz()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000375
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100376 elif comptype == "bz2":
Antoine Pitrou605c2932010-09-23 20:15:14 +0000377 try:
378 import bz2
Brett Cannoncd171c82013-07-04 17:43:24 -0400379 except ImportError:
Antoine Pitrou605c2932010-09-23 20:15:14 +0000380 raise CompressionError("bz2 module is not available")
381 if mode == "r":
382 self.dbuf = b""
383 self.cmp = bz2.BZ2Decompressor()
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200384 self.exception = OSError
Antoine Pitrou605c2932010-09-23 20:15:14 +0000385 else:
386 self.cmp = bz2.BZ2Compressor()
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100387
388 elif comptype == "xz":
389 try:
390 import lzma
Brett Cannoncd171c82013-07-04 17:43:24 -0400391 except ImportError:
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100392 raise CompressionError("lzma module is not available")
393 if mode == "r":
394 self.dbuf = b""
395 self.cmp = lzma.LZMADecompressor()
396 self.exception = lzma.LZMAError
397 else:
398 self.cmp = lzma.LZMACompressor()
399
400 elif comptype != "tar":
401 raise CompressionError("unknown compression type %r" % comptype)
402
Antoine Pitrou605c2932010-09-23 20:15:14 +0000403 except:
404 if not self._extfileobj:
405 self.fileobj.close()
406 self.closed = True
407 raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000408
409 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000410 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000411 self.close()
412
413 def _init_write_gz(self):
414 """Initialize for writing with gzip compression.
415 """
416 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
417 -self.zlib.MAX_WBITS,
418 self.zlib.DEF_MEM_LEVEL,
419 0)
Guido van Rossume2a383d2007-01-15 16:59:06 +0000420 timestamp = struct.pack("<L", int(time.time()))
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000421 self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000422 if self.name.endswith(".gz"):
423 self.name = self.name[:-3]
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000424 # RFC1952 says we must use ISO-8859-1 for the FNAME field.
425 self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000426
427 def write(self, s):
428 """Write string s to the stream.
429 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000430 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000431 self.crc = self.zlib.crc32(s, self.crc)
432 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000433 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000434 s = self.cmp.compress(s)
435 self.__write(s)
436
437 def __write(self, s):
438 """Write string s to the stream if a whole new block
439 is ready to be written.
440 """
441 self.buf += s
442 while len(self.buf) > self.bufsize:
443 self.fileobj.write(self.buf[:self.bufsize])
444 self.buf = self.buf[self.bufsize:]
445
446 def close(self):
447 """Close the _Stream object. No operation should be
448 done on it afterwards.
449 """
450 if self.closed:
451 return
452
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000453 self.closed = True
Serhiy Storchaka7e7a3db2015-04-10 13:24:41 +0300454 try:
455 if self.mode == "w" and self.comptype != "tar":
456 self.buf += self.cmp.flush()
457
458 if self.mode == "w" and self.buf:
459 self.fileobj.write(self.buf)
460 self.buf = b""
461 if self.comptype == "gz":
462 # The native zlib crc is an unsigned 32-bit integer, but
463 # the Python wrapper implicitly casts that to a signed C
464 # long. So, on a 32-bit box self.crc may "look negative",
465 # while the same crc on a 64-bit box may "look positive".
466 # To avoid irksome warnings from the `struct` module, force
467 # it to look positive on all boxes.
468 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
469 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
470 finally:
471 if not self._extfileobj:
472 self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000473
474 def _init_read_gz(self):
475 """Initialize for reading a gzip compressed fileobj.
476 """
477 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000478 self.dbuf = b""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000479
480 # taken from gzip.GzipFile with some alterations
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000481 if self.__read(2) != b"\037\213":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000482 raise ReadError("not a gzip file")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000483 if self.__read(1) != b"\010":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000484 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000485
486 flag = ord(self.__read(1))
487 self.__read(6)
488
489 if flag & 4:
490 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
491 self.read(xlen)
492 if flag & 8:
493 while True:
494 s = self.__read(1)
495 if not s or s == NUL:
496 break
497 if flag & 16:
498 while True:
499 s = self.__read(1)
500 if not s or s == NUL:
501 break
502 if flag & 2:
503 self.__read(2)
504
505 def tell(self):
506 """Return the stream's file pointer position.
507 """
508 return self.pos
509
510 def seek(self, pos=0):
511 """Set the stream's file pointer to pos. Negative seeking
512 is forbidden.
513 """
514 if pos - self.pos >= 0:
515 blocks, remainder = divmod(pos - self.pos, self.bufsize)
Guido van Rossum805365e2007-05-07 22:24:25 +0000516 for i in range(blocks):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000517 self.read(self.bufsize)
518 self.read(remainder)
519 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000520 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000521 return self.pos
522
523 def read(self, size=None):
524 """Return the next size number of bytes from the stream.
525 If size is not defined, return all bytes of the stream
526 up to EOF.
527 """
528 if size is None:
529 t = []
530 while True:
531 buf = self._read(self.bufsize)
532 if not buf:
533 break
534 t.append(buf)
535 buf = "".join(t)
536 else:
537 buf = self._read(size)
538 self.pos += len(buf)
539 return buf
540
541 def _read(self, size):
542 """Return size bytes from the stream.
543 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000544 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000545 return self.__read(size)
546
547 c = len(self.dbuf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000548 while c < size:
549 buf = self.__read(self.bufsize)
550 if not buf:
551 break
Guido van Rossumd8faa362007-04-27 19:54:29 +0000552 try:
553 buf = self.cmp.decompress(buf)
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100554 except self.exception:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000555 raise ReadError("invalid compressed data")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000556 self.dbuf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000557 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000558 buf = self.dbuf[:size]
559 self.dbuf = self.dbuf[size:]
560 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000561
562 def __read(self, size):
563 """Return size bytes from stream. If internal buffer is empty,
564 read another block from the stream.
565 """
566 c = len(self.buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000567 while c < size:
568 buf = self.fileobj.read(self.bufsize)
569 if not buf:
570 break
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000571 self.buf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000572 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000573 buf = self.buf[:size]
574 self.buf = self.buf[size:]
575 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000576# class _Stream
577
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000578class _StreamProxy(object):
579 """Small proxy class that enables transparent compression
580 detection for the Stream interface (mode 'r|*').
581 """
582
583 def __init__(self, fileobj):
584 self.fileobj = fileobj
585 self.buf = self.fileobj.read(BLOCKSIZE)
586
587 def read(self, size):
588 self.read = self.fileobj.read
589 return self.buf
590
591 def getcomptype(self):
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100592 if self.buf.startswith(b"\x1f\x8b\x08"):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000593 return "gz"
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100594 elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000595 return "bz2"
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100596 elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
597 return "xz"
598 else:
599 return "tar"
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000600
601 def close(self):
602 self.fileobj.close()
603# class StreamProxy
604
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000605#------------------------
606# Extraction file object
607#------------------------
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000608class _FileInFile(object):
609 """A thin wrapper around an existing file object that
610 provides a part of its data as an individual file
611 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000612 """
613
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000614 def __init__(self, fileobj, offset, size, blockinfo=None):
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000615 self.fileobj = fileobj
616 self.offset = offset
617 self.size = size
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000618 self.position = 0
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200619 self.name = getattr(fileobj, "name", None)
620 self.closed = False
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000621
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000622 if blockinfo is None:
623 blockinfo = [(0, size)]
624
625 # Construct a map with data and zero blocks.
626 self.map_index = 0
627 self.map = []
628 lastpos = 0
629 realpos = self.offset
630 for offset, size in blockinfo:
631 if offset > lastpos:
632 self.map.append((False, lastpos, offset, None))
633 self.map.append((True, offset, offset + size, realpos))
634 realpos += size
635 lastpos = offset + size
636 if lastpos < self.size:
637 self.map.append((False, lastpos, self.size, None))
638
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200639 def flush(self):
640 pass
641
642 def readable(self):
643 return True
644
645 def writable(self):
646 return False
647
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000648 def seekable(self):
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000649 return self.fileobj.seekable()
650
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000651 def tell(self):
652 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000653 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000654 return self.position
655
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200656 def seek(self, position, whence=io.SEEK_SET):
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000657 """Seek to a position in the file.
658 """
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200659 if whence == io.SEEK_SET:
660 self.position = min(max(position, 0), self.size)
661 elif whence == io.SEEK_CUR:
662 if position < 0:
663 self.position = max(self.position + position, 0)
664 else:
665 self.position = min(self.position + position, self.size)
666 elif whence == io.SEEK_END:
667 self.position = max(min(self.size + position, self.size), 0)
668 else:
669 raise ValueError("Invalid argument")
670 return self.position
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000671
672 def read(self, size=None):
673 """Read data from the file.
674 """
675 if size is None:
676 size = self.size - self.position
677 else:
678 size = min(size, self.size - self.position)
679
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000680 buf = b""
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000681 while size > 0:
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000682 while True:
683 data, start, stop, offset = self.map[self.map_index]
684 if start <= self.position < stop:
685 break
686 else:
687 self.map_index += 1
688 if self.map_index == len(self.map):
689 self.map_index = 0
690 length = min(size, stop - self.position)
691 if data:
Lars Gustäbeldd071042011-02-23 11:42:22 +0000692 self.fileobj.seek(offset + (self.position - start))
Lars Gustäbel03572682015-07-06 09:27:24 +0200693 b = self.fileobj.read(length)
694 if len(b) != length:
695 raise ReadError("unexpected end of data")
696 buf += b
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000697 else:
698 buf += NUL * length
699 size -= length
700 self.position += length
701 return buf
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000702
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200703 def readinto(self, b):
704 buf = self.read(len(b))
705 b[:len(buf)] = buf
706 return len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000707
708 def close(self):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000709 self.closed = True
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200710#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000711
class ExFileObject(io.BufferedReader):
    """Buffered reader over the data of a single archive member."""

    def __init__(self, tarfile, tarinfo):
        # Expose only the member's data range of the archive's file
        # object, then let io.BufferedReader buffer access to it.
        member_data = _FileInFile(
            tarfile.fileobj,
            tarinfo.offset_data,
            tarinfo.size,
            tarinfo.sparse,
        )
        super().__init__(member_data)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000719
720#------------------
721# Exported Classes
722#------------------
class TarInfo(object):
    """Informational class which holds the details about an
    archive member given by a tar header block.
    TarInfo objects are returned by TarFile.getmember(),
    TarFile.getmembers() and TarFile.gettarinfo() and are
    usually created internally.
    """

    __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
                 "chksum", "type", "linkname", "uname", "gname",
                 "devmajor", "devminor",
                 "offset", "offset_data", "pax_headers", "sparse",
                 "tarfile", "_sparse_structs", "_link_target")

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
        of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name

    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname

    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.

        The mode is masked with 0o7777, and the name of a directory
        member always ends with a slash in the returned dictionary.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header as a bytes object of 512 byte blocks.

        format selects the header flavour and must be one of
        USTAR_FORMAT, GNU_FORMAT or PAX_FORMAT; any other value raises
        ValueError. encoding and errors are used to convert the string
        fields to bytes (errors is not used for PAX_FORMAT).
        """
        info = self.get_info()

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.

        Raise ValueError if the linkname is longer than LENGTH_LINK
        or if the name cannot be split into a prefix and a name part.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.

        Names and linknames that exceed the header field are stored
        in preceding GNUTYPE_LONGNAME / GNUTYPE_LONGLINK members.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
        represented this way, prepend a pax extended header sequence
        with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        # Work on a copy so that the member's own pax_headers stay untouched.
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
        and a name part.

        The split is made at the last slash within the first
        LENGTH_PREFIX + 1 characters. Raise ValueError if there is no
        such slash or if the remaining name part is still too long.
        """
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        # Drop the separating slash, it is not stored in the header.
        prefix = prefix[:-1]

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
        information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # The checksum field is 8 bytes wide (bytes 148-155 of the
            # header, see frombuf()). It must be filled with spaces
            # while the checksum is calculated; a shorter placeholder
            # would shift every following field.
            b"        ", # checksum field
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", ""), 32, encoding, errors),
            stn(info.get("gname", ""), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        # Pad the joined fields with NUL bytes up to a full block.
        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Patch the first 7 bytes of the checksum field (six octal
        # digits plus NUL); the eighth byte stays a space.
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the bytes payload filled with zero bytes
        up to the next 512 byte border.
        """
        blocks, remainder = divmod(len(payload), BLOCKSIZE)
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
        for name.
        """
        # The name is stored NUL-terminated in the data blocks.
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a POSIX.1-2008 extended or global header sequence
        that contains a list of keyword, value pairs. The values
        must be strings.
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for value in pax_headers.values():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = b""
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records += b"21 hdrcharset=BINARY\n"

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, that the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The record length includes the digits of the length field
            # itself, so iterate until the value no longer changes.
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

        Raise EmptyHeaderError for an empty buffer, TruncatedHeaderError
        for a buffer that is not exactly one block long, EOFHeaderError
        for a block that consists of NUL bytes only and
        InvalidHeaderError if the stored checksum does not match.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
        tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        # The header block has just been consumed, so it started one
        # block before the current position.
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
        the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
        will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
        or longlink member.

        Raise SubsequentHeaderError if the header that must follow
        the long name data is missing or invalid.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        # self.size still holds the stored size here; it is replaced by
        # the original size only after the next offset was calculated.
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize
        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
        POSIX.1-2008.

        Raise InvalidHeaderError if a record declares a length of
        zero and SubsequentHeaderError if the header that must follow
        the pax header is missing or invalid.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails.
        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
        if match is not None:
            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

        # For the time being, we don't care about anything other than "BINARY".
        # The only other value that is currently allowed by the standard is
        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        hdrcharset = pax_headers.get("hdrcharset")
        if hdrcharset == "BINARY":
            encoding = tarfile.encoding
        else:
            encoding = "utf-8"

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A record of length zero would never advance pos and
                # keep this loop running forever on a crafted archive.
                raise InvalidHeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            # Normally, we could just use "utf-8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(value, encoding, tarfile.encoding,
                        tarfile.errors)
            else:
                value = self._decode_pax_field(value, "utf-8", "utf-8",
                        tarfile.errors)

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(next, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(next, pax_headers, buf)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(next, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.

        The offsets and sizes are taken from the raw header data in
        buf, where they are stored as repeated GNU.sparse.offset and
        GNU.sparse.numbytes records.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes))

    def _proc_gnusparse_01(self, next, pax_headers):
        """Process a GNU tar extended sparse header, version 0.1.

        The GNU.sparse.map field is a comma-separated list of
        alternating offset and size values.
        """
        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
        """Process a GNU tar extended sparse header, version 1.0.

        The sparse map is read from the blocks that precede the
        member's data: a newline-terminated number of map entries
        followed by newline-terminated offset and size values.
        """
        sparse = []
        buf = tarfile.fileobj.read(BLOCKSIZE)
        fields, buf = buf.split(b"\n", 1)
        fields = int(fields)
        while len(sparse) < fields * 2:
            if b"\n" not in buf:
                # The map continues in the next block.
                buf += tarfile.fileobj.read(BLOCKSIZE)
            number, buf = buf.split(b"\n", 1)
            sparse.append(int(number))
        # The member's data starts after the blocks that hold the map.
        next.offset_data = tarfile.fileobj.tell()
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
        pax extended or global header.
        """
        for keyword, value in pax_headers.items():
            if keyword == "GNU.sparse.name":
                setattr(self, "path", value)
            elif keyword == "GNU.sparse.size":
                setattr(self, "size", int(value))
            elif keyword == "GNU.sparse.realsize":
                setattr(self, "size", int(value))
            elif keyword in PAX_FIELDS:
                if keyword in PAX_NUMBER_FIELDS:
                    # Fall back to 0 for numbers that cannot be converted.
                    try:
                        value = PAX_NUMBER_FIELDS[keyword](value)
                    except ValueError:
                        value = 0
                if keyword == "path":
                    value = value.rstrip("/")
                setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
        """Decode a single field from a pax record.

        value is decoded strictly with encoding first; if that fails
        it is decoded with fallback_encoding and fallback_errors.
        """
        try:
            return value.decode(encoding, "strict")
        except UnicodeDecodeError:
            return value.decode(fallback_encoding, fallback_errors)

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
        e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        """Return True if the member's type is one of REGULAR_TYPES."""
        return self.type in REGULAR_TYPES

    def isfile(self):
        """Same as isreg()."""
        return self.isreg()

    def isdir(self):
        """Return True if the member's type is DIRTYPE."""
        return self.type == DIRTYPE

    def issym(self):
        """Return True if the member's type is SYMTYPE."""
        return self.type == SYMTYPE

    def islnk(self):
        """Return True if the member's type is LNKTYPE."""
        return self.type == LNKTYPE

    def ischr(self):
        """Return True if the member's type is CHRTYPE."""
        return self.type == CHRTYPE

    def isblk(self):
        """Return True if the member's type is BLKTYPE."""
        return self.type == BLKTYPE

    def isfifo(self):
        """Return True if the member's type is FIFOTYPE."""
        return self.type == FIFOTYPE

    def issparse(self):
        """Return True if sparse information was recorded for the member."""
        return self.sparse is not None

    def isdev(self):
        """Return True if the member's type is CHRTYPE, BLKTYPE or FIFOTYPE."""
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1378# class TarInfo
1379
1380class TarFile(object):
1381 """The TarFile Class provides an interface to tar archives.
1382 """
1383
# Debug verbosity; may be set from 0 (no msgs) to 3 (all msgs).
debug = 0

# If true, add the content of a linked file to the tar file,
# else add the link itself.
dereference = False

# If true, skip empty or invalid blocks and continue processing.
ignore_zeros = False

# If 0, fatal errors only appear in debug messages (if debug >= 0).
# If > 0, errors are passed to the caller as exceptions.
errorlevel = 1

# The format to use when creating an archive.
format = DEFAULT_FORMAT

# Encoding for 8-bit character strings.
encoding = ENCODING

# Error handler for unicode conversion.
errors = None

# The default TarInfo class to use.
tarinfo = TarInfo

# The file-object class used by extractfile().
fileobject = ExFileObject
Guido van Rossumd8faa362007-04-27 19:54:29 +00001405
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'.

       `mode' is 'r' to read from an existing archive, 'a' to append
       data to an existing file, 'w' to create a new file overwriting an
       existing one, or 'x' to create a new file exclusively. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If
       it has a `mode' attribute, that value replaces the low-level file
       mode. `fileobj' is not closed when the TarFile is closed.
       Arguments left at None keep the corresponding class-level default.
    """
    raw_modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}
    if mode not in raw_modes:
        raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
    self.mode = mode
    self._mode = raw_modes[mode]

    if fileobj:
        # Caller-supplied file object: borrow its name and mode when they
        # are available, and remember that we do not own it.
        if (name is None and hasattr(fileobj, "name")
                and isinstance(fileobj.name, (str, bytes))):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    else:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False

    if name:
        self.name = os.path.abspath(name)
    else:
        self.name = None
    self.fileobj = fileobj

    # Override class-level defaults with explicitly given arguments.
    overrides = (
        ("format", format),
        ("tarinfo", tarinfo),
        ("dereference", dereference),
        ("ignore_zeros", ignore_zeros),
        ("encoding", encoding),
    )
    for attr, value in overrides:
        if value is not None:
            setattr(self, attr, value)
    self.errors = errors

    # pax headers are only kept when the archive format is PAX_FORMAT.
    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    for attr, value in (("debug", debug), ("errorlevel", errorlevel)):
        if value is not None:
            setattr(self, attr, value)

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()   # current position in the archive file
    self.inodes = {}        # cache of inodes of archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive, before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    member = self.tarinfo.fromtarfile(self)
                    self.members.append(member)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in ("a", "w", "x"):
            self._loaded = True

            if self.pax_headers:
                header = self.tarinfo.create_pax_global_header(
                    self.pax_headers.copy())
                self.fileobj.write(header)
                self.offset += len(header)
    except BaseException:
        # Whatever went wrong, do not leak a file we opened ourselves.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001503
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001504 #--------------------------------------------------------------------------
1505 # Below are the classmethods which act as alternate constructors to the
1506 # TarFile class. The open() method is the only one that is needed for
1507 # public use; it is the "super"-constructor and is able to select an
1508 # adequate "sub"-constructor for a particular compression using the mapping
1509 # from OPEN_METH.
1510 #
1511 # This concept allows one to subclass TarFile without losing the comfort of
1512 # the super-constructor. A sub-constructor is registered and made available
1513 # by adding it to the mapping in OPEN_METH.
1514
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'r:xz'       open for reading with lzma compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'w:xz'       open for writing with lzma compression

       'x' or 'x:'  create a tarfile exclusively without compression, raise
                    an exception if the file is already created
       'x:gz'       create a gzip compressed tarfile, raise an exception
                    if the file is already created
       'x:bz2'      create a bzip2 compressed tarfile, raise an exception
                    if the file is already created
       'x:xz'       create an lzma compressed tarfile, raise an exception
                    if the file is already created

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'r|xz'       open an lzma compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing
       'w|xz'       open an lzma compressed stream for writing
    """
    if not name and not fileobj:
        raise ValueError("nothing to open")

    # Transparent compression: try every registered *open() method in turn
    # and return the first one that succeeds.
    if mode in ("r", "r:*"):
        for method_name in cls.OPEN_METH.values():
            opener = getattr(cls, method_name)
            if fileobj is not None:
                start = fileobj.tell()
            try:
                return opener(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind a caller-supplied file object before the next try.
                if fileobj is not None:
                    fileobj.seek(start)
        raise ReadError("file could not be opened successfully")

    # Explicit compression: "<filemode>:<comptype>".
    if ":" in mode:
        filemode, _, comptype = mode.partition(":")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        opener = getattr(cls, cls.OPEN_METH[comptype])
        return opener(name, filemode, fileobj, **kwargs)

    # Stream of tar blocks: "<filemode>|<comptype>".
    if "|" in mode:
        filemode, _, comptype = mode.partition("|")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            archive = cls(name, filemode, stream, **kwargs)
        except BaseException:
            stream.close()
            raise
        # The stream was created here, so the TarFile has to close it.
        archive._extfileobj = False
        return archive

    if mode in ("a", "w", "x"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001603
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open the uncompressed tar archive `name' by instantiating `cls'.
       `mode' must be one of 'r', 'a', 'w' or 'x', else ValueError is raised.
    """
    valid_modes = ("r", "a", "w", "x")
    if mode in valid_modes:
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a', 'w' or 'x'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001611
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the gzip compressed tar archive `name' for reading or writing.
       `mode' must be 'r', 'w' or 'x'; appending is not allowed.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    try:
        gz_file = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
    except OSError:
        if fileobj is not None and mode == 'r':
            raise ReadError("not a gzip file")
        raise

    try:
        archive = cls.taropen(name, mode, gz_file, **kwargs)
    except BaseException as exc:
        # Always release the gzip wrapper; in read mode an OSError is
        # reported as ReadError, anything else propagates unchanged.
        gz_file.close()
        if isinstance(exc, OSError) and mode == 'r':
            raise ReadError("not a gzip file")
        raise
    # The gzip wrapper was created here, so the TarFile has to close it.
    archive._extfileobj = False
    return archive
1645
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the bzip2 compressed tar archive `name' for reading or writing.
       `mode' must be 'r', 'w' or 'x'; appending is not allowed.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    bz2_file = bz2.BZ2File(fileobj or name, mode,
                           compresslevel=compresslevel)

    try:
        archive = cls.taropen(name, mode, bz2_file, **kwargs)
    except BaseException as exc:
        # Always release the bz2 wrapper; in read mode OSError/EOFError is
        # reported as ReadError, anything else propagates unchanged.
        bz2_file.close()
        if isinstance(exc, (OSError, EOFError)) and mode == 'r':
            raise ReadError("not a bzip2 file")
        raise
    # The bz2 wrapper was created here, so the TarFile has to close it.
    archive._extfileobj = False
    return archive
1674
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open the lzma compressed tar archive `name' for reading or writing.
       `mode' must be 'r', 'w' or 'x'; appending is not allowed.
    """
    if mode not in ("r", "w", "x"):
        raise ValueError("mode must be 'r', 'w' or 'x'")

    try:
        import lzma
    except ImportError:
        raise CompressionError("lzma module is not available")

    xz_file = lzma.LZMAFile(fileobj or name, mode, preset=preset)

    try:
        archive = cls.taropen(name, mode, xz_file, **kwargs)
    except BaseException as exc:
        # Always release the lzma wrapper; in read mode LZMAError/EOFError
        # is reported as ReadError, anything else propagates unchanged.
        xz_file.close()
        if isinstance(exc, (lzma.LZMAError, EOFError)) and mode == 'r':
            raise ReadError("not an lzma file")
        raise
    # The lzma wrapper was created here, so the TarFile has to close it.
    archive._extfileobj = False
    return archive
1702
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001703 # All *open() methods are registered here.
# Maps a compression type to the name of the classmethod that opens it.
OPEN_METH = {
    # uncompressed tar
    "tar": "taropen",
    # gzip compressed tar
    "gz": "gzopen",
    # bzip2 compressed tar
    "bz2": "bz2open",
    # lzma compressed tar
    "xz": "xzopen",
}
1710
1711 #--------------------------------------------------------------------------
1712 # The public methods which TarFile provides:
1713
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive and the archive is padded with zeros up
       to a multiple of RECORDSIZE. Closing twice is a no-op.
    """
    if self.closed:
        return

    self.closed = True
    try:
        if self.mode in ("a", "w", "x"):
            trailer_size = BLOCKSIZE * 2
            self.fileobj.write(NUL * trailer_size)
            self.offset += trailer_size
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Only close file objects that the TarFile owns.
        if not self._extfileobj:
            self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001734
def getmember(self, name):
    """Return the TarInfo object that _getmember() finds for `name'.
       Raise KeyError if `name' can not be found in the archive. If a
       member occurs more than once in the archive, its last occurrence
       is assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001745
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    # A complete member list requires that the whole archive has been
    # scanned once.
    already_scanned = self._loaded
    if not already_scanned:
        self._load()
    return self.members
1755
def getnames(self):
    """Return the names of the archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001761
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive. Return None if the file type is not one of
       regular file, directory, fifo, symbolic link, character device or
       block device.
    """
    self._check("awx")

    # When fileobj is given, replace name by fileobj's real name.
    if fileobj is not None:
        name = fileobj.name

    # Build the name of the member in the archive: drop any drive prefix,
    # convert os.sep to forward slashes and strip leading slashes so that
    # absolute paths become relative ones.
    if arcname is None:
        arcname = name
    _drive, arcname = os.path.splitdrive(arcname)
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    # Now, fill the TarInfo object with information specific for the file.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Use os.fstat for file objects; otherwise os.lstat or os.stat,
    # depending on platform and whether symlinks shall be resolved.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        # Is it a hardlink to an already archived file?
        is_known_hardlink = (
            not self.dereference
            and statres.st_nlink > 1
            and inode in self.inodes
            and arcname != self.inodes[inode]
        )
        if is_known_hardlink:
            member_type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            member_type = REGTYPE
            # The inode is added only if it is valid.
            # For win32 it is always 0.
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        member_type = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        member_type = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        member_type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        member_type = CHRTYPE
    elif stat.S_ISBLK(stmd):
        member_type = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry a size in the header.
    tarinfo.size = statres.st_size if member_type == REGTYPE else 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = member_type
    tarinfo.linkname = linkname

    # User and group names are looked up only when pwd/grp are truthy;
    # an id without an entry leaves the name untouched.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if member_type in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(statres.st_rdev)
            tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1859
def list(self, verbose=True, *, members=None):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced. `members' is optional and must be a subset of the
       list returned by getmembers().
    """
    self._check()

    entries = self if members is None else members
    for tarinfo in entries:
        if verbose:
            # Mode, owner/group, size (or device numbers) and mtime.
            _safe_print(stat.filemode(tarinfo.mode))
            owner = tarinfo.uname or tarinfo.uid
            group = tarinfo.gname or tarinfo.gid
            _safe_print("%s/%s" % (owner, group))
            if tarinfo.ischr() or tarinfo.isblk():
                device = "%d,%d" % (tarinfo.devmajor, tarinfo.devminor)
                _safe_print("%10s" % device)
            else:
                _safe_print("%10d" % tarinfo.size)
            timestamp = time.localtime(tarinfo.mtime)[:6]
            _safe_print("%d-%02d-%02d %02d:%02d:%02d" % timestamp)

        # Directories are shown with a trailing slash.
        suffix = "/" if tarinfo.isdir() else ""
        _safe_print(tarinfo.name + suffix)

        if verbose:
            if tarinfo.issym():
                _safe_print("-> " + tarinfo.linkname)
            if tarinfo.islnk():
                _safe_print("link to " + tarinfo.linkname)
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001891
def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a deprecated function that
       should return True for each filename to be excluded. `filter' is a
       function that expects a TarInfo object argument and returns the
       changed TarInfo object; if it returns None the TarInfo object will
       be excluded from the archive.
    """
    self._check("awx")

    if arcname is None:
        arcname = name

    # Exclude pathnames (deprecated in favour of `filter').
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)
    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as source:
            self.addfile(tarinfo, source)
        return

    # Everything that is not a regular file is stored as a header only.
    self.addfile(tarinfo)
    if tarinfo.isdir() and recursive:
        for entry in os.listdir(name):
            self.add(os.path.join(name, entry), os.path.join(arcname, entry),
                    recursive, exclude, filter=filter)
1952
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("awx")

    # Work on a shallow copy so the caller's object is not the one stored.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad it with NULs up to a
    # multiple of BLOCKSIZE.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        written = member.size
        remainder = member.size % BLOCKSIZE
        if remainder > 0:
            padding = BLOCKSIZE - remainder
            self.fileobj.write(NUL * padding)
            written += padding
        self.offset += written

    self.members.append(member)
Martin v. Löwisf3c56112004-09-18 09:08:52 +00001977 self.members.append(tarinfo)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001978
def extractall(self, path=".", members=None, *, numeric_owner=False):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers(). If `numeric_owner` is True, only
       the numbers for user/group names are used and not the names.

       If a directory occurs more than once among the members, the
       attributes of its last occurrence are the ones that stay in effect.
       ExtractError raised while setting directory attributes is re-raised
       only if errorlevel > 1; otherwise it is reported through _dbg().
    """
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directories with a safe mode; the real attributes
            # are applied below, once all their content has been written.
            directories.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 0o700
        # Do not set_attrs directories, as we will do that further down
        self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
                     numeric_owner=numeric_owner)

    # Reverse sort directories so that nested directories are handled
    # before their parents. A single stable sort with reverse=True keeps
    # entries with equal names in archive order, so the last occurrence
    # of a duplicated directory is applied last. sort() followed by
    # reverse() would flip that order and let the first occurrence win.
    directories.sort(key=lambda a: a.name, reverse=True)

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)
2018
def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a TarInfo object. You can
       specify a different directory using `path'. File attributes (owner,
       mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
       is True, only the numbers for user/group names are used and not
       the names.

       OSError is re-raised if errorlevel > 0 and ExtractError if
       errorlevel > 1; otherwise the error is reported through _dbg().
    """
    self._check("r")

    # A string is looked up by name; anything else is used as given.
    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    target = os.path.join(path, tarinfo.name)
    try:
        self._extract_member(tarinfo, target,
                             set_attrs=set_attrs,
                             numeric_owner=numeric_owner)
    except OSError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2056
def extractfile(self, member):
    """Return a file object for the data of an archive member.

       `member' may be a member name or a TarInfo object. Regular files
       and members of unknown type yield self.fileobject(self, tarinfo);
       hard and symbolic links yield the file object of the member they
       point to; every other member type yields None.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Members with unknown types are treated as regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # Directories, devices, fifos etc. carry no data.
        return None

    if isinstance(self.fileobj, _Stream):
        # Resolving a link would require seeking back to its target,
        # which a non-seekable stream of tar blocks cannot do.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2087
def _extract_member(self, tarinfo, targetpath, set_attrs=True,
                    numeric_owner=False):
    """Materialize the TarInfo object `tarinfo' as the file system
       object `targetpath'.

       Missing parent directories are created first, then the make*()
       method matching the member type is called. If `set_attrs' is
       true, owner, mode and mtime are applied afterwards (mode and
       mtime are skipped for symbolic links).
    """
    # Drop trailing slashes and switch to the platform's separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Directories that are not part of the archive are created with
    # default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        message = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        message = tarinfo.name
    self._dbg(1, message)

    # Pick the creator for this member type, then run it.
    if tarinfo.isreg():
        make = self.makefile
    elif tarinfo.isdir():
        make = self.makedir
    elif tarinfo.isfifo():
        make = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        make = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        make = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        make = self.makeunknown
    else:
        make = self.makefile
    make(tarinfo, targetpath)

    if not set_attrs:
        return

    self.chown(tarinfo, targetpath, numeric_owner)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002131
2132 #--------------------------------------------------------------------------
2133 # Below are the different file methods. They are called via
2134 # _extract_member() when extract() is called. They can be replaced in a
2135 # subclass to implement other functionality.
2136
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath.

       An already existing directory is reused as is. If targetpath
       exists but is not a directory, the FileExistsError raised by
       os.mkdir() is propagated instead of being silently ignored.
    """
    try:
        # Use a safe mode for the directory, the real mode is set
        # later in _extract_member().
        os.mkdir(targetpath, 0o700)
    except FileExistsError:
        # Only an existing directory may stand in for the new one;
        # anything else would make the following attribute changes
        # and child extractions hit the wrong kind of file.
        if not os.path.isdir(targetpath):
            raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002146
def makefile(self, tarinfo, targetpath):
    """Write the data of `tarinfo' to a new file called targetpath.

       The archive is positioned at tarinfo.offset_data. For sparse
       members each (offset, size) extent is copied to its offset in
       the target; otherwise tarinfo.size bytes are copied in one go.
       Finally the target is truncated at tarinfo.size.
    """
    archive = self.fileobj
    archive.seek(tarinfo.offset_data)
    with bltn_open(targetpath, "wb") as out:
        if tarinfo.sparse is None:
            copyfileobj(archive, out, tarinfo.size, ReadError)
        else:
            for offset, size in tarinfo.sparse:
                out.seek(offset)
                copyfileobj(archive, out, size, ReadError)
        # Give the file its final length, which also covers a hole
        # at the end of a sparse member.
        out.seek(tarinfo.size)
        out.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002161
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as if it were a
       regular file and report that via _dbg().
    """
    self.makefile(tarinfo, targetpath)
    notice = ("tarfile: Unknown file type %r, "
              "extracted as regular file." % tarinfo.type)
    self._dbg(1, notice)
2169
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called targetpath.

       Raises ExtractError if the os module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002177
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node called targetpath.

       The node type is taken from tarinfo.isblk(), its permissions
       from tarinfo.mode and its device number from tarinfo.devmajor
       and tarinfo.devminor. Raises ExtractError if the os module
       lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    node_type = stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, tarinfo.mode | node_type, device)
2192
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link called targetpath.

       If the link cannot be created (platform limitation) or the hard
       link target does not exist on disk, the referenced archive
       member is extracted to targetpath instead. Raises ExtractError
       if that member cannot be found after link creation failed.
    """
    try:
        # For systems that support symbolic and hard links.
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        elif os.path.exists(tarinfo._link_target):
            # _link_target is prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            referenced = self._find_link_target(tarinfo)
            self._extract_member(referenced, targetpath)
    except symlink_exception:
        try:
            referenced = self._find_link_target(tarinfo)
            self._extract_member(referenced, targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002215
def chown(self, tarinfo, targetpath, numeric_owner):
    """Change the owner of targetpath to the one recorded in tarinfo.

       Nothing happens unless pwd is available and the effective user
       is root. If numeric_owner is True, tarinfo.uid/.gid are used
       directly; otherwise tarinfo.uname/.gname are looked up first and
       the numeric ids serve as fallback for unknown names. Raises
       ExtractError if the ownership change fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    if numeric_owner:
        gid = tarinfo.gid
        uid = tarinfo.uid
    else:
        try:
            gid = grp.getgrnam(tarinfo.gname)[2]
        except KeyError:
            gid = tarinfo.gid
        try:
            uid = pwd.getpwnam(tarinfo.uname)[2]
        except KeyError:
            uid = tarinfo.uid

    # Prefer lchown() for symlinks so that the link itself is changed.
    use_lchown = tarinfo.issym() and hasattr(os, "lchown")
    try:
        if use_lchown:
            os.lchown(targetpath, uid, gid)
        else:
            os.chown(targetpath, uid, gid)
    except OSError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002241
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits in tarinfo.mode to targetpath.

       Does nothing if the os module has no chmod(). Raises
       ExtractError if the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except OSError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002250
def utime(self, tarinfo, targetpath):
    """Set both access and modification time of targetpath to
       tarinfo.mtime.

       Does nothing if the os module has no utime(). Raises
       ExtractError if the times cannot be changed.
    """
    if hasattr(os, 'utime'):
        stamp = tarinfo.mtime
        try:
            os.utime(targetpath, (stamp, stamp))
        except OSError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002260
2261 #--------------------------------------------------------------------------
def next(self):
    """Return the next archive member as a TarInfo object, or None if
       there are no more members.

       Requires the TarFile to be in mode "r" or "a". A member that was
       already read ahead (self.firstmember) is handed out first. Every
       newly read member is appended to self.members; reaching the end
       marks the archive as fully loaded.
    """
    self._check("ra")

    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Advance the file pointer. Reading the byte just before the next
    # header detects archives that end in the middle of member data.
    if self.offset != self.fileobj.tell():
        self.fileobj.seek(self.offset - 1)
        if not self.fileobj.read(1):
            raise ReadError("unexpected end of data")

    # Read the next block. The loop only repeats while bad or zero
    # blocks are skipped because ignore_zeros is set.
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as exc:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as exc:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                raise ReadError(str(exc))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as exc:
            if self.offset == 0:
                raise ReadError(str(exc))
        except SubsequentHeaderError as exc:
            raise ReadError(str(exc))
        break

    if tarinfo is None:
        self._loaded = True
    else:
        self.members.append(tarinfo)

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002312
2313 #--------------------------------------------------------------------------
2314 # Little helper methods:
2315
def _getmember(self, name, tarinfo=None, normalize=False):
    """Return the last archive member called `name', or None.

       If `tarinfo' is given, only members stored before it are
       searched. With `normalize' both `name' and the member names are
       passed through os.path.normpath() before being compared.
    """
    # getmembers() ensures that all members have been loaded.
    candidates = self.getmembers()

    # Limit the member search list up to tarinfo.
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    if normalize:
        name = os.path.normpath(name)

    # Search from bottom to top so that the latest entry wins.
    for candidate in reversed(candidates):
        candidate_name = candidate.name
        if normalize:
            candidate_name = os.path.normpath(candidate_name)
        if name == candidate_name:
            return candidate
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002338
def _load(self):
    """Call next() until it returns None, i.e. read through the whole
       archive, then mark the TarFile as loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2348
def _check(self, mode=None):
    """Raise OSError if the TarFile is closed or, when `mode' is
       given, if the TarFile's own mode is not contained in `mode'.
    """
    if self.closed:
        raise OSError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise OSError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002357
def _find_link_target(self, tarinfo):
    """Return the archive member that the symlink or hardlink member
       `tarinfo' refers to. Raises KeyError if there is none.
    """
    if tarinfo.issym():
        # A symlink target is relative to the link's own directory;
        # always search the entire archive for it.
        parts = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join(part for part in parts if part)
        limit = None
    else:
        # Search the archive before the link, because a hard link is
        # just a reference to an already archived file.
        linkname = tarinfo.linkname
        limit = tarinfo

    target = self._getmember(linkname, tarinfo=limit, normalize=True)
    if target is None:
        raise KeyError("linkname %r not found" % linkname)
    return target
2376
def __iter__(self):
    """Return an iterator over the members: a plain iterator over
       self.members if the archive is already loaded, a TarIter
       otherwise.
    """
    return iter(self.members) if self._loaded else TarIter(self)
2384
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr unless `level' is greater than
       self.debug.
    """
    if level > self.debug:
        return
    print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002390
def __enter__(self):
    """Enter a with block: verify via _check() that the TarFile is
       still open and return it.
    """
    self._check()
    return self
2394
def __exit__(self, type, value, traceback):
    """Leave a with block. Without a pending exception the TarFile is
       closed normally; otherwise only the underlying file object is
       closed (unless it was supplied by the caller) and the TarFile
       is marked as closed.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because
    # it would try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002404# class TarFile
2405
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator positioned at the first member of
           `tarfile'.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def __next__(self):
        """Return the next member, reading it with TarFile.next() if it
           is not yet in TarFile.members. Once the archive is exhausted
           the TarFile is marked as _loaded and StopIteration is raised.
        """
        archive = self.tarfile

        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. Members that
        # are already known are therefore served from the list by index.
        if self.index == 0 and archive.firstmember is not None:
            tarinfo = archive.next()
        elif self.index < len(archive.members):
            tarinfo = archive.members[self.index]
        elif archive._loaded:
            raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return tarinfo
2443
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002444#--------------------
2445# exported functions
2446#--------------------
def is_tarfile(name):
    """Return True if `name' can be opened and closed again as a tar
       archive without a TarError, else return False.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2457
# Module-level alias: tarfile.open() is TarFile.open(). From this point
# on the name `open' in this module no longer refers to the builtin.
open = TarFile.open
Serhiy Storchakad27b4552013-11-24 01:53:29 +02002459
2460
def main():
    """Run the command line interface of the tarfile module.

       Exactly one of the following modes is expected:

       -l/--list <tarfile>                  show a listing of the archive
       -e/--extract <tarfile> [<output_dir>]  extract into a directory
       -c/--create <name> [<file> ...]      create an archive from files
       -t/--test <tarfile>                  test if the archive is valid

       -v/--verbose enables additional output. The process exits with
       status 1 and a message if the given file is not a tar archive,
       if --extract gets more than two values, or if no mode is given.
    """
    import argparse

    description = 'A simple command line interface for tarfile module.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-l', '--list', metavar='<tarfile>',
                       help='Show listing of a tarfile')
    group.add_argument('-e', '--extract', nargs='+',
                       metavar=('<tarfile>', '<output_dir>'),
                       help='Extract tarfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create tarfile from sources')
    group.add_argument('-t', '--test', metavar='<tarfile>',
                       help='Test if a tarfile is valid')
    args = parser.parse_args()

    # Options that were not given are None. Compare against None
    # instead of relying on truthiness, so that an empty file name is
    # still dispatched to its mode and reported as "not a tar archive".
    if args.test is not None:
        src = args.test
        if is_tarfile(src):
            with open(src, 'r') as tar:
                tar.getmembers()
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.list is not None:
        src = args.list
        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.extract is not None:
        if len(args.extract) == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif len(args.extract) == 2:
            src, curdir = args.extract
        else:
            # nargs='+' accepts any number of values; only one or two
            # make sense here.
            parser.exit(1, parser.format_help())

        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} file is extracted.'.format(src)
                else:
                    msg = ('{!r} file is extracted '
                           'into {!r} directory.').format(src, curdir)
                print(msg)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.create is not None:
        # The first value is the archive name, the rest are its sources.
        tar_name = args.create.pop(0)
        _, ext = os.path.splitext(tar_name)
        # Map file name extensions to compression modes.
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
        tar_files = args.create

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))

    else:
        parser.exit(1, parser.format_help())
2550
# Run the command line interface when the module is executed as a script.
if __name__ == '__main__':
    main()