blob: 6d8d36cdf8a6177b3f7a77b7e26fe4c877586ecb [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
Guido van Rossumd8faa362007-04-27 19:54:29 +000032version = "0.9.0"
Guido van Rossum98297ee2007-11-06 21:34:58 +000033__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
Senthil Kumaran7c9719c2011-07-28 22:32:49 +080034__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
35__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $"
Guido van Rossum98297ee2007-11-06 21:34:58 +000036__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037
38#---------
39# Imports
40#---------
Serhiy Storchakacf4a2f22015-03-11 17:18:03 +020041from builtins import open as bltn_open
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000042import sys
43import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020044import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000045import shutil
46import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000047import time
48import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000049import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000050import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000051
52try:
53 import grp, pwd
Brett Cannoncd171c82013-07-04 17:43:24 -040054except ImportError:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000055 grp = pwd = None
56
# Exceptions that signal os.symlink() is unusable: the function may be
# missing (AttributeError), unimplemented on Windows prior to 6.0
# (NotImplementedError), or refused with OSError (winerror=1314) when the
# caller does not hold the SeCreateSymbolicLinkPrivilege privilege.
symlink_exception = (AttributeError, NotImplementedError, OSError)
65
# Public names exported by "from tarfile import *".
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
68
69#---------------------------------------------------------
70# tar constants
71#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# Both magic strings fill the 8-byte magic+version area of a header.  The
# GNU variant is "ustar" followed by TWO spaces and a NUL; with a single
# space it would be one byte short and shift every later header field.
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000077
# Maximum sizes of the fixed-width text fields.
LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Member type flags; each is a single byte.
REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

# Type flags specific to GNU tar.
GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

# Type flags for extended (pax) headers.
XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

# Archive format identifiers; DEFAULT_FORMAT selects the one used when
# the caller does not specify a format.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000104
105#---------------------------------------------------------
106# tarfile constants
107#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, all other fields
# are treated as strings.  Each key maps to the callable used to
# convert that field.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
140
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000141#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000142# initialization
143#---------------------------------------------------------
# Default encoding for header text: always UTF-8 on "nt"/"ce" platforms,
# otherwise whatever the interpreter reports as the filesystem encoding.
ENCODING = "utf-8" if os.name in ("nt", "ce") else sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000148
149#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000150# Some useful functions
151#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000152
def stn(s, length, encoding, errors):
    """Encode the string s and fit it into a field of length bytes.

    The encoded value is cut off after length bytes; a shorter value is
    filled up with NUL bytes.
    """
    encoded = s.encode(encoding, errors)
    # A negative repeat count yields b"", so over-long values get no padding.
    padding = NUL * (length - len(encoded))
    return encoded[:length] + padding
Thomas Wouters477c8d52006-05-27 19:21:47 +0000158
def nts(s, encoding, errors):
    """Decode a bytes field, stopping at the first null byte (if any).
    """
    # partition() returns the whole input as the head when no NUL exists.
    head = s.partition(b"\0")[0]
    return head.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000166
def nti(s):
    """Decode a header number field into a Python integer.

    Two encodings exist (see itn()): a first byte of 0o200 or 0o377
    marks a big-endian base-256 value (0o377 meaning negative), anything
    else is parsed as a null-terminated ASCII octal string.  Raises
    InvalidHeaderError when the octal text cannot be parsed.
    """
    marker = s[0]
    if marker in (0o200, 0o377):
        value = int.from_bytes(s[1:], "big")
        if marker == 0o377:
            value = -(256 ** (len(s) - 1) - value)
        return value

    try:
        text = nts(s, "ascii", "strict")
        # An all-blank field counts as zero.
        return int(text.strip() or "0", 8)
    except ValueError:
        raise InvalidHeaderError("invalid header")
186
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

    n is the value to store, digits the width of the field in bytes and
    format the archive format in use.  Returns a bytes-like object of
    digits bytes.  Raises ValueError ("overflow in number field") when
    the value fits neither encoding permitted for the given format.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.
    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, int(n)), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        # Truncate to an integer first: the bit operations below do not
        # accept floats (e.g. a float timestamp).  Doing it only after the
        # range check keeps inf/nan on the ValueError path.
        n = int(n)
        if n >= 0:
            s = bytearray([0o200])
        else:
            s = bytearray([0o377])
            # Two's complement; only the low digits-1 bytes are emitted.
            n = 256 ** digits + n

        for i in range(digits - 1):
            # Always insert right after the marker byte, so the least
            # significant byte ends up last (big-endian).
            s.insert(1, n & 0o377)
            n >>= 8
    else:
        raise ValueError("overflow in number field")

    return s
214
def calc_chksums(buf):
    """Return the (unsigned, signed) checksum pair for a header block.

    All bytes of the header are summed except the 8-byte chksum field,
    which is counted as if it held eight spaces (8 * 32 == 256).
    According to the GNU tar sources, some tars (Sun and NeXT) sum
    signed chars, which gives a different result when bytes with the
    high bit set are present, so both variants are computed.
    """
    def _total(code):
        # 148 bytes, skip the 8-byte chksum field, then 356 more bytes.
        layout = "148%s8x356%s" % (code, code)
        return 256 + sum(struct.unpack_from(layout, buf))

    unsigned_chksum = _total("B")
    signed_chksum = _total("b")
    return unsigned_chksum, signed_chksum
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000227
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    With length None everything up to EOF is copied; with length 0
    nothing happens.  Raises OSError if src ends before length bytes
    could be read.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    def _transfer(count):
        # Move exactly count bytes; a short read means src was truncated.
        data = src.read(count)
        if len(data) < count:
            raise OSError("end of file reached")
        dst.write(data)

    BUFSIZE = 16 * 1024
    blocks, remainder = divmod(length, BUFSIZE)
    for _ in range(blocks):
        _transfer(BUFSIZE)
    if remainder:
        _transfer(remainder)
    return
252
def filemode(mode):
    """Deprecated alias of stat.filemode(); emits a DeprecationWarning."""
    import warnings

    # stacklevel=2 attributes the warning to the caller of this function.
    warnings.warn(
        "deprecated in favor of stat.filemode",
        category=DeprecationWarning,
        stacklevel=2,
    )
    return stat.filemode(mode)
259
Serhiy Storchaka3b4f1592014-02-05 20:53:36 +0200260def _safe_print(s):
261 encoding = getattr(sys.stdout, 'encoding', None)
262 if encoding is not None:
263 s = s.encode(encoding, 'backslashreplace').decode(encoding)
264 print(s, end=' ')
265
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000266
class TarError(Exception):
    """Root of the tarfile exception hierarchy."""


class ExtractError(TarError):
    """Raised for general errors during extraction."""


class ReadError(TarError):
    """Raised when a tar archive cannot be read."""


class CompressionError(TarError):
    """Raised when a compression method is not available."""


class StreamError(TarError):
    """Raised for operations that stream-like TarFiles do not support."""


class HeaderError(TarError):
    """Common base class of all header-related errors."""


class EmptyHeaderError(HeaderError):
    """Raised for an empty header."""


class TruncatedHeaderError(HeaderError):
    """Raised for a truncated header."""


class EOFHeaderError(HeaderError):
    """Raised for an end-of-file header."""


class InvalidHeaderError(HeaderError):
    """Raised for an invalid header."""


class SubsequentHeaderError(HeaderError):
    """Raised for a missing or invalid extended header."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000300
301#---------------------------
302# internal stream interface
303#---------------------------
304class _LowLevelFile:
305 """Low-level file object. Supports reading and writing.
306 It is used instead of a regular file object for streaming
307 access.
308 """
309
310 def __init__(self, name, mode):
311 mode = {
312 "r": os.O_RDONLY,
313 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
314 }[mode]
315 if hasattr(os, "O_BINARY"):
316 mode |= os.O_BINARY
Lars Gustäbeld6eb70b2010-04-29 15:37:02 +0000317 self.fd = os.open(name, mode, 0o666)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000318
319 def close(self):
320 os.close(self.fd)
321
322 def read(self, size):
323 return os.read(self.fd, size)
324
325 def write(self, s):
326 os.write(self.fd, s)
327
328class _Stream:
329 """Class that serves as an adapter between TarFile and
330 a stream-like object. The stream-like object only
331 needs to have a read() or write() method and is accessed
332 blockwise. Use of gzip or bzip2 compression is possible.
333 A stream-like object could be for example: sys.stdin,
334 sys.stdout, a socket, a tape device etc.
335
336 _Stream is intended to be used only internally.
337 """
338
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000339 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000340 """Construct a _Stream object.
341 """
342 self._extfileobj = True
343 if fileobj is None:
344 fileobj = _LowLevelFile(name, mode)
345 self._extfileobj = False
346
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000347 if comptype == '*':
348 # Enable transparent compression detection for the
349 # stream interface
350 fileobj = _StreamProxy(fileobj)
351 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000352
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000353 self.name = name or ""
354 self.mode = mode
355 self.comptype = comptype
356 self.fileobj = fileobj
357 self.bufsize = bufsize
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000358 self.buf = b""
Guido van Rossume2a383d2007-01-15 16:59:06 +0000359 self.pos = 0
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000360 self.closed = False
361
Antoine Pitrou605c2932010-09-23 20:15:14 +0000362 try:
363 if comptype == "gz":
364 try:
365 import zlib
Brett Cannoncd171c82013-07-04 17:43:24 -0400366 except ImportError:
Antoine Pitrou605c2932010-09-23 20:15:14 +0000367 raise CompressionError("zlib module is not available")
368 self.zlib = zlib
369 self.crc = zlib.crc32(b"")
370 if mode == "r":
371 self._init_read_gz()
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100372 self.exception = zlib.error
Antoine Pitrou605c2932010-09-23 20:15:14 +0000373 else:
374 self._init_write_gz()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000375
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100376 elif comptype == "bz2":
Antoine Pitrou605c2932010-09-23 20:15:14 +0000377 try:
378 import bz2
Brett Cannoncd171c82013-07-04 17:43:24 -0400379 except ImportError:
Antoine Pitrou605c2932010-09-23 20:15:14 +0000380 raise CompressionError("bz2 module is not available")
381 if mode == "r":
382 self.dbuf = b""
383 self.cmp = bz2.BZ2Decompressor()
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200384 self.exception = OSError
Antoine Pitrou605c2932010-09-23 20:15:14 +0000385 else:
386 self.cmp = bz2.BZ2Compressor()
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100387
388 elif comptype == "xz":
389 try:
390 import lzma
Brett Cannoncd171c82013-07-04 17:43:24 -0400391 except ImportError:
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100392 raise CompressionError("lzma module is not available")
393 if mode == "r":
394 self.dbuf = b""
395 self.cmp = lzma.LZMADecompressor()
396 self.exception = lzma.LZMAError
397 else:
398 self.cmp = lzma.LZMACompressor()
399
400 elif comptype != "tar":
401 raise CompressionError("unknown compression type %r" % comptype)
402
Antoine Pitrou605c2932010-09-23 20:15:14 +0000403 except:
404 if not self._extfileobj:
405 self.fileobj.close()
406 self.closed = True
407 raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000408
409 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000410 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000411 self.close()
412
413 def _init_write_gz(self):
414 """Initialize for writing with gzip compression.
415 """
416 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
417 -self.zlib.MAX_WBITS,
418 self.zlib.DEF_MEM_LEVEL,
419 0)
Guido van Rossume2a383d2007-01-15 16:59:06 +0000420 timestamp = struct.pack("<L", int(time.time()))
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000421 self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000422 if self.name.endswith(".gz"):
423 self.name = self.name[:-3]
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000424 # RFC1952 says we must use ISO-8859-1 for the FNAME field.
425 self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000426
427 def write(self, s):
428 """Write string s to the stream.
429 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000430 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000431 self.crc = self.zlib.crc32(s, self.crc)
432 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000433 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000434 s = self.cmp.compress(s)
435 self.__write(s)
436
437 def __write(self, s):
438 """Write string s to the stream if a whole new block
439 is ready to be written.
440 """
441 self.buf += s
442 while len(self.buf) > self.bufsize:
443 self.fileobj.write(self.buf[:self.bufsize])
444 self.buf = self.buf[self.bufsize:]
445
446 def close(self):
447 """Close the _Stream object. No operation should be
448 done on it afterwards.
449 """
450 if self.closed:
451 return
452
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000453 self.closed = True
Serhiy Storchaka7e7a3db2015-04-10 13:24:41 +0300454 try:
455 if self.mode == "w" and self.comptype != "tar":
456 self.buf += self.cmp.flush()
457
458 if self.mode == "w" and self.buf:
459 self.fileobj.write(self.buf)
460 self.buf = b""
461 if self.comptype == "gz":
462 # The native zlib crc is an unsigned 32-bit integer, but
463 # the Python wrapper implicitly casts that to a signed C
464 # long. So, on a 32-bit box self.crc may "look negative",
465 # while the same crc on a 64-bit box may "look positive".
466 # To avoid irksome warnings from the `struct` module, force
467 # it to look positive on all boxes.
468 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
469 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
470 finally:
471 if not self._extfileobj:
472 self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000473
474 def _init_read_gz(self):
475 """Initialize for reading a gzip compressed fileobj.
476 """
477 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000478 self.dbuf = b""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000479
480 # taken from gzip.GzipFile with some alterations
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000481 if self.__read(2) != b"\037\213":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000482 raise ReadError("not a gzip file")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000483 if self.__read(1) != b"\010":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000484 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000485
486 flag = ord(self.__read(1))
487 self.__read(6)
488
489 if flag & 4:
490 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
491 self.read(xlen)
492 if flag & 8:
493 while True:
494 s = self.__read(1)
495 if not s or s == NUL:
496 break
497 if flag & 16:
498 while True:
499 s = self.__read(1)
500 if not s or s == NUL:
501 break
502 if flag & 2:
503 self.__read(2)
504
505 def tell(self):
506 """Return the stream's file pointer position.
507 """
508 return self.pos
509
510 def seek(self, pos=0):
511 """Set the stream's file pointer to pos. Negative seeking
512 is forbidden.
513 """
514 if pos - self.pos >= 0:
515 blocks, remainder = divmod(pos - self.pos, self.bufsize)
Guido van Rossum805365e2007-05-07 22:24:25 +0000516 for i in range(blocks):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000517 self.read(self.bufsize)
518 self.read(remainder)
519 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000520 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000521 return self.pos
522
523 def read(self, size=None):
524 """Return the next size number of bytes from the stream.
525 If size is not defined, return all bytes of the stream
526 up to EOF.
527 """
528 if size is None:
529 t = []
530 while True:
531 buf = self._read(self.bufsize)
532 if not buf:
533 break
534 t.append(buf)
535 buf = "".join(t)
536 else:
537 buf = self._read(size)
538 self.pos += len(buf)
539 return buf
540
541 def _read(self, size):
542 """Return size bytes from the stream.
543 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000544 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000545 return self.__read(size)
546
547 c = len(self.dbuf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000548 while c < size:
549 buf = self.__read(self.bufsize)
550 if not buf:
551 break
Guido van Rossumd8faa362007-04-27 19:54:29 +0000552 try:
553 buf = self.cmp.decompress(buf)
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100554 except self.exception:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000555 raise ReadError("invalid compressed data")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000556 self.dbuf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000557 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000558 buf = self.dbuf[:size]
559 self.dbuf = self.dbuf[size:]
560 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000561
562 def __read(self, size):
563 """Return size bytes from stream. If internal buffer is empty,
564 read another block from the stream.
565 """
566 c = len(self.buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000567 while c < size:
568 buf = self.fileobj.read(self.bufsize)
569 if not buf:
570 break
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000571 self.buf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000572 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000573 buf = self.buf[:size]
574 self.buf = self.buf[size:]
575 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000576# class _Stream
577
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000578class _StreamProxy(object):
579 """Small proxy class that enables transparent compression
580 detection for the Stream interface (mode 'r|*').
581 """
582
583 def __init__(self, fileobj):
584 self.fileobj = fileobj
585 self.buf = self.fileobj.read(BLOCKSIZE)
586
587 def read(self, size):
588 self.read = self.fileobj.read
589 return self.buf
590
591 def getcomptype(self):
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100592 if self.buf.startswith(b"\x1f\x8b\x08"):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000593 return "gz"
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100594 elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000595 return "bz2"
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100596 elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
597 return "xz"
598 else:
599 return "tar"
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000600
601 def close(self):
602 self.fileobj.close()
603# class StreamProxy
604
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000605#------------------------
606# Extraction file object
607#------------------------
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000608class _FileInFile(object):
609 """A thin wrapper around an existing file object that
610 provides a part of its data as an individual file
611 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000612 """
613
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000614 def __init__(self, fileobj, offset, size, blockinfo=None):
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000615 self.fileobj = fileobj
616 self.offset = offset
617 self.size = size
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000618 self.position = 0
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200619 self.name = getattr(fileobj, "name", None)
620 self.closed = False
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000621
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000622 if blockinfo is None:
623 blockinfo = [(0, size)]
624
625 # Construct a map with data and zero blocks.
626 self.map_index = 0
627 self.map = []
628 lastpos = 0
629 realpos = self.offset
630 for offset, size in blockinfo:
631 if offset > lastpos:
632 self.map.append((False, lastpos, offset, None))
633 self.map.append((True, offset, offset + size, realpos))
634 realpos += size
635 lastpos = offset + size
636 if lastpos < self.size:
637 self.map.append((False, lastpos, self.size, None))
638
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200639 def flush(self):
640 pass
641
642 def readable(self):
643 return True
644
645 def writable(self):
646 return False
647
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000648 def seekable(self):
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000649 return self.fileobj.seekable()
650
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000651 def tell(self):
652 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000653 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000654 return self.position
655
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200656 def seek(self, position, whence=io.SEEK_SET):
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000657 """Seek to a position in the file.
658 """
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200659 if whence == io.SEEK_SET:
660 self.position = min(max(position, 0), self.size)
661 elif whence == io.SEEK_CUR:
662 if position < 0:
663 self.position = max(self.position + position, 0)
664 else:
665 self.position = min(self.position + position, self.size)
666 elif whence == io.SEEK_END:
667 self.position = max(min(self.size + position, self.size), 0)
668 else:
669 raise ValueError("Invalid argument")
670 return self.position
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000671
672 def read(self, size=None):
673 """Read data from the file.
674 """
675 if size is None:
676 size = self.size - self.position
677 else:
678 size = min(size, self.size - self.position)
679
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000680 buf = b""
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000681 while size > 0:
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000682 while True:
683 data, start, stop, offset = self.map[self.map_index]
684 if start <= self.position < stop:
685 break
686 else:
687 self.map_index += 1
688 if self.map_index == len(self.map):
689 self.map_index = 0
690 length = min(size, stop - self.position)
691 if data:
Lars Gustäbeldd071042011-02-23 11:42:22 +0000692 self.fileobj.seek(offset + (self.position - start))
693 buf += self.fileobj.read(length)
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000694 else:
695 buf += NUL * length
696 size -= length
697 self.position += length
698 return buf
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000699
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200700 def readinto(self, b):
701 buf = self.read(len(b))
702 b[:len(buf)] = buf
703 return len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000704
705 def close(self):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000706 self.closed = True
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200707#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000708
class ExFileObject(io.BufferedReader):
    """Buffered reader over the data of a single archive member."""

    def __init__(self, tarfile, tarinfo):
        # Wrap the member's data region (and sparse map, if any) in a raw
        # file-like object and let BufferedReader do the buffering.
        member_data = _FileInFile(
            tarfile.fileobj,
            tarinfo.offset_data,
            tarinfo.size,
            tarinfo.sparse,
        )
        super().__init__(member_data)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000716
717#------------------
718# Exported Classes
719#------------------
720class TarInfo(object):
721 """Informational class which holds the details about an
722 archive member given by a tar header block.
723 TarInfo objects are returned by TarFile.getmember(),
724 TarFile.getmembers() and TarFile.gettarinfo() and are
725 usually created internally.
726 """
727
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +0000728 __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
729 "chksum", "type", "linkname", "uname", "gname",
730 "devmajor", "devminor",
731 "offset", "offset_data", "pax_headers", "sparse",
732 "tarfile", "_sparse_structs", "_link_target")
733
    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.

           Every other attribute starts out with a neutral default (a
           regular file with mode 0o644, zero size and ids, empty names).
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information
757
758 # In pax headers the "name" and "linkname" field are called
759 # "path" and "linkpath".
760 def _getpath(self):
761 return self.name
762 def _setpath(self, name):
763 self.name = name
764 path = property(_getpath, _setpath)
765
766 def _getlinkpath(self):
767 return self.linkname
768 def _setlinkpath(self, linkname):
769 self.linkname = linkname
770 linkpath = property(_getlinkpath, _setlinkpath)
771
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000772 def __repr__(self):
773 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
774
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000775 def get_info(self):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000776 """Return the TarInfo's attributes as a dictionary.
777 """
778 info = {
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +0000779 "name": self.name,
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000780 "mode": self.mode & 0o7777,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000781 "uid": self.uid,
782 "gid": self.gid,
783 "size": self.size,
784 "mtime": self.mtime,
785 "chksum": self.chksum,
786 "type": self.type,
Lars Gustäbelbfdfdda2009-08-28 19:59:59 +0000787 "linkname": self.linkname,
Guido van Rossumd8faa362007-04-27 19:54:29 +0000788 "uname": self.uname,
789 "gname": self.gname,
790 "devmajor": self.devmajor,
791 "devminor": self.devminor
792 }
793
794 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
795 info["name"] += "/"
796
797 return info
798
Victor Stinnerde629d42010-05-05 21:43:57 +0000799 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000800 """Return a tar header as a string of 512 byte blocks.
801 """
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000802 info = self.get_info()
Guido van Rossume7ba4952007-06-06 23:52:48 +0000803
Guido van Rossumd8faa362007-04-27 19:54:29 +0000804 if format == USTAR_FORMAT:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000805 return self.create_ustar_header(info, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000806 elif format == GNU_FORMAT:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000807 return self.create_gnu_header(info, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000808 elif format == PAX_FORMAT:
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000809 return self.create_pax_header(info, encoding)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000810 else:
811 raise ValueError("invalid format")
812
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000813 def create_ustar_header(self, info, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000814 """Return the object as a ustar header block.
815 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000816 info["magic"] = POSIX_MAGIC
817
818 if len(info["linkname"]) > LENGTH_LINK:
819 raise ValueError("linkname is too long")
820
821 if len(info["name"]) > LENGTH_NAME:
822 info["prefix"], info["name"] = self._posix_split_name(info["name"])
823
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000824 return self._create_header(info, USTAR_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000825
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000826 def create_gnu_header(self, info, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000827 """Return the object as a GNU header block sequence.
828 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000829 info["magic"] = GNU_MAGIC
830
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000831 buf = b""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000832 if len(info["linkname"]) > LENGTH_LINK:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000833 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000834
835 if len(info["name"]) > LENGTH_NAME:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000836 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000837
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000838 return buf + self._create_header(info, GNU_FORMAT, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000839
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000840 def create_pax_header(self, info, encoding):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000841 """Return the object as a ustar header block. If it cannot be
842 represented this way, prepend a pax extended header sequence
843 with supplement information.
844 """
Guido van Rossumd8faa362007-04-27 19:54:29 +0000845 info["magic"] = POSIX_MAGIC
846 pax_headers = self.pax_headers.copy()
847
848 # Test string fields for values that exceed the field length or cannot
849 # be represented in ASCII encoding.
850 for name, hname, length in (
851 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
852 ("uname", "uname", 32), ("gname", "gname", 32)):
853
Guido van Rossume7ba4952007-06-06 23:52:48 +0000854 if hname in pax_headers:
855 # The pax header has priority.
856 continue
857
Guido van Rossumd8faa362007-04-27 19:54:29 +0000858 # Try to encode the string as ASCII.
859 try:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000860 info[name].encode("ascii", "strict")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000861 except UnicodeEncodeError:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000862 pax_headers[hname] = info[name]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000863 continue
864
Guido van Rossume7ba4952007-06-06 23:52:48 +0000865 if len(info[name]) > length:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000866 pax_headers[hname] = info[name]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000867
868 # Test number fields for values that exceed the field limit or values
869 # that like to be stored as float.
870 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
Guido van Rossume7ba4952007-06-06 23:52:48 +0000871 if name in pax_headers:
872 # The pax header has priority. Avoid overflow.
873 info[name] = 0
874 continue
875
Guido van Rossumd8faa362007-04-27 19:54:29 +0000876 val = info[name]
877 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
Guido van Rossumef87d6e2007-05-02 19:09:54 +0000878 pax_headers[name] = str(val)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000879 info[name] = 0
880
Guido van Rossume7ba4952007-06-06 23:52:48 +0000881 # Create a pax extended header if necessary.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000882 if pax_headers:
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000883 buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000884 else:
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000885 buf = b""
Guido van Rossumd8faa362007-04-27 19:54:29 +0000886
Lars Gustäbel3741eff2007-08-21 12:17:05 +0000887 return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000888
889 @classmethod
Lars Gustäbel3741eff2007-08-21 12:17:05 +0000890 def create_pax_global_header(cls, pax_headers):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000891 """Return the object as a pax global header block sequence.
892 """
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000893 return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")
Guido van Rossumd8faa362007-04-27 19:54:29 +0000894
895 def _posix_split_name(self, name):
896 """Split a name longer than 100 chars into a prefix
897 and a name part.
898 """
899 prefix = name[:LENGTH_PREFIX + 1]
900 while prefix and prefix[-1] != "/":
901 prefix = prefix[:-1]
902
903 name = name[len(prefix):]
904 prefix = prefix[:-1]
905
906 if not prefix or len(name) > LENGTH_NAME:
907 raise ValueError("name is too long")
908 return prefix, name
909
910 @staticmethod
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000911 def _create_header(info, format, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000912 """Return a header block. info is a dictionary with file
913 information, format must be one of the *_FORMAT constants.
914 """
915 parts = [
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000916 stn(info.get("name", ""), 100, encoding, errors),
Guido van Rossumcd16bf62007-06-13 18:07:49 +0000917 itn(info.get("mode", 0) & 0o7777, 8, format),
Guido van Rossumd8faa362007-04-27 19:54:29 +0000918 itn(info.get("uid", 0), 8, format),
919 itn(info.get("gid", 0), 8, format),
920 itn(info.get("size", 0), 12, format),
921 itn(info.get("mtime", 0), 12, format),
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000922 b" ", # checksum field
Guido van Rossumd8faa362007-04-27 19:54:29 +0000923 info.get("type", REGTYPE),
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000924 stn(info.get("linkname", ""), 100, encoding, errors),
925 info.get("magic", POSIX_MAGIC),
Lars Gustäbel331b8002010-10-04 15:18:47 +0000926 stn(info.get("uname", ""), 32, encoding, errors),
927 stn(info.get("gname", ""), 32, encoding, errors),
Guido van Rossumd8faa362007-04-27 19:54:29 +0000928 itn(info.get("devmajor", 0), 8, format),
929 itn(info.get("devminor", 0), 8, format),
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000930 stn(info.get("prefix", ""), 155, encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000931 ]
932
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000933 buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
Guido van Rossumd8faa362007-04-27 19:54:29 +0000934 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
Lars Gustäbela280ca752007-08-28 07:34:33 +0000935 buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
Guido van Rossumd8faa362007-04-27 19:54:29 +0000936 return buf
937
938 @staticmethod
939 def _create_payload(payload):
940 """Return the string payload filled with zero bytes
941 up to the next 512 byte border.
942 """
943 blocks, remainder = divmod(len(payload), BLOCKSIZE)
944 if remainder > 0:
945 payload += (BLOCKSIZE - remainder) * NUL
946 return payload
947
948 @classmethod
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000949 def _create_gnu_long_header(cls, name, type, encoding, errors):
Guido van Rossumd8faa362007-04-27 19:54:29 +0000950 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
951 for name.
952 """
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000953 name = name.encode(encoding, errors) + NUL
Guido van Rossumd8faa362007-04-27 19:54:29 +0000954
955 info = {}
956 info["name"] = "././@LongLink"
957 info["type"] = type
958 info["size"] = len(name)
959 info["magic"] = GNU_MAGIC
960
961 # create extended header + name blocks.
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000962 return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
Guido van Rossumd8faa362007-04-27 19:54:29 +0000963 cls._create_payload(name)
964
965 @classmethod
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000966 def _create_pax_generic_header(cls, pax_headers, type, encoding):
967 """Return a POSIX.1-2008 extended or global header sequence
Guido van Rossumd8faa362007-04-27 19:54:29 +0000968 that contains a list of keyword, value pairs. The values
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000969 must be strings.
Guido van Rossumd8faa362007-04-27 19:54:29 +0000970 """
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000971 # Check if one of the fields contains surrogate characters and thereby
972 # forces hdrcharset=BINARY, see _proc_pax() for more information.
973 binary = False
974 for keyword, value in pax_headers.items():
975 try:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000976 value.encode("utf-8", "strict")
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000977 except UnicodeEncodeError:
978 binary = True
979 break
980
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000981 records = b""
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000982 if binary:
983 # Put the hdrcharset field at the beginning of the header.
984 records += b"21 hdrcharset=BINARY\n"
985
Guido van Rossumd8faa362007-04-27 19:54:29 +0000986 for keyword, value in pax_headers.items():
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000987 keyword = keyword.encode("utf-8")
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000988 if binary:
989 # Try to restore the original byte representation of `value'.
990 # Needless to say, that the encoding must match the string.
991 value = value.encode(encoding, "surrogateescape")
992 else:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +0000993 value = value.encode("utf-8")
Lars Gustäbel1465cc22010-05-17 18:02:50 +0000994
Guido van Rossumd8faa362007-04-27 19:54:29 +0000995 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
996 n = p = 0
997 while True:
998 n = l + len(str(p))
999 if n == p:
1000 break
1001 p = n
Lars Gustäbela280ca752007-08-28 07:34:33 +00001002 records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"
Guido van Rossumd8faa362007-04-27 19:54:29 +00001003
1004 # We use a hardcoded "././@PaxHeader" name like star does
1005 # instead of the one that POSIX recommends.
1006 info = {}
1007 info["name"] = "././@PaxHeader"
1008 info["type"] = type
1009 info["size"] = len(records)
1010 info["magic"] = POSIX_MAGIC
1011
1012 # Create pax header + record blocks.
Lars Gustäbel3741eff2007-08-21 12:17:05 +00001013 return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
Guido van Rossumd8faa362007-04-27 19:54:29 +00001014 cls._create_payload(records)
1015
Guido van Rossum75b64e62005-01-16 00:16:11 +00001016 @classmethod
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001017 def frombuf(cls, buf, encoding, errors):
1018 """Construct a TarInfo object from a 512 byte bytes object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001019 """
Lars Gustäbel9520a432009-11-22 18:48:49 +00001020 if len(buf) == 0:
1021 raise EmptyHeaderError("empty header")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001022 if len(buf) != BLOCKSIZE:
Lars Gustäbel9520a432009-11-22 18:48:49 +00001023 raise TruncatedHeaderError("truncated header")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001024 if buf.count(NUL) == BLOCKSIZE:
Lars Gustäbel9520a432009-11-22 18:48:49 +00001025 raise EOFHeaderError("end of file header")
Thomas Wouters902d6eb2007-01-09 23:18:33 +00001026
1027 chksum = nti(buf[148:156])
1028 if chksum not in calc_chksums(buf):
Lars Gustäbel9520a432009-11-22 18:48:49 +00001029 raise InvalidHeaderError("bad checksum")
Thomas Wouters477c8d52006-05-27 19:21:47 +00001030
Guido van Rossumd8faa362007-04-27 19:54:29 +00001031 obj = cls()
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001032 obj.name = nts(buf[0:100], encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001033 obj.mode = nti(buf[100:108])
1034 obj.uid = nti(buf[108:116])
1035 obj.gid = nti(buf[116:124])
1036 obj.size = nti(buf[124:136])
1037 obj.mtime = nti(buf[136:148])
1038 obj.chksum = chksum
1039 obj.type = buf[156:157]
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001040 obj.linkname = nts(buf[157:257], encoding, errors)
1041 obj.uname = nts(buf[265:297], encoding, errors)
1042 obj.gname = nts(buf[297:329], encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001043 obj.devmajor = nti(buf[329:337])
1044 obj.devminor = nti(buf[337:345])
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001045 prefix = nts(buf[345:500], encoding, errors)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001046
Guido van Rossumd8faa362007-04-27 19:54:29 +00001047 # Old V7 tar format represents a directory as a regular
1048 # file with a trailing slash.
1049 if obj.type == AREGTYPE and obj.name.endswith("/"):
1050 obj.type = DIRTYPE
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001051
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +00001052 # The old GNU sparse format occupies some of the unused
1053 # space in the buffer for up to 4 sparse structures.
1054 # Save the them for later processing in _proc_sparse().
1055 if obj.type == GNUTYPE_SPARSE:
1056 pos = 386
1057 structs = []
1058 for i in range(4):
1059 try:
1060 offset = nti(buf[pos:pos + 12])
1061 numbytes = nti(buf[pos + 12:pos + 24])
1062 except ValueError:
1063 break
1064 structs.append((offset, numbytes))
1065 pos += 24
1066 isextended = bool(buf[482])
1067 origsize = nti(buf[483:495])
1068 obj._sparse_structs = (structs, isextended, origsize)
1069
Guido van Rossumd8faa362007-04-27 19:54:29 +00001070 # Remove redundant slashes from directories.
1071 if obj.isdir():
1072 obj.name = obj.name.rstrip("/")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001073
Guido van Rossumd8faa362007-04-27 19:54:29 +00001074 # Reconstruct a ustar longname.
1075 if prefix and obj.type not in GNU_TYPES:
1076 obj.name = prefix + "/" + obj.name
1077 return obj
1078
1079 @classmethod
1080 def fromtarfile(cls, tarfile):
1081 """Return the next TarInfo object from TarFile object
1082 tarfile.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001083 """
Guido van Rossumd8faa362007-04-27 19:54:29 +00001084 buf = tarfile.fileobj.read(BLOCKSIZE)
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001085 obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001086 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1087 return obj._proc_member(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001088
Guido van Rossumd8faa362007-04-27 19:54:29 +00001089 #--------------------------------------------------------------------------
1090 # The following are methods that are called depending on the type of a
1091 # member. The entry point is _proc_member() which can be overridden in a
1092 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1093 # implement the following
1094 # operations:
1095 # 1. Set self.offset_data to the position where the data blocks begin,
1096 # if there is data that follows.
1097 # 2. Set tarfile.offset to the position where the next member's header will
1098 # begin.
1099 # 3. Return self or another valid TarInfo object.
1100 def _proc_member(self, tarfile):
1101 """Choose the right processing method depending on
1102 the type and call it.
Thomas Wouters89f507f2006-12-13 04:49:30 +00001103 """
Guido van Rossumd8faa362007-04-27 19:54:29 +00001104 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1105 return self._proc_gnulong(tarfile)
1106 elif self.type == GNUTYPE_SPARSE:
1107 return self._proc_sparse(tarfile)
1108 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1109 return self._proc_pax(tarfile)
1110 else:
1111 return self._proc_builtin(tarfile)
Thomas Wouters89f507f2006-12-13 04:49:30 +00001112
Guido van Rossumd8faa362007-04-27 19:54:29 +00001113 def _proc_builtin(self, tarfile):
1114 """Process a builtin type or an unknown type which
1115 will be treated as a regular file.
1116 """
1117 self.offset_data = tarfile.fileobj.tell()
1118 offset = self.offset_data
1119 if self.isreg() or self.type not in SUPPORTED_TYPES:
1120 # Skip the following data blocks.
1121 offset += self._block(self.size)
1122 tarfile.offset = offset
Thomas Wouters89f507f2006-12-13 04:49:30 +00001123
Guido van Rossume7ba4952007-06-06 23:52:48 +00001124 # Patch the TarInfo object with saved global
Guido van Rossumd8faa362007-04-27 19:54:29 +00001125 # header information.
Guido van Rossume7ba4952007-06-06 23:52:48 +00001126 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001127
1128 return self
1129
1130 def _proc_gnulong(self, tarfile):
1131 """Process the blocks that hold a GNU longname
1132 or longlink member.
1133 """
1134 buf = tarfile.fileobj.read(self._block(self.size))
1135
1136 # Fetch the next header and process it.
Lars Gustäbel9520a432009-11-22 18:48:49 +00001137 try:
1138 next = self.fromtarfile(tarfile)
1139 except HeaderError:
1140 raise SubsequentHeaderError("missing or bad subsequent header")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001141
1142 # Patch the TarInfo object from the next header with
1143 # the longname information.
1144 next.offset = self.offset
1145 if self.type == GNUTYPE_LONGNAME:
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001146 next.name = nts(buf, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001147 elif self.type == GNUTYPE_LONGLINK:
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001148 next.linkname = nts(buf, tarfile.encoding, tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001149
1150 return next
1151
1152 def _proc_sparse(self, tarfile):
1153 """Process a GNU sparse header plus extra headers.
1154 """
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +00001155 # We already collected some sparse structures in frombuf().
1156 structs, isextended, origsize = self._sparse_structs
1157 del self._sparse_structs
Guido van Rossumd8faa362007-04-27 19:54:29 +00001158
Lars Gustäbelc2ea8c62008-04-14 10:05:48 +00001159 # Collect sparse structures from extended header blocks.
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001160 while isextended:
Guido van Rossumd8faa362007-04-27 19:54:29 +00001161 buf = tarfile.fileobj.read(BLOCKSIZE)
1162 pos = 0
Guido van Rossum805365e2007-05-07 22:24:25 +00001163 for i in range(21):
Guido van Rossumd8faa362007-04-27 19:54:29 +00001164 try:
1165 offset = nti(buf[pos:pos + 12])
1166 numbytes = nti(buf[pos + 12:pos + 24])
1167 except ValueError:
1168 break
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001169 if offset and numbytes:
1170 structs.append((offset, numbytes))
Guido van Rossumd8faa362007-04-27 19:54:29 +00001171 pos += 24
Lars Gustäbelb506dc32007-08-07 18:36:16 +00001172 isextended = bool(buf[504])
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001173 self.sparse = structs
Guido van Rossumd8faa362007-04-27 19:54:29 +00001174
1175 self.offset_data = tarfile.fileobj.tell()
1176 tarfile.offset = self.offset_data + self._block(self.size)
1177 self.size = origsize
Guido van Rossumd8faa362007-04-27 19:54:29 +00001178 return self
1179
1180 def _proc_pax(self, tarfile):
1181 """Process an extended or global header as described in
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001182 POSIX.1-2008.
Guido van Rossumd8faa362007-04-27 19:54:29 +00001183 """
1184 # Read the header information.
1185 buf = tarfile.fileobj.read(self._block(self.size))
1186
1187 # A pax header stores supplemental information for either
1188 # the following file (extended) or all following files
1189 # (global).
1190 if self.type == XGLTYPE:
1191 pax_headers = tarfile.pax_headers
1192 else:
1193 pax_headers = tarfile.pax_headers.copy()
1194
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001195 # Check if the pax header contains a hdrcharset field. This tells us
1196 # the encoding of the path, linkpath, uname and gname fields. Normally,
1197 # these fields are UTF-8 encoded but since POSIX.1-2008 tar
1198 # implementations are allowed to store them as raw binary strings if
1199 # the translation to UTF-8 fails.
1200 match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
1201 if match is not None:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001202 pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001203
1204 # For the time being, we don't care about anything other than "BINARY".
1205 # The only other value that is currently allowed by the standard is
1206 # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
1207 hdrcharset = pax_headers.get("hdrcharset")
1208 if hdrcharset == "BINARY":
1209 encoding = tarfile.encoding
1210 else:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001211 encoding = "utf-8"
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001212
Guido van Rossumd8faa362007-04-27 19:54:29 +00001213 # Parse pax header information. A record looks like that:
1214 # "%d %s=%s\n" % (length, keyword, value). length is the size
1215 # of the complete record including the length field itself and
Guido van Rossume7ba4952007-06-06 23:52:48 +00001216 # the newline. keyword and value are both UTF-8 encoded strings.
Antoine Pitroufd036452008-08-19 17:56:33 +00001217 regex = re.compile(br"(\d+) ([^=]+)=")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001218 pos = 0
1219 while True:
1220 match = regex.match(buf, pos)
1221 if not match:
1222 break
1223
1224 length, keyword = match.groups()
1225 length = int(length)
1226 value = buf[match.end(2) + 1:match.start(1) + length - 1]
1227
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001228 # Normally, we could just use "utf-8" as the encoding and "strict"
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001229 # as the error handler, but we better not take the risk. For
1230 # example, GNU tar <= 1.23 is known to store filenames it cannot
1231 # translate to UTF-8 as raw strings (unfortunately without a
1232 # hdrcharset=BINARY header).
1233 # We first try the strict standard encoding, and if that fails we
1234 # fall back on the user's encoding and error handler.
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001235 keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001236 tarfile.errors)
1237 if keyword in PAX_NAME_FIELDS:
1238 value = self._decode_pax_field(value, encoding, tarfile.encoding,
1239 tarfile.errors)
1240 else:
Marc-André Lemburg8f36af72011-02-25 15:42:01 +00001241 value = self._decode_pax_field(value, "utf-8", "utf-8",
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001242 tarfile.errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +00001243
1244 pax_headers[keyword] = value
1245 pos += length
1246
Guido van Rossume7ba4952007-06-06 23:52:48 +00001247 # Fetch the next header.
Lars Gustäbel9520a432009-11-22 18:48:49 +00001248 try:
1249 next = self.fromtarfile(tarfile)
1250 except HeaderError:
1251 raise SubsequentHeaderError("missing or bad subsequent header")
Guido van Rossumd8faa362007-04-27 19:54:29 +00001252
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001253 # Process GNU sparse information.
1254 if "GNU.sparse.map" in pax_headers:
1255 # GNU extended sparse format version 0.1.
1256 self._proc_gnusparse_01(next, pax_headers)
1257
1258 elif "GNU.sparse.size" in pax_headers:
1259 # GNU extended sparse format version 0.0.
1260 self._proc_gnusparse_00(next, pax_headers, buf)
1261
1262 elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
1263 # GNU extended sparse format version 1.0.
1264 self._proc_gnusparse_10(next, pax_headers, tarfile)
1265
Guido van Rossume7ba4952007-06-06 23:52:48 +00001266 if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
Guido van Rossume7ba4952007-06-06 23:52:48 +00001267 # Patch the TarInfo object with the extended header info.
1268 next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1269 next.offset = self.offset
1270
1271 if "size" in pax_headers:
1272 # If the extended header replaces the size field,
1273 # we need to recalculate the offset where the next
1274 # header starts.
1275 offset = next.offset_data
1276 if next.isreg() or next.type not in SUPPORTED_TYPES:
1277 offset += next._block(next.size)
1278 tarfile.offset = offset
1279
1280 return next
1281
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001282 def _proc_gnusparse_00(self, next, pax_headers, buf):
1283 """Process a GNU tar extended sparse header, version 0.0.
1284 """
1285 offsets = []
1286 for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
1287 offsets.append(int(match.group(1)))
1288 numbytes = []
1289 for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
1290 numbytes.append(int(match.group(1)))
1291 next.sparse = list(zip(offsets, numbytes))
1292
1293 def _proc_gnusparse_01(self, next, pax_headers):
1294 """Process a GNU tar extended sparse header, version 0.1.
1295 """
1296 sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
1297 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1298
1299 def _proc_gnusparse_10(self, next, pax_headers, tarfile):
1300 """Process a GNU tar extended sparse header, version 1.0.
1301 """
1302 fields = None
1303 sparse = []
1304 buf = tarfile.fileobj.read(BLOCKSIZE)
1305 fields, buf = buf.split(b"\n", 1)
1306 fields = int(fields)
1307 while len(sparse) < fields * 2:
1308 if b"\n" not in buf:
1309 buf += tarfile.fileobj.read(BLOCKSIZE)
1310 number, buf = buf.split(b"\n", 1)
1311 sparse.append(int(number))
1312 next.offset_data = tarfile.fileobj.tell()
1313 next.sparse = list(zip(sparse[::2], sparse[1::2]))
1314
Guido van Rossume7ba4952007-06-06 23:52:48 +00001315 def _apply_pax_info(self, pax_headers, encoding, errors):
1316 """Replace fields with supplemental information from a previous
1317 pax extended or global header.
1318 """
1319 for keyword, value in pax_headers.items():
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001320 if keyword == "GNU.sparse.name":
1321 setattr(self, "path", value)
1322 elif keyword == "GNU.sparse.size":
1323 setattr(self, "size", int(value))
1324 elif keyword == "GNU.sparse.realsize":
1325 setattr(self, "size", int(value))
1326 elif keyword in PAX_FIELDS:
1327 if keyword in PAX_NUMBER_FIELDS:
1328 try:
1329 value = PAX_NUMBER_FIELDS[keyword](value)
1330 except ValueError:
1331 value = 0
1332 if keyword == "path":
1333 value = value.rstrip("/")
1334 setattr(self, keyword, value)
Guido van Rossume7ba4952007-06-06 23:52:48 +00001335
1336 self.pax_headers = pax_headers.copy()
Guido van Rossumd8faa362007-04-27 19:54:29 +00001337
Lars Gustäbel1465cc22010-05-17 18:02:50 +00001338 def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
1339 """Decode a single field from a pax record.
1340 """
1341 try:
1342 return value.decode(encoding, "strict")
1343 except UnicodeDecodeError:
1344 return value.decode(fallback_encoding, fallback_errors)
1345
Guido van Rossumd8faa362007-04-27 19:54:29 +00001346 def _block(self, count):
1347 """Round up a byte count by BLOCKSIZE and return it,
1348 e.g. _block(834) => 1024.
1349 """
1350 blocks, remainder = divmod(count, BLOCKSIZE)
1351 if remainder:
1352 blocks += 1
1353 return blocks * BLOCKSIZE
Thomas Wouters89f507f2006-12-13 04:49:30 +00001354
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001355 def isreg(self):
1356 return self.type in REGULAR_TYPES
1357 def isfile(self):
1358 return self.isreg()
1359 def isdir(self):
1360 return self.type == DIRTYPE
1361 def issym(self):
1362 return self.type == SYMTYPE
1363 def islnk(self):
1364 return self.type == LNKTYPE
1365 def ischr(self):
1366 return self.type == CHRTYPE
1367 def isblk(self):
1368 return self.type == BLKTYPE
1369 def isfifo(self):
1370 return self.type == FIFOTYPE
1371 def issparse(self):
Lars Gustäbel9cbdd752010-10-29 09:08:19 +00001372 return self.sparse is not None
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001373 def isdev(self):
1374 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1375# class TarInfo
1376
1377class TarFile(object):
1378 """The TarFile Class provides an interface to tar archives.
1379 """
1380
1381 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1382
1383 dereference = False # If true, add content of linked file to the
1384 # tar file, else the link.
1385
1386 ignore_zeros = False # If true, skips empty or invalid blocks and
1387 # continues processing.
1388
Lars Gustäbel365aff32009-12-13 11:42:29 +00001389 errorlevel = 1 # If 0, fatal errors only appear in debug
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001390 # messages (if debug >= 0). If > 0, errors
1391 # are passed to the caller as exceptions.
1392
Guido van Rossumd8faa362007-04-27 19:54:29 +00001393 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001394
Guido van Rossume7ba4952007-06-06 23:52:48 +00001395 encoding = ENCODING # Encoding for 8-bit character strings.
1396
1397 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001398
Guido van Rossumd8faa362007-04-27 19:54:29 +00001399 tarinfo = TarInfo # The default TarInfo class to use.
1400
Lars Gustäbelb062a2f2012-05-14 13:18:16 +02001401 fileobject = ExFileObject # The file-object for extractfile().
Guido van Rossumd8faa362007-04-27 19:54:29 +00001402
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       `format', `tarinfo', `dereference', `ignore_zeros', `encoding',
       `debug' and `errorlevel' replace the class-level defaults only when
       they are not None. `errors' is always stored. `pax_headers' is kept
       only when the effective format is PAX_FORMAT; otherwise an empty
       dict is used.

       Raises ValueError for a `mode' other than 'r', 'a' or 'w', and
       ReadError when an invalid header is met while scanning an archive
       opened for appending.
    """
    # Public mode -> mode string handed to bltn_open().
    modes = {"r": "rb", "a": "r+b", "w": "wb"}
    if mode not in modes:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = modes[mode]

    # Truthiness test: a missing (or otherwise falsy) fileobj means the
    # archive file is opened here and therefore owned by this object.
    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    else:
        # Borrow the name from the file object only if it is a real
        # string/bytes path.
        if (name is None and hasattr(fileobj, "name") and
            isinstance(fileobj.name, (str, bytes))):
            name = fileobj.name
        # Only the private _mode is taken from the file object;
        # self.mode keeps the value passed by the caller.
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding
    # Unlike the attributes above, errors is assigned unconditionally.
    self.errors = errors

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            # firstmember is set to None before next() is called.
            # NOTE(review): next() is defined outside this view; presumably
            # it inspects firstmember -- confirm.
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    tarinfo = self.tarinfo.fromtarfile(self)
                    self.members.append(tarinfo)
                except EOFHeaderError:
                    # Rewind to the offset of the block that signalled the
                    # end so that new members are written from there.
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        # Substring test: true for the modes "a" and "w".
        if self.mode in "aw":
            self._loaded = True

            if self.pax_headers:
                # A copy is passed, so self.pax_headers itself is not
                # handed to create_pax_global_header().
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Deliberately bare: whatever went wrong, do not leak a file that
        # was opened here, then re-raise the original exception.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001500
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001501 #--------------------------------------------------------------------------
1502 # Below are the classmethods which act as alternate constructors to the
1503 # TarFile class. The open() method is the only one that is needed for
1504 # public use; it is the "super"-constructor and is able to select an
1505 # adequate "sub"-constructor for a particular compression using the mapping
1506 # from OPEN_METH.
1507 #
1508 # This concept allows one to subclass TarFile without losing the comfort of
1509 # the super-constructor. A sub-constructor is registered and made available
1510 # by adding it to the mapping in OPEN_METH.
1511
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending and return
       the object produced by the selected constructor.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'r:xz'       open for reading with lzma compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'w:xz'       open for writing with lzma compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'r|xz'       open an lzma compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing
       'w|xz'       open an lzma compressed stream for writing

       Raises ValueError when neither `name' nor `fileobj' is given or the
       mode cannot be interpreted, CompressionError for a compression
       type missing from OPEN_METH, and ReadError when no registered
       opener accepts the file in transparent mode.
    """
    if not name and not fileobj:
        raise ValueError("nothing to open")

    # Transparent mode: try every registered opener until one succeeds.
    if mode in ("r", "r:*"):
        for opener_name in cls.OPEN_METH.values():
            opener = getattr(cls, opener_name)
            restore_pos = fileobj.tell() if fileobj is not None else None
            try:
                return opener(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Put a caller-supplied file back where it was before
                # letting the next opener have a go.
                if fileobj is not None:
                    fileobj.seek(restore_pos)
        raise ReadError("file could not be opened successfully")

    # Explicit "filemode:compression".
    if ":" in mode:
        filemode, _sep, comptype = mode.partition(":")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        opener = getattr(cls, cls.OPEN_METH[comptype])
        return opener(name, filemode, fileobj, **kwargs)

    # Stream mode "filemode|compression".
    if "|" in mode:
        filemode, _sep, comptype = mode.partition("|")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            archive = cls(name, filemode, stream, **kwargs)
        except:
            stream.close()
            raise
        # The stream wrapper was created here, so the archive owns it.
        archive._extfileobj = False
        return archive

    if mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001591
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open the uncompressed tar archive `name' for reading, appending or
       writing by calling the class constructor. Raises ValueError for
       any mode other than 'r', 'a' or 'w'.
    """
    allowed_modes = ("r", "a", "w")
    if mode in allowed_modes:
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001599
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the gzip compressed tar archive `name' for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError when gzip.GzipFile cannot be imported, and
       ReadError when an OSError occurs while opening for reading.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    reading = mode == 'r'

    try:
        fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
    except OSError:
        if reading and fileobj is not None:
            raise ReadError("not a gzip file")
        raise

    try:
        archive = cls.taropen(name, mode, fileobj, **kwargs)
    except BaseException as exc:
        # Always release the GzipFile; only an OSError while reading is
        # translated, everything else propagates unchanged.
        fileobj.close()
        if reading and isinstance(exc, OSError):
            raise ReadError("not a gzip file")
        raise
    # The GzipFile wrapper was created here, so the archive must close it.
    archive._extfileobj = False
    return archive
1633
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the bzip2 compressed tar archive `name' for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError when the bz2 module cannot be imported, and
       ReadError when an OSError or EOFError occurs while opening for
       reading.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # A caller-supplied file object takes precedence over the path.
    source = fileobj or name
    fileobj = bz2.BZ2File(source, mode, compresslevel=compresslevel)

    try:
        archive = cls.taropen(name, mode, fileobj, **kwargs)
    except BaseException as exc:
        # Always release the BZ2File; only (OSError, EOFError) while
        # reading is translated, everything else propagates unchanged.
        fileobj.close()
        if mode == 'r' and isinstance(exc, (OSError, EOFError)):
            raise ReadError("not a bzip2 file")
        raise
    # The BZ2File wrapper was created here, so the archive must close it.
    archive._extfileobj = False
    return archive
1662
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open the lzma compressed tar archive `name' for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError when the lzma module cannot be imported, and
       ReadError when an lzma.LZMAError or EOFError occurs while opening
       for reading.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import lzma
    except ImportError:
        raise CompressionError("lzma module is not available")

    # A caller-supplied file object takes precedence over the path.
    source = fileobj or name
    fileobj = lzma.LZMAFile(source, mode, preset=preset)

    try:
        archive = cls.taropen(name, mode, fileobj, **kwargs)
    except BaseException as exc:
        # Always release the LZMAFile; only (LZMAError, EOFError) while
        # reading is translated, everything else propagates unchanged.
        fileobj.close()
        if mode == 'r' and isinstance(exc, (lzma.LZMAError, EOFError)):
            raise ReadError("not an lzma file")
        raise
    # The LZMAFile wrapper was created here, so the archive must close it.
    archive._extfileobj = False
    return archive
1690
# Registry of the *open() constructors: compression type -> method name.
# open() looks the method up by name with getattr().
OPEN_METH = dict(
    tar="taropen",      # uncompressed tar
    gz="gzopen",        # gzip compressed tar
    bz2="bz2open",      # bzip2 compressed tar
    xz="xzopen",        # lzma compressed tar
)
1698
1699 #--------------------------------------------------------------------------
1700 # The public methods which TarFile provides:
1701
def close(self):
    """Close the TarFile. In the modes 'a' and 'w' two zero blocks are
       written as end marker and the archive is padded with zeros up to a
       multiple of RECORDSIZE. The underlying file is closed unless it was
       supplied by the caller. Closing twice is a no-op.
    """
    if self.closed:
        return

    # Mark as closed first so a failing write cannot trigger a second try.
    self.closed = True
    try:
        if self.mode in "aw":
            end_marker = NUL * (BLOCKSIZE * 2)
            self.fileobj.write(end_marker)
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        if not self._extfileobj:
            self.fileobj.close()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001722
def getmember(self, name):
    """Return the TarInfo object that _getmember() finds for `name'.
       KeyError is raised when _getmember() returns None. If a member
       occurs more than once in the archive, its last occurrence is
       assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001733
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete member list requires scanning the whole archive once.
    self._load()
    return self.members
1743
def getnames(self):
    """Return the names of the archive members as a list, in the same
       order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001749
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive.

       Returns None when the file's mode matches none of the supported
       kinds (regular file, directory, fifo, symlink, character or block
       device).
    """
    self._check("aw")

    # A file object's own name takes precedence over `name'.
    if fileobj is not None:
        name = fileobj.name

    # Build the member name: drop the drive, use forward slashes and
    # make the path relative.
    if arcname is None:
        arcname = name
    _drive, arcname = os.path.splitdrive(arcname)
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # fstat for file objects; otherwise lstat unless symlinks are to be
    # followed (or the platform lacks lstat).
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        # Is it a hardlink to an already archived file?
        already_archived = (
            not self.dereference and statres.st_nlink > 1 and
            inode in self.inodes and arcname != self.inodes[inode])
        if already_archived:
            member_type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            member_type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        member_type = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        member_type = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        member_type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        member_type = CHRTYPE
    elif stat.S_ISBLK(stmd):
        member_type = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry data; every other kind is stored empty.
    tarinfo.size = statres.st_size if member_type == REGTYPE else 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = member_type
    tarinfo.linkname = linkname
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if (member_type in (CHRTYPE, BLKTYPE) and
            hasattr(os, "major") and hasattr(os, "minor")):
        tarinfo.devmajor = os.major(statres.st_rdev)
        tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1847
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.
    """
    self._check()

    for member in self:
        if verbose:
            _safe_print(stat.filemode(member.mode))
            owner = "%s/%s" % (member.uname or member.uid,
                               member.gname or member.gid)
            _safe_print(owner)
            # Devices show "major,minor" where other members show a size.
            if member.ischr() or member.isblk():
                size_column = "%10s" % (
                    "%d,%d" % (member.devmajor, member.devminor))
            else:
                size_column = "%10d" % member.size
            _safe_print(size_column)
            timestamp = "%d-%02d-%02d %02d:%02d:%02d" \
                        % time.localtime(member.mtime)[:6]
            _safe_print(timestamp)

        suffix = "/" if member.isdir() else ""
        _safe_print(member.name + suffix)

        if verbose:
            if member.issym():
                _safe_print("-> " + member.linkname)
            if member.islnk():
                _safe_print("link to " + member.linkname)
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001876
def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. Directory entries are added in sorted
       order, so the member order does not depend on the order in which the
       operating system lists them. `exclude' is a deprecated function that
       should return True for each filename to be excluded. `filter' is a
       function that expects a TarInfo object argument and returns the
       changed TarInfo object, if it returns None the TarInfo object will be
       excluded from the archive.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as f:
            self.addfile(tarinfo, f)

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            # os.listdir() returns entries in arbitrary order; sorting
            # makes the resulting archive layout reproducible.
            for f in sorted(os.listdir(name)):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                        recursive, exclude, filter=filter)

    else:
        self.addfile(tarinfo)
1937
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a shallow copy; the copy is what ends up in self.members.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad the last block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        full_blocks, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001963
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().

       Directories are first created with mode 0o700 and receive their
       real attributes at the end, deepest paths first. If a directory
       occurs more than once, the attributes of its last occurrence are
       applied last and therefore win.

       Member names are joined to `path' without any validation here.
       An ExtractError raised while setting directory attributes is
       re-raised only if errorlevel > 1, otherwise it is reported via
       _dbg().
    """
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directories with a safe mode.
            directories.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 0o700
        # Do not set_attrs directories, as we will do that further down
        self.extract(tarinfo, path, set_attrs=not tarinfo.isdir())

    # Reverse sort directories so that children are handled before their
    # parents. reverse=True keeps equal names in archive order (the sort
    # is stable), whereas sort() followed by reverse() would flip them
    # and let the first duplicate override the last one.
    directories.sort(key=lambda a: a.name, reverse=True)

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)
2001
def extract(self, member, path="", set_attrs=True):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a TarInfo object. You can
       specify a different directory using `path'. File attributes (owner,
       mtime, mode) are set unless `set_attrs' is False.

       An OSError is re-raised only if errorlevel > 0 and an ExtractError
       only if errorlevel > 1; otherwise the error is reported via _dbg().
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
                             set_attrs=set_attrs)
    except OSError as exc:
        if self.errorlevel > 0:
            raise
        if exc.filename is None:
            self._dbg(1, "tarfile: %s" % exc.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (exc.strerror, exc.filename))
    except ExtractError as exc:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % exc)
2036
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. For a regular file (or a member of
       an unsupported type) the object built by self.fileobject is
       returned; for a link, the file object of its target. Otherwise,
       None is returned. StreamError is raised for a link when the
       archive is read from a _Stream.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Members with unknown types are treated as regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    # A small but ugly workaround for the case that someone tries
    # to extract a (sym)link as a file-object from a non-seekable
    # stream of tar blocks.
    if isinstance(self.fileobj, _Stream):
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2067
def _extract_member(self, tarinfo, targetpath, set_attrs=True):
    """Extract the TarInfo object tarinfo to a physical file called
       targetpath. Owner, mode and modification time are applied
       afterwards unless set_attrs is false.
    """
    # Drop trailing slashes and switch to the platform's path separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Create missing parent directories. They are not part of the
    # archive, so they get default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        note = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        note = tarinfo.name
    self._dbg(1, note)

    # Pick the make*() method that matches the member type; the order of
    # the checks is significant.
    if tarinfo.isreg():
        make = self.makefile
    elif tarinfo.isdir():
        make = self.makedir
    elif tarinfo.isfifo():
        make = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        make = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        make = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        make = self.makeunknown
    else:
        make = self.makefile
    make(tarinfo, targetpath)

    if not set_attrs:
        return

    self.chown(tarinfo, targetpath)
    # Mode and mtime are not applied to symbolic links.
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002110
2111 #--------------------------------------------------------------------------
2112 # Below are the different file methods. They are called via
2113 # _extract_member() when extract() is called. They can be replaced in a
2114 # subclass to implement other functionality.
2115
def makedir(self, tarinfo, targetpath):
    """Create the directory targetpath; an already existing path is left
       alone.
    """
    # Start with an owner-only mode. _extract_member() applies the
    # archived mode later on when attributes are requested.
    safe_mode = 0o700
    try:
        os.mkdir(targetpath, safe_mode)
    except FileExistsError:
        # Something is already there -- nothing to create.
        pass
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002125
def makefile(self, tarinfo, targetpath):
    """Write the data of tarinfo from the archive into a new file called
       targetpath.
    """
    archive = self.fileobj
    archive.seek(tarinfo.offset_data)
    with bltn_open(targetpath, "wb") as out:
        if tarinfo.sparse is None:
            # Ordinary member: one contiguous run of tarinfo.size bytes.
            copyfileobj(archive, out, tarinfo.size)
        else:
            # Sparse member: each (offset, size) pair names a data run
            # that is written at its offset in the output file.
            for position, length in tarinfo.sparse:
                out.seek(position)
                copyfileobj(archive, out, length)
        # Make the file end exactly at tarinfo.size.
        out.seek(tarinfo.size)
        out.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002140
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as if it were a
       regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    note = ("tarfile: Unknown file type %r, "
            "extracted as regular file.") % tarinfo.type
    self._dbg(1, note)
2148
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called targetpath.

       Raises ExtractError when the platform has no os.mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002156
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node called targetpath.

       Raises ExtractError when os.mknod() or os.makedev() is missing.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the archived permission bits with the device type bit.
    type_bit = stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR
    mode = tarinfo.mode | type_bit

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2171
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.

       An entry that already exists at targetpath is removed before a
       symbolic link is created, so extracting the same archive twice
       does not fail with FileExistsError.

       Raises ExtractError when the fallback copy is needed but the link
       target cannot be found inside the archive.
    """
    try:
        # For systems that support symbolic and hard links.
        if tarinfo.issym():
            if os.path.lexists(targetpath):
                # os.symlink() refuses to replace an existing entry.
                # lexists() is used so that dangling symlinks are
                # detected as well.
                os.unlink(targetpath)
            os.symlink(tarinfo.linkname, targetpath)
        else:
            # extract() stores the on-disk location of the hard link's
            # target in tarinfo._link_target.
            if os.path.exists(tarinfo._link_target):
                os.link(tarinfo._link_target, targetpath)
            else:
                # The target is not on disk; extract a copy of the
                # referenced member instead.
                self._extract_member(self._find_link_target(tarinfo),
                                     targetpath)
    except symlink_exception:
        # Links are not available here: fall back to extracting a copy
        # of the member the link points to.
        try:
            self._extract_member(self._find_link_target(tarinfo),
                                 targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002194
def chown(self, tarinfo, targetpath):
    """Set the owner of targetpath according to tarinfo.

       Nothing happens unless the pwd module is available and the process
       runs with effective uid 0. Raises ExtractError when the ownership
       change fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Prefer the ids that the archived names map to on this system and
    # fall back to the archived numeric ids for unknown names.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        gid = tarinfo.gid
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        uid = tarinfo.uid

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself rather than what it points to.
            os.lchown(targetpath, uid, gid)
        else:
            os.chown(targetpath, uid, gid)
    except OSError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002215
def chmod(self, tarinfo, targetpath):
    """Set the permission bits of targetpath to tarinfo.mode.

       Does nothing when os.chmod() is unavailable. Raises ExtractError
       when the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except OSError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002224
def utime(self, tarinfo, targetpath):
    """Set access and modification time of targetpath to tarinfo.mtime.

       Does nothing when os.utime() is unavailable. Raises ExtractError
       when the times cannot be changed.
    """
    if hasattr(os, 'utime'):
        stamp = tarinfo.mtime
        try:
            # The archive records only mtime; it is used for atime too.
            os.utime(targetpath, (stamp, stamp))
        except OSError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002234
2235 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       Every member returned from a freshly parsed header is appended to
       self.members. When no further member is found, self._loaded is set
       to True.

       Raises ReadError if a SubsequentHeaderError occurs, or if the very
       first header (self.offset == 0) is empty, truncated or invalid
       (the latter only when ignore_zeros is not set).
    """
    # The archive must be open in mode "r" or "a".
    self._check("ra")
    # A member pending in self.firstmember is handed out once and the
    # slot is cleared.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    tarinfo = None
    # Loop until a header was parsed or the end of the archive is
    # assumed. Only the ignore_zeros branches `continue`; every other
    # path that does not raise reaches the `break` at the bottom.
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as e:
            if self.ignore_zeros:
                # Skip this block and retry with the following one.
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            # Otherwise fall through: tarinfo stays None.
        except InvalidHeaderError as e:
            if self.ignore_zeros:
                # Skip the invalid block and retry with the following one.
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                # Not even the first header could be parsed.
                raise ReadError(str(e))
            # An invalid header past offset 0 ends the iteration silently.
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as e:
            if self.offset == 0:
                raise ReadError(str(e))
        except SubsequentHeaderError as e:
            # Always an error, regardless of the offset.
            raise ReadError(str(e))
        break

    if tarinfo is not None:
        self.members.append(tarinfo)
    else:
        # Nothing more to read: mark the member list as complete.
        self._loaded = True

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002281
2282 #--------------------------------------------------------------------------
2283 # Little helper methods:
2284
def _getmember(self, name, tarinfo=None, normalize=False):
    """Return the last archive member called name, or None if there is
       no such member.

       If tarinfo is given, only members stored before it are searched.
       With normalize set, both name and the member names are passed
       through os.path.normpath() before they are compared.
    """
    # getmembers() ensures that all members have been loaded.
    candidates = self.getmembers()

    # Limit the member search list up to tarinfo.
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    if normalize:
        name = os.path.normpath(name)
        key = os.path.normpath
    else:
        def key(member_name):
            return member_name

    # Search from bottom to top so that the latest entry wins.
    return next((candidate for candidate in reversed(candidates)
                 if name == key(candidate.name)), None)
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002307
def _load(self):
    """Call next() until it returns None, then mark the archive as
       completely loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2317
def _check(self, mode=None):
    """Raise OSError if the TarFile is closed or, when mode is given, if
       the TarFile's own mode does not occur in mode.
    """
    if self.closed:
        raise OSError("%s is closed" % self.__class__.__name__)
    if mode is None:
        # No particular mode is required by the caller.
        return
    # mode is a string of acceptable mode characters, e.g. "ra".
    if self.mode not in mode:
        raise OSError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002326
def _find_link_target(self, tarinfo):
    """Return the archive member that the symlink or hardlink member
       tarinfo points to.

       Raises KeyError when no such member exists.
    """
    if tarinfo.issym():
        # A symlink's target is taken relative to the directory of the
        # link itself, and the entire archive is searched.
        parts = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join(part for part in parts if part)
        limit = None
    else:
        # Search the archive before the link, because a hard link is
        # just a reference to an already archived file.
        linkname = tarinfo.linkname
        limit = tarinfo

    target = self._getmember(linkname, tarinfo=limit, normalize=True)
    if target is None:
        raise KeyError("linkname %r not found" % linkname)
    return target
2345
def __iter__(self):
    """Return an iterator over the members: a plain list iterator once
       the archive is fully loaded, otherwise a TarIter.
    """
    return iter(self.members) if self._loaded else TarIter(self)
2353
def _dbg(self, level, msg):
    """Print msg to sys.stderr if level does not exceed self.debug.
    """
    wanted = level <= self.debug
    if wanted:
        print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002359
def __enter__(self):
    """Enter a with-block: verify the TarFile via _check() and return it.
    """
    # _check() raises if the archive has already been closed.
    self._check()
    return self
2363
def __exit__(self, type, value, traceback):
    """Leave a with-block. Without an exception the archive is closed
       normally; otherwise only the underlying file object is closed
       (unless it is an external one) and the TarFile is marked closed.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because
    # it would try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002373# class TarFile
2374
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Start iterating over tarfile at its first member.
        """
        self.index = 0
        self.tarfile = tarfile

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def __next__(self):
        """Return the next member, reading it through TarFile.next() when
           it is not yet in TarFile.members. Once the archive is
           exhausted, TarFile is flagged as _loaded.
        """
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        archive = self.tarfile

        if self.index == 0 and archive.firstmember is not None:
            tarinfo = archive.next()
        elif self.index < len(archive.members):
            # Already read (possibly by someone else): serve from the list.
            tarinfo = archive.members[self.index]
        elif archive._loaded:
            raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration

        self.index += 1
        return tarinfo
2412
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002413#--------------------
2414# exported functions
2415#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.
    """
    # `open' is this module's open (TarFile.open), not the builtin.
    try:
        open(name).close()
    except TarError:
        return False
    return True
2426
# Module-level convenience alias for TarFile.open.
# NOTE: this rebinds the name `open' inside this module, so the open()
# calls in is_tarfile() and main() resolve to TarFile.open rather than to
# the builtin open().
open = TarFile.open
Serhiy Storchakad27b4552013-11-24 01:53:29 +02002428
2429
def main():
    """Command line interface of the module.

       Exactly one of the options -l/--list, -e/--extract, -c/--create
       and -t/--test selects the action; -v/--verbose adds informational
       output. Without an action the help text is printed and the process
       exits with status 1.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='A simple command line interface for tarfile module.')
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    actions = parser.add_mutually_exclusive_group()
    actions.add_argument('-l', '--list', metavar='<tarfile>',
                         help='Show listing of a tarfile')
    actions.add_argument('-e', '--extract', nargs='+',
                         metavar=('<tarfile>', '<output_dir>'),
                         help='Extract tarfile into target dir')
    actions.add_argument('-c', '--create', nargs='+',
                         metavar=('<name>', '<file>'),
                         help='Create tarfile from sources')
    actions.add_argument('-t', '--test', metavar='<tarfile>',
                         help='Test if a tarfile is valid')
    args = parser.parse_args()

    # parser.exit() terminates the process, so it is used as a guard below.
    not_a_tar = '{!r} is not a tar archive.\n'

    if args.test:
        src = args.test
        if not is_tarfile(src):
            parser.exit(1, not_a_tar.format(src))
        with open(src, 'r') as tar:
            tar.getmembers()
            # NOTE(review): the member list is also written to stderr;
            # this looks like leftover debugging output -- kept as is.
            print(tar.getmembers(), file=sys.stderr)
        if args.verbose:
            print('{!r} is a tar archive.'.format(src))

    elif args.list:
        src = args.list
        if not is_tarfile(src):
            parser.exit(1, not_a_tar.format(src))
        with TarFile.open(src, 'r:*') as tf:
            tf.list(verbose=args.verbose)

    elif args.extract:
        # One operand: the archive. Two operands: archive and output dir.
        operands = args.extract
        if len(operands) > 2:
            parser.exit(1, parser.format_help())
        src = operands[0]
        curdir = operands[1] if len(operands) == 2 else os.curdir

        if not is_tarfile(src):
            parser.exit(1, not_a_tar.format(src))
        with TarFile.open(src, 'r:*') as tf:
            tf.extractall(path=curdir)
        if args.verbose:
            if curdir == '.':
                msg = '{!r} file is extracted.'.format(src)
            else:
                msg = ('{!r} file is extracted '
                       'into {!r} directory.').format(src, curdir)
            print(msg)

    elif args.create:
        # The first operand names the archive, the rest are its sources.
        tar_name, *tar_files = args.create
        ext = os.path.splitext(tar_name)[1]
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        # Pick the compression from the file extension, if it is known.
        codec = compressions.get(ext)
        tar_mode = 'w' if codec is None else 'w:' + codec

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))

    else:
        parser.exit(1, parser.format_help())
2519
# Run the command line interface when the module is executed as a script.
if __name__ == '__main__':
    main()