blob: 3409efea03e968a7d47937d2c78d4175daaead88 [file] [log] [blame]
Benjamin Peterson90f5ba52010-03-11 22:53:45 +00001#!/usr/bin/env python3
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002#-------------------------------------------------------------------
3# tarfile.py
4#-------------------------------------------------------------------
Christian Heimes9c1257e2007-11-04 11:37:22 +00005# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00006# All rights reserved.
7#
8# Permission is hereby granted, free of charge, to any person
9# obtaining a copy of this software and associated documentation
10# files (the "Software"), to deal in the Software without
11# restriction, including without limitation the rights to use,
12# copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the
14# Software is furnished to do so, subject to the following
15# conditions:
16#
17# The above copyright notice and this permission notice shall be
18# included in all copies or substantial portions of the Software.
19#
20# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
22# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
25# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
27# OTHER DEALINGS IN THE SOFTWARE.
28#
29"""Read from and write to tar format archives.
30"""
31
Guido van Rossumd8faa362007-04-27 19:54:29 +000032version = "0.9.0"
Guido van Rossum98297ee2007-11-06 21:34:58 +000033__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
Senthil Kumaran7c9719c2011-07-28 22:32:49 +080034__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
35__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $"
Guido van Rossum98297ee2007-11-06 21:34:58 +000036__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037
38#---------
39# Imports
40#---------
41import sys
42import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020043import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000044import shutil
45import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000046import time
47import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000048import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000049import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000050
51try:
52 import grp, pwd
Brett Cannoncd171c82013-07-04 17:43:24 -040053except ImportError:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000054 grp = pwd = None
55
# Exceptions that are treated as "a symbolic link cannot be created here":
#   NotImplementedError - raised by os.symlink on Windows prior to 6.0
#   OSError             - (winerror=1314) raised if the caller does not
#                         hold the SeCreateSymbolicLinkPrivilege privilege
# OSError is a builtin that always exists, so it is listed directly instead
# of being appended inside a try/except NameError guard that can never fire.
symlink_exception = (AttributeError, NotImplementedError, OSError)
64
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000065# from tarfile import *
66__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
Georg Brandl1a3284e2007-12-02 09:40:06 +000068from builtins import open as _open # Since 'open' is TarFile.open
Guido van Rossum8f78fe92006-08-24 04:03:53 +000069
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000070#---------------------------------------------------------
71# tar constants
72#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# Both magic strings cover the 6-byte magic field plus the 2-byte version
# field of the header, so each of them has to be exactly 8 bytes long.
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string: "ustar", 2 spaces, NUL
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string: "ustar", NUL, "00"

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000105
106#---------------------------------------------------------
107# tarfile constants
108#---------------------------------------------------------
# Member types that tarfile knows how to handle.
SUPPORTED_TYPES = (
    REGTYPE,
    AREGTYPE,
    LNKTYPE,
    SYMTYPE,
    DIRTYPE,
    FIFOTYPE,
    CONTTYPE,
    CHRTYPE,
    BLKTYPE,
    GNUTYPE_LONGNAME,
    GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Member types that are handled like a regular file.
REGULAR_TYPES = (
    REGTYPE,
    AREGTYPE,
    CONTTYPE,
    GNUTYPE_SPARSE,
)

# Member types that belong to the GNU tar format.
GNU_TYPES = (
    GNUTYPE_LONGNAME,
    GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Pax header fields that override the TarInfo attribute of the same name.
PAX_FIELDS = (
    "path",
    "linkpath",
    "size",
    "mtime",
    "uid",
    "gid",
    "uname",
    "gname",
)

# Pax header fields that are affected by hdrcharset.
PAX_NAME_FIELDS = {
    "path",
    "linkpath",
    "uname",
    "gname",
}

# Pax header fields that hold numbers, mapped to the converter for
# their value; every other field is treated as a string.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int,
}
141
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000142#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000143# initialization
144#---------------------------------------------------------
# Default encoding for member names: always UTF-8 on Windows flavours,
# otherwise whatever the interpreter reports as the filesystem encoding.
ENCODING = "utf-8" if os.name in ("nt", "ce") else sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000149
150#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000151# Some useful functions
152#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000153
def stn(s, length, encoding, errors):
    """Encode the string s and fit it into a field of exactly length
       bytes: longer values are cut off, shorter ones are padded with
       NUL bytes.
    """
    encoded = s.encode(encoding, errors)
    # A negative count yields an empty padding, so over-long values
    # are simply truncated.
    padding = NUL * (length - len(encoded))
    return encoded[:length] + padding
Thomas Wouters477c8d52006-05-27 19:21:47 +0000159
def nts(s, encoding, errors):
    """Decode a bytes field to a string, ignoring everything from the
       first null byte on (if there is one).
    """
    end = s.find(b"\0")
    field = s if end == -1 else s[:end]
    return field.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000167
def nti(s):
    """Convert a number field to a python number.

       Raises InvalidHeaderError if the field is neither base-256
       encoded nor a valid octal string.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    marker = s[0]
    if marker not in (0o200, 0o377):
        # Plain encoding: octal digits, optionally null-terminated.
        # An empty field counts as zero.
        try:
            return int(nts(s, "ascii", "strict") or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")

    # Base-256 encoding: the bytes after the marker are a big-endian
    # number.
    value = int.from_bytes(s[1:], "big")
    if marker == 0o377:
        # A 0o377 marker means the number is negative.
        value = -(256 ** (len(s) - 1) - value)
    return value
186
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

       n is truncated to an integer first. The result is digits bytes
       long: a bytes object for the octal encoding, a bytearray for the
       GNU base-256 encoding.

       Raises ValueError if n does not fit into the field with the
       encodings that format allows.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicates this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.

    # Normalize once so that both encodings see an integer. Without this
    # a float (e.g. a fractional mtime) that does not fit the octal form
    # fails with a TypeError in the bit operations below.
    n = int(n)

    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        if n >= 0:
            s = bytearray([0o200])
        else:
            s = bytearray([0o377])
            # Two's complement, so the low bytes can be peeled off below.
            n = 256 ** digits + n

        # Emit the digits-1 low bytes, least significant first; inserting
        # at index 1 leaves them in big-endian order after the marker.
        for i in range(digits - 1):
            s.insert(1, n & 0o377)
            n >>= 8
    else:
        raise ValueError("overflow in number field")

    return s
214
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

       All bytes of the header are summed up except for the chksum
       field, which is treated as if it was filled with spaces.
       According to the GNU tar sources, some tars (Sun and NeXT)
       calculate chksum with signed char, which gives a different
       result if there are bytes with the high bit set in the buffer.
       That is why both variants are calculated.
    """
    # 148 bytes before the chksum field, 8 skipped bytes for the field
    # itself, 356 bytes after it.
    unsigned_bytes = struct.unpack_from("148B8x356B", buf)
    signed_bytes = struct.unpack_from("148b8x356b", buf)
    # 256 is the value of the skipped field when filled with 8 spaces.
    blank_field = 256
    return blank_field + sum(unsigned_bytes), blank_field + sum(signed_bytes)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000227
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises OSError if src runs out of data before length bytes
       have been copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    def transfer(nbytes):
        # Move exactly nbytes; a short read means src ended early.
        data = src.read(nbytes)
        if len(data) < nbytes:
            raise OSError("end of file reached")
        dst.write(data)

    chunk_size = 16 * 1024
    full_chunks, tail = divmod(length, chunk_size)
    for _ in range(full_chunks):
        transfer(chunk_size)
    if tail != 0:
        transfer(tail)
    return
252
def filemode(mode):
    """Deprecated in this location; use stat.filemode."""
    import warnings

    # stacklevel=2 attributes the warning to the caller of filemode().
    warnings.warn(
        "deprecated in favor of stat.filemode",
        category=DeprecationWarning,
        stacklevel=2,
    )
    return stat.filemode(mode)
259
Serhiy Storchaka3b4f1592014-02-05 20:53:36 +0200260def _safe_print(s):
261 encoding = getattr(sys.stdout, 'encoding', None)
262 if encoding is not None:
263 s = s.encode(encoding, 'backslashreplace').decode(encoding)
264 print(s, end=' ')
265
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000266
class TarError(Exception):
    """Base class of all exceptions defined here."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base class of all header related exceptions."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000300
301#---------------------------
302# internal stream interface
303#---------------------------
304class _LowLevelFile:
305 """Low-level file object. Supports reading and writing.
306 It is used instead of a regular file object for streaming
307 access.
308 """
309
310 def __init__(self, name, mode):
311 mode = {
312 "r": os.O_RDONLY,
313 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
314 }[mode]
315 if hasattr(os, "O_BINARY"):
316 mode |= os.O_BINARY
Lars Gustäbeld6eb70b2010-04-29 15:37:02 +0000317 self.fd = os.open(name, mode, 0o666)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000318
319 def close(self):
320 os.close(self.fd)
321
322 def read(self, size):
323 return os.read(self.fd, size)
324
325 def write(self, s):
326 os.write(self.fd, s)
327
328class _Stream:
329 """Class that serves as an adapter between TarFile and
330 a stream-like object. The stream-like object only
331 needs to have a read() or write() method and is accessed
332 blockwise. Use of gzip or bzip2 compression is possible.
333 A stream-like object could be for example: sys.stdin,
334 sys.stdout, a socket, a tape device etc.
335
336 _Stream is intended to be used only internally.
337 """
338
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000339 def __init__(self, name, mode, comptype, fileobj, bufsize):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000340 """Construct a _Stream object.
341 """
342 self._extfileobj = True
343 if fileobj is None:
344 fileobj = _LowLevelFile(name, mode)
345 self._extfileobj = False
346
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000347 if comptype == '*':
348 # Enable transparent compression detection for the
349 # stream interface
350 fileobj = _StreamProxy(fileobj)
351 comptype = fileobj.getcomptype()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000352
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000353 self.name = name or ""
354 self.mode = mode
355 self.comptype = comptype
356 self.fileobj = fileobj
357 self.bufsize = bufsize
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000358 self.buf = b""
Guido van Rossume2a383d2007-01-15 16:59:06 +0000359 self.pos = 0
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000360 self.closed = False
361
Antoine Pitrou605c2932010-09-23 20:15:14 +0000362 try:
363 if comptype == "gz":
364 try:
365 import zlib
Brett Cannoncd171c82013-07-04 17:43:24 -0400366 except ImportError:
Antoine Pitrou605c2932010-09-23 20:15:14 +0000367 raise CompressionError("zlib module is not available")
368 self.zlib = zlib
369 self.crc = zlib.crc32(b"")
370 if mode == "r":
371 self._init_read_gz()
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100372 self.exception = zlib.error
Antoine Pitrou605c2932010-09-23 20:15:14 +0000373 else:
374 self._init_write_gz()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000375
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100376 elif comptype == "bz2":
Antoine Pitrou605c2932010-09-23 20:15:14 +0000377 try:
378 import bz2
Brett Cannoncd171c82013-07-04 17:43:24 -0400379 except ImportError:
Antoine Pitrou605c2932010-09-23 20:15:14 +0000380 raise CompressionError("bz2 module is not available")
381 if mode == "r":
382 self.dbuf = b""
383 self.cmp = bz2.BZ2Decompressor()
Andrew Svetlovf7a17b42012-12-25 16:47:37 +0200384 self.exception = OSError
Antoine Pitrou605c2932010-09-23 20:15:14 +0000385 else:
386 self.cmp = bz2.BZ2Compressor()
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100387
388 elif comptype == "xz":
389 try:
390 import lzma
Brett Cannoncd171c82013-07-04 17:43:24 -0400391 except ImportError:
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100392 raise CompressionError("lzma module is not available")
393 if mode == "r":
394 self.dbuf = b""
395 self.cmp = lzma.LZMADecompressor()
396 self.exception = lzma.LZMAError
397 else:
398 self.cmp = lzma.LZMACompressor()
399
400 elif comptype != "tar":
401 raise CompressionError("unknown compression type %r" % comptype)
402
Antoine Pitrou605c2932010-09-23 20:15:14 +0000403 except:
404 if not self._extfileobj:
405 self.fileobj.close()
406 self.closed = True
407 raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000408
409 def __del__(self):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000410 if hasattr(self, "closed") and not self.closed:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000411 self.close()
412
413 def _init_write_gz(self):
414 """Initialize for writing with gzip compression.
415 """
416 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
417 -self.zlib.MAX_WBITS,
418 self.zlib.DEF_MEM_LEVEL,
419 0)
Guido van Rossume2a383d2007-01-15 16:59:06 +0000420 timestamp = struct.pack("<L", int(time.time()))
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000421 self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000422 if self.name.endswith(".gz"):
423 self.name = self.name[:-3]
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000424 # RFC1952 says we must use ISO-8859-1 for the FNAME field.
425 self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000426
427 def write(self, s):
428 """Write string s to the stream.
429 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000430 if self.comptype == "gz":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000431 self.crc = self.zlib.crc32(s, self.crc)
432 self.pos += len(s)
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000433 if self.comptype != "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000434 s = self.cmp.compress(s)
435 self.__write(s)
436
437 def __write(self, s):
438 """Write string s to the stream if a whole new block
439 is ready to be written.
440 """
441 self.buf += s
442 while len(self.buf) > self.bufsize:
443 self.fileobj.write(self.buf[:self.bufsize])
444 self.buf = self.buf[self.bufsize:]
445
446 def close(self):
447 """Close the _Stream object. No operation should be
448 done on it afterwards.
449 """
450 if self.closed:
451 return
452
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000453 if self.mode == "w" and self.comptype != "tar":
Martin v. Löwisc234a522004-08-22 21:28:33 +0000454 self.buf += self.cmp.flush()
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000455
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000456 if self.mode == "w" and self.buf:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000457 self.fileobj.write(self.buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000458 self.buf = b""
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000459 if self.comptype == "gz":
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000460 # The native zlib crc is an unsigned 32-bit integer, but
461 # the Python wrapper implicitly casts that to a signed C
462 # long. So, on a 32-bit box self.crc may "look negative",
463 # while the same crc on a 64-bit box may "look positive".
464 # To avoid irksome warnings from the `struct` module, force
465 # it to look positive on all boxes.
Guido van Rossume2a383d2007-01-15 16:59:06 +0000466 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
467 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000468
469 if not self._extfileobj:
470 self.fileobj.close()
471
472 self.closed = True
473
474 def _init_read_gz(self):
475 """Initialize for reading a gzip compressed fileobj.
476 """
477 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000478 self.dbuf = b""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000479
480 # taken from gzip.GzipFile with some alterations
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000481 if self.__read(2) != b"\037\213":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000482 raise ReadError("not a gzip file")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000483 if self.__read(1) != b"\010":
Thomas Wouters477c8d52006-05-27 19:21:47 +0000484 raise CompressionError("unsupported compression method")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000485
486 flag = ord(self.__read(1))
487 self.__read(6)
488
489 if flag & 4:
490 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
491 self.read(xlen)
492 if flag & 8:
493 while True:
494 s = self.__read(1)
495 if not s or s == NUL:
496 break
497 if flag & 16:
498 while True:
499 s = self.__read(1)
500 if not s or s == NUL:
501 break
502 if flag & 2:
503 self.__read(2)
504
505 def tell(self):
506 """Return the stream's file pointer position.
507 """
508 return self.pos
509
510 def seek(self, pos=0):
511 """Set the stream's file pointer to pos. Negative seeking
512 is forbidden.
513 """
514 if pos - self.pos >= 0:
515 blocks, remainder = divmod(pos - self.pos, self.bufsize)
Guido van Rossum805365e2007-05-07 22:24:25 +0000516 for i in range(blocks):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000517 self.read(self.bufsize)
518 self.read(remainder)
519 else:
Thomas Wouters477c8d52006-05-27 19:21:47 +0000520 raise StreamError("seeking backwards is not allowed")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000521 return self.pos
522
523 def read(self, size=None):
524 """Return the next size number of bytes from the stream.
525 If size is not defined, return all bytes of the stream
526 up to EOF.
527 """
528 if size is None:
529 t = []
530 while True:
531 buf = self._read(self.bufsize)
532 if not buf:
533 break
534 t.append(buf)
535 buf = "".join(t)
536 else:
537 buf = self._read(size)
538 self.pos += len(buf)
539 return buf
540
541 def _read(self, size):
542 """Return size bytes from the stream.
543 """
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000544 if self.comptype == "tar":
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000545 return self.__read(size)
546
547 c = len(self.dbuf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000548 while c < size:
549 buf = self.__read(self.bufsize)
550 if not buf:
551 break
Guido van Rossumd8faa362007-04-27 19:54:29 +0000552 try:
553 buf = self.cmp.decompress(buf)
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100554 except self.exception:
Guido van Rossumd8faa362007-04-27 19:54:29 +0000555 raise ReadError("invalid compressed data")
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000556 self.dbuf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000557 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000558 buf = self.dbuf[:size]
559 self.dbuf = self.dbuf[size:]
560 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000561
562 def __read(self, size):
563 """Return size bytes from stream. If internal buffer is empty,
564 read another block from the stream.
565 """
566 c = len(self.buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000567 while c < size:
568 buf = self.fileobj.read(self.bufsize)
569 if not buf:
570 break
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000571 self.buf += buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000572 c += len(buf)
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000573 buf = self.buf[:size]
574 self.buf = self.buf[size:]
575 return buf
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000576# class _Stream
577
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000578class _StreamProxy(object):
579 """Small proxy class that enables transparent compression
580 detection for the Stream interface (mode 'r|*').
581 """
582
583 def __init__(self, fileobj):
584 self.fileobj = fileobj
585 self.buf = self.fileobj.read(BLOCKSIZE)
586
587 def read(self, size):
588 self.read = self.fileobj.read
589 return self.buf
590
591 def getcomptype(self):
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100592 if self.buf.startswith(b"\x1f\x8b\x08"):
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000593 return "gz"
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100594 elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000595 return "bz2"
Lars Gustäbel0a9dd2f2011-12-10 20:38:14 +0100596 elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
597 return "xz"
598 else:
599 return "tar"
Martin v. Löwis78be7df2005-03-05 12:47:42 +0000600
601 def close(self):
602 self.fileobj.close()
603# class StreamProxy
604
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000605#------------------------
606# Extraction file object
607#------------------------
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000608class _FileInFile(object):
609 """A thin wrapper around an existing file object that
610 provides a part of its data as an individual file
611 object.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000612 """
613
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000614 def __init__(self, fileobj, offset, size, blockinfo=None):
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000615 self.fileobj = fileobj
616 self.offset = offset
617 self.size = size
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000618 self.position = 0
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200619 self.name = getattr(fileobj, "name", None)
620 self.closed = False
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000621
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000622 if blockinfo is None:
623 blockinfo = [(0, size)]
624
625 # Construct a map with data and zero blocks.
626 self.map_index = 0
627 self.map = []
628 lastpos = 0
629 realpos = self.offset
630 for offset, size in blockinfo:
631 if offset > lastpos:
632 self.map.append((False, lastpos, offset, None))
633 self.map.append((True, offset, offset + size, realpos))
634 realpos += size
635 lastpos = offset + size
636 if lastpos < self.size:
637 self.map.append((False, lastpos, self.size, None))
638
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200639 def flush(self):
640 pass
641
642 def readable(self):
643 return True
644
645 def writable(self):
646 return False
647
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000648 def seekable(self):
Lars Gustäbelb506dc32007-08-07 18:36:16 +0000649 return self.fileobj.seekable()
650
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000651 def tell(self):
652 """Return the current file position.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000653 """
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000654 return self.position
655
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200656 def seek(self, position, whence=io.SEEK_SET):
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000657 """Seek to a position in the file.
658 """
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200659 if whence == io.SEEK_SET:
660 self.position = min(max(position, 0), self.size)
661 elif whence == io.SEEK_CUR:
662 if position < 0:
663 self.position = max(self.position + position, 0)
664 else:
665 self.position = min(self.position + position, self.size)
666 elif whence == io.SEEK_END:
667 self.position = max(min(self.size + position, self.size), 0)
668 else:
669 raise ValueError("Invalid argument")
670 return self.position
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000671
672 def read(self, size=None):
673 """Read data from the file.
674 """
675 if size is None:
676 size = self.size - self.position
677 else:
678 size = min(size, self.size - self.position)
679
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000680 buf = b""
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000681 while size > 0:
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000682 while True:
683 data, start, stop, offset = self.map[self.map_index]
684 if start <= self.position < stop:
685 break
686 else:
687 self.map_index += 1
688 if self.map_index == len(self.map):
689 self.map_index = 0
690 length = min(size, stop - self.position)
691 if data:
Lars Gustäbeldd071042011-02-23 11:42:22 +0000692 self.fileobj.seek(offset + (self.position - start))
693 buf += self.fileobj.read(length)
Lars Gustäbel9cbdd752010-10-29 09:08:19 +0000694 else:
695 buf += NUL * length
696 size -= length
697 self.position += length
698 return buf
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000699
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200700 def readinto(self, b):
701 buf = self.read(len(b))
702 b[:len(buf)] = buf
703 return len(buf)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000704
705 def close(self):
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000706 self.closed = True
Lars Gustäbel7a919e92012-05-05 18:15:03 +0200707#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000708
class ExFileObject(io.BufferedReader):
    """Buffered reader for the data of a single archive member."""

    def __init__(self, tarfile, tarinfo):
        """tarfile provides the file object of the archive, tarinfo
           the data offset, the size and the sparse information of
           the member.
        """
        member_data = _FileInFile(
            fileobj=tarfile.fileobj,
            offset=tarinfo.offset_data,
            size=tarinfo.size,
            blockinfo=tarinfo.sparse,
        )
        super().__init__(member_data)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000716
717#------------------
718# Exported Classes
719#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
                 "chksum", "type", "linkname", "uname", "gname",
                 "devmajor", "devminor",
                 "offset", "offset_data", "pax_headers", "sparse",
                 "tarfile", "_sparse_structs", "_link_target")

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.

           The mode is masked with 0o7777 and a "/" is appended to the
           name of a DIRTYPE member that does not already end with one.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header as a string of 512 byte blocks.

           format selects one of USTAR_FORMAT, GNU_FORMAT or PAX_FORMAT;
           any other value raises ValueError.
        """
        info = self.get_info()

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.

           Raises ValueError if the linkname is longer than LENGTH_LINK
           or if the name cannot be split into prefix and name.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.

           Over-long linknames and names are emitted first as
           GNUTYPE_LONGLINK / GNUTYPE_LONGNAME sequences.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplemental information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
           and a name part.

           The split happens at the last "/" within the first
           LENGTH_PREFIX + 1 characters. Raises ValueError if no such
           split yields a non-empty prefix and a short enough name.
        """
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        prefix = prefix[:-1]

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # Checksum field placeholder. It must be exactly 8 bytes wide
            # so that it occupies bytes 148..155 and the type flag lands at
            # byte 156, which are the offsets frombuf() reads from.
            b" " * 8,
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", ""), 32, encoding, errors),
            stn(info.get("gname", ""), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        # Pad the joined fields with NULs up to a full block.
        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # Overwrite the first 7 bytes of the checksum field (6 octal digits
        # and a NUL); the 8th byte keeps the placeholder space.
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the string payload filled with zero bytes
           up to the next 512 byte border.
        """
        remainder = len(payload) % BLOCKSIZE
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a POSIX.1-2008 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be strings.
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for value in pax_headers.values():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = b""
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records += b"21 hdrcharset=BINARY\n"

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, that the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            # The record length includes the digits of the length field
            # itself, so iterate until the total stops changing.
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

           Raises EmptyHeaderError, TruncatedHeaderError, EOFHeaderError
           or InvalidHeaderError if buf is empty, has the wrong length,
           consists only of NULs or has a bad checksum, respectively.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.

           Raises SubsequentHeaderError if the header that follows
           cannot be read.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        # The next header offset is computed from the stored size before
        # self.size is replaced by the original size from the header.
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize
        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2008.

           Raises InvalidHeaderError if a record declares a length of
           zero and SubsequentHeaderError if the header that follows
           cannot be read.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails.
        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
        if match is not None:
            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

        # For the time being, we don't care about anything other than "BINARY".
        # The only other value that is currently allowed by the standard is
        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        hdrcharset = pax_headers.get("hdrcharset")
        if hdrcharset == "BINARY":
            encoding = tarfile.encoding
        else:
            encoding = "utf-8"

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A zero-length record would never advance pos and the
                # same record would be matched again forever.
                raise InvalidHeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            # Normally, we could just use "utf-8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(value, encoding, tarfile.encoding,
                        tarfile.errors)
            else:
                value = self._decode_pax_field(value, "utf-8", "utf-8",
                        tarfile.errors)

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(next, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(next, pax_headers, buf)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(next, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.

           The offsets and sizes are collected from the raw header
           data in buf and stored pairwise in next.sparse.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes))

    def _proc_gnusparse_01(self, next, pax_headers):
        """Process a GNU tar extended sparse header, version 0.1.

           "GNU.sparse.map" is a comma-separated list of integers that
           is stored in next.sparse as (even item, odd item) pairs.
        """
        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
        """Process a GNU tar extended sparse header, version 1.0.

           The map is read from the data that follows the header as
           newline-terminated decimal numbers: first the number of
           pairs, then the numbers themselves.
        """
        sparse = []
        buf = tarfile.fileobj.read(BLOCKSIZE)
        fields, buf = buf.split(b"\n", 1)
        fields = int(fields)
        while len(sparse) < fields * 2:
            if b"\n" not in buf:
                buf += tarfile.fileobj.read(BLOCKSIZE)
            number, buf = buf.split(b"\n", 1)
            sparse.append(int(number))
        next.offset_data = tarfile.fileobj.tell()
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
        """
        for keyword, value in pax_headers.items():
            if keyword == "GNU.sparse.name":
                setattr(self, "path", value)
            elif keyword == "GNU.sparse.size":
                setattr(self, "size", int(value))
            elif keyword == "GNU.sparse.realsize":
                setattr(self, "size", int(value))
            elif keyword in PAX_FIELDS:
                if keyword in PAX_NUMBER_FIELDS:
                    # Values that cannot be converted fall back to 0.
                    try:
                        value = PAX_NUMBER_FIELDS[keyword](value)
                    except ValueError:
                        value = 0
                if keyword == "path":
                    value = value.rstrip("/")
                setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
        """Decode a single field from a pax record.

           value is first decoded strictly with encoding; if that fails
           it is decoded with fallback_encoding and fallback_errors.
        """
        try:
            return value.decode(encoding, "strict")
        except UnicodeDecodeError:
            return value.decode(fallback_encoding, fallback_errors)

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        """Return True if the member's type is one of REGULAR_TYPES."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Return the same result as isreg()."""
        return self.isreg()
    def isdir(self):
        """Return True if the member's type is DIRTYPE."""
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the member's type is SYMTYPE."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the member's type is LNKTYPE."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the member's type is CHRTYPE."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the member's type is BLKTYPE."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the member's type is FIFOTYPE."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if sparse information has been set on the member."""
        return self.sparse is not None
    def isdev(self):
        """Return True if the member's type is CHRTYPE, BLKTYPE or FIFOTYPE."""
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
1376
1377class TarFile(object):
1378 """The TarFile Class provides an interface to tar archives.
1379 """
1380
1381 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1382
1383 dereference = False # If true, add content of linked file to the
1384 # tar file, else the link.
1385
1386 ignore_zeros = False # If true, skips empty or invalid blocks and
1387 # continues processing.
1388
Lars Gustäbel365aff32009-12-13 11:42:29 +00001389 errorlevel = 1 # If 0, fatal errors only appear in debug
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001390 # messages (if debug >= 0). If > 0, errors
1391 # are passed to the caller as exceptions.
1392
Guido van Rossumd8faa362007-04-27 19:54:29 +00001393 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001394
Guido van Rossume7ba4952007-06-06 23:52:48 +00001395 encoding = ENCODING # Encoding for 8-bit character strings.
1396
1397 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001398
Guido van Rossumd8faa362007-04-27 19:54:29 +00001399 tarinfo = TarInfo # The default TarInfo class to use.
1400
Lars Gustäbelb062a2f2012-05-14 13:18:16 +02001401 fileobject = ExFileObject # The file-object for extractfile().
Guido van Rossumd8faa362007-04-27 19:54:29 +00001402
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       `format', `tarinfo', `dereference', `ignore_zeros', `encoding',
       `debug' and `errorlevel' replace the attribute of the same name on
       this instance only when they are not None. `errors' is always
       stored. `pax_headers' is kept only if the resulting format is
       PAX_FORMAT; otherwise an empty dictionary is used.

       Raises ValueError for a `mode' other than 'r', 'a' or 'w', and
       ReadError if a HeaderError occurs while scanning an archive opened
       in 'a' mode.
    """
    # Map the public one-letter mode to the mode string used to open the file.
    modes = {"r": "rb", "a": "r+b", "w": "wb"}
    if mode not in modes:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = modes[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        # We opened the file ourselves, so we are responsible for closing it.
        self._extfileobj = False
    else:
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        # The caller owns the file object; it must not be closed by us.
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding
    self.errors = errors

    # This check must come after self.format has been finalized above.
    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            # firstmember is set to None before next() is called, so the
            # attribute already exists while the first header is read.
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    tarinfo = self.tarinfo.fromtarfile(self)
                    self.members.append(tarinfo)
                except EOFHeaderError:
                    # Rewind to the last recorded offset so that new
                    # members are written from there.
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        # Substring test: true for mode "a" as well as mode "w".
        if self.mode in "aw":
            self._loaded = True

            if self.pax_headers:
                # Write the global pax header and account for its size.
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Deliberately catches everything: release a file we opened
        # ourselves, mark the archive closed, then re-raise unchanged.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001499
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001500 #--------------------------------------------------------------------------
1501 # Below are the classmethods which act as alternate constructors to the
1502 # TarFile class. The open() method is the only one that is needed for
1503 # public use; it is the "super"-constructor and is able to select an
1504 # adequate "sub"-constructor for a particular compression using the mapping
1505 # from OPEN_METH.
1506 #
1507 # This concept allows one to subclass TarFile without losing the comfort of
1508 # the super-constructor. A sub-constructor is registered and made available
1509 # by adding it to the mapping in OPEN_METH.
1510
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'r:xz'       open for reading with lzma compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'w:xz'       open for writing with lzma compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'r|xz'       open an lzma compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing
       'w|xz'       open an lzma compressed stream for writing
    """
    if not name and not fileobj:
        raise ValueError("nothing to open")

    # Transparent mode: try every registered opener until one succeeds.
    if mode in ("r", "r:*"):
        for comptype in cls.OPEN_METH:
            opener = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return opener(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind a caller-supplied file before the next attempt.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
        raise ReadError("file could not be opened successfully")

    # Explicit "filemode:compression" on a regular file object.
    if ":" in mode:
        filemode, _, comptype = mode.partition(":")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        opener = getattr(cls, cls.OPEN_METH[comptype])
        return opener(name, filemode, fileobj, **kwargs)

    # "filemode|compression": a stream of tar blocks wrapped in a _Stream.
    if "|" in mode:
        filemode, _, comptype = mode.partition("|")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            archive = cls(name, filemode, stream, **kwargs)
        except:
            stream.close()
            raise
        # The _Stream was created here, so the archive has to close it.
        archive._extfileobj = False
        return archive

    if mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001590
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open the uncompressed tar archive `name' for reading, appending
       or writing by instantiating `cls'. Raise ValueError for any
       `mode' other than 'r', 'a' or 'w'.
    """
    if mode in ("r", "a", "w"):
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001598
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the gzip compressed tar archive `name' for reading or
       writing. Appending is not allowed.

       Raises ValueError for a `mode' other than 'r' or 'w',
       CompressionError if the gzip module cannot be used, and ReadError
       if an OSError occurs while opening the archive for reading.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    try:
        gz_stream = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
    except OSError:
        if fileobj is not None and mode == 'r':
            raise ReadError("not a gzip file")
        raise

    try:
        archive = cls.taropen(name, mode, gz_stream, **kwargs)
    except BaseException as exc:
        # Whatever went wrong, do not leave the GzipFile open.
        gz_stream.close()
        if isinstance(exc, OSError) and mode == 'r':
            raise ReadError("not a gzip file")
        raise
    # The GzipFile was created here, so the archive has to close it.
    archive._extfileobj = False
    return archive
1632
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the bzip2 compressed tar archive `name' for reading or
       writing. Appending is not allowed.

       Raises ValueError for a `mode' other than 'r' or 'w',
       CompressionError if the bz2 module cannot be imported, and
       ReadError if an OSError or EOFError occurs while opening the
       archive for reading.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    bz2_stream = bz2.BZ2File(fileobj or name, mode,
                             compresslevel=compresslevel)

    try:
        archive = cls.taropen(name, mode, bz2_stream, **kwargs)
    except BaseException as exc:
        # Whatever went wrong, do not leave the BZ2File open.
        bz2_stream.close()
        if isinstance(exc, (OSError, EOFError)) and mode == 'r':
            raise ReadError("not a bzip2 file")
        raise
    # The BZ2File was created here, so the archive has to close it.
    archive._extfileobj = False
    return archive
1661
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open the lzma compressed tar archive `name' for reading or
       writing. Appending is not allowed.

       Raises ValueError for a `mode' other than 'r' or 'w',
       CompressionError if the lzma module cannot be imported, and
       ReadError if an LZMAError or EOFError occurs while opening the
       archive for reading.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import lzma
    except ImportError:
        raise CompressionError("lzma module is not available")

    xz_stream = lzma.LZMAFile(fileobj or name, mode, preset=preset)

    try:
        archive = cls.taropen(name, mode, xz_stream, **kwargs)
    except BaseException as exc:
        # Whatever went wrong, do not leave the LZMAFile open.
        xz_stream.close()
        if isinstance(exc, (lzma.LZMAError, EOFError)) and mode == 'r':
            raise ReadError("not an lzma file")
        raise
    # The LZMAFile was created here, so the archive has to close it.
    archive._extfileobj = False
    return archive
1689
# Registry of all *open() methods: compression type -> method name.
OPEN_METH = dict(
    tar="taropen",      # uncompressed tar
    gz="gzopen",        # gzip compressed tar
    bz2="bz2open",      # bzip2 compressed tar
    xz="xzopen",        # lzma compressed tar
)
1697
1698 #--------------------------------------------------------------------------
1699 # The public methods which TarFile provides:
1700
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive and the archive is padded with zeros up
       to a multiple of RECORDSIZE.

       Calling close() on an already closed TarFile does nothing. The
       archive is marked closed before anything is written, and a file
       object that was opened by the TarFile itself is closed even if
       writing the trailing blocks raises; the exception still
       propagates to the caller.
    """
    if self.closed:
        return

    self.closed = True
    try:
        if self.mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Never leak a file we opened ourselves; a caller-supplied
        # file object is left open.
        if not self._extfileobj:
            self.fileobj.close()
1720
def getmember(self, name):
    """Return the TarInfo object for member `name' as found by
       _getmember(). Raise KeyError if `name' cannot be found in the
       archive. If a member occurs more than once in the archive, its
       last occurrence is assumed to be the most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001731
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete member list requires scanning the whole archive first.
    self._load()
    return self.members
1741
def getnames(self):
    """Return the names of all archive members as a list, in the same
       order as the list returned by getmembers().
    """
    return [member.name for member in self.getmembers()]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001747
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive.

       Return None if the file is of a type that is not handled here.
    """
    self._check("aw")

    # A file object takes precedence: use its real name.
    if fileobj is not None:
        name = fileobj.name

    # Build the member name: strip the drive, convert backward slashes
    # to forward slashes and turn absolute paths into relative ones.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    # Now, fill the TarInfo object with
    # information specific for the file.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Use os.stat or os.lstat, depending on platform
    # and if symlinks shall be resolved.
    if fileobj is not None:
        st = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        st = os.lstat(name)
    else:
        st = os.stat(name)
    linkname = ""

    file_mode = st.st_mode
    if stat.S_ISREG(file_mode):
        inode = (st.st_ino, st.st_dev)
        is_known_hardlink = (not self.dereference and st.st_nlink > 1 and
                             inode in self.inodes and
                             arcname != self.inodes[inode])
        if is_known_hardlink:
            # A hardlink to an already archived file.
            member_type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if it's valid.
            # For win32 it is always 0.
            member_type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(file_mode):
        member_type = DIRTYPE
    elif stat.S_ISFIFO(file_mode):
        member_type = FIFOTYPE
    elif stat.S_ISLNK(file_mode):
        member_type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(file_mode):
        member_type = CHRTYPE
    elif stat.S_ISBLK(file_mode):
        member_type = BLKTYPE
    else:
        return None

    # Fill the TarInfo object with all
    # information we can get.
    tarinfo.name = arcname
    tarinfo.mode = file_mode
    tarinfo.uid = st.st_uid
    tarinfo.gid = st.st_gid
    # Only regular files carry data in the archive.
    tarinfo.size = st.st_size if member_type == REGTYPE else 0
    tarinfo.mtime = st.st_mtime
    tarinfo.type = member_type
    tarinfo.linkname = linkname
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if member_type in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            tarinfo.devmajor = os.major(st.st_rdev)
            tarinfo.devminor = os.minor(st.st_rdev)
    return tarinfo
1845
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.
    """
    self._check()

    for member in self:
        if verbose:
            owner = "%s/%s" % (member.uname or member.uid,
                               member.gname or member.gid)
            _safe_print(stat.filemode(member.mode))
            _safe_print(owner)
            if member.ischr() or member.isblk():
                # Device nodes show "major,minor" in place of a size.
                device = "%d,%d" % (member.devmajor, member.devminor)
                _safe_print("%10s" % device)
            else:
                _safe_print("%10d" % member.size)
            timestamp = time.localtime(member.mtime)[:6]
            _safe_print("%d-%02d-%02d %02d:%02d:%02d" % timestamp)

        suffix = "/" if member.isdir() else ""
        _safe_print(member.name + suffix)

        if verbose:
            if member.issym():
                _safe_print("-> " + member.linkname)
            if member.islnk():
                _safe_print("link to " + member.linkname)
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001874
def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. Directory entries are added in sorted
       order, so the resulting member order does not depend on the order
       in which os.listdir() happens to return them.
       `exclude' is a function that should return True for each filename
       to be excluded; it is deprecated in favour of `filter'. `filter' is
       a function that expects a TarInfo object argument and returns the
       changed TarInfo object, if it returns None the TarInfo object will
       be excluded from the archive.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as f:
            self.addfile(tarinfo, f)

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            # os.listdir() returns entries in arbitrary order; sort them
            # so that the archive layout is reproducible.
            for f in sorted(os.listdir(name)):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                        recursive, exclude, filter=filter)

    else:
        self.addfile(tarinfo)
1935
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy, so the caller's TarInfo object is left untouched.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        full_blocks, remainder = divmod(member.size, BLOCKSIZE)
        if remainder > 0:
            # Pad the last, partially filled block with NULs.
            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001961
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().

       Directories are first created with mode 0o700 and without their
       attributes; the real attributes are applied afterwards, deepest
       paths first. If a directory occurs more than once, the attributes
       of its last occurrence are applied last and therefore win.

       An ExtractError raised while setting directory attributes is
       re-raised only if errorlevel > 1; otherwise it is reported via
       the debug output.
    """
    directories = []

    if members is None:
        members = self

    for tarinfo in members:
        if tarinfo.isdir():
            # Extract directories with a safe mode.
            directories.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 0o700
        # Do not set_attrs directories, as we will do that further down
        self.extract(tarinfo, path, set_attrs=not tarinfo.isdir())

    # Reverse sort directories. A single stable sort keeps duplicates
    # of the same directory in archive order, which sort() followed by
    # reverse() would flip.
    directories.sort(key=lambda a: a.name, reverse=True)

    # Set correct owner, mtime and filemode on directories.
    for tarinfo in directories:
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            else:
                self._dbg(1, "tarfile: %s" % e)
1999
def extract(self, member, path="", set_attrs=True):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a TarInfo object. You can
       specify a different directory using `path'. File attributes (owner,
       mtime, mode) are set unless `set_attrs' is False.

       An OSError is re-raised only if errorlevel > 0 and an ExtractError
       only if errorlevel > 1; otherwise the error is reported via the
       debug output.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    targetpath = os.path.join(path, tarinfo.name)
    try:
        self._extract_member(tarinfo, targetpath, set_attrs=set_attrs)
    except OSError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            message = "tarfile: %s" % e.strerror
        else:
            message = "tarfile: %s %r" % (e.strerror, e.filename)
        self._dbg(1, message)
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2034
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file or a
       link, a file object created by self.fileobject is returned.
       Otherwise, None is returned.

       Raises StreamError if a (sym)link is requested while the archive
       is read from a _Stream.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Members with unknown types are treated as regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")
        # A (sym)link's file object is its target's file object.
        return self.extractfile(self._find_link_target(tarinfo))

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    return None
2065
def _extract_member(self, tarinfo, targetpath, set_attrs=True):
    """Create the filesystem object described by `tarinfo' at
       `targetpath'. Missing parent directories are created first, then
       the make*() method matching the member type is called, and finally
       owner, mode and mtime are applied unless `set_attrs' is False.
    """
    # Drop trailing slashes and switch from the archive's forward
    # slashes to the platform specific separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Parent directories that are not on disk yet are created with
    # default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
    else:
        self._dbg(1, tarinfo.name)

    # Select the creation method for this kind of member.
    if tarinfo.isreg():
        create = self.makefile
    elif tarinfo.isdir():
        create = self.makedir
    elif tarinfo.isfifo():
        create = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        create = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        create = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        create = self.makeunknown
    else:
        create = self.makefile
    create(tarinfo, targetpath)

    if not set_attrs:
        return

    self.chown(tarinfo, targetpath)
    # Mode and modification time are not applied to symbolic links.
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002108
2109 #--------------------------------------------------------------------------
2110 # Below are the different file methods. They are called via
2111 # _extract_member() when extract() is called. They can be replaced in a
2112 # subclass to implement other functionality.
2113
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. A directory that is already
       there is left untouched. `tarinfo' is not consulted here.
    """
    # Start with a restrictive mode; the real mode is applied later in
    # _extract_member().
    safe_mode = 0o700
    try:
        os.mkdir(targetpath, safe_mode)
    except FileExistsError:
        pass
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002123
def makefile(self, tarinfo, targetpath):
    """Write the data of the member `tarinfo' to a new file `targetpath'.
       The data is read from self.fileobj starting at tarinfo.offset_data.
       For a sparse member only the (offset, size) chunks listed in
       tarinfo.sparse are written; in both cases the file ends up
       tarinfo.size bytes long.
    """
    src = self.fileobj
    src.seek(tarinfo.offset_data)
    with bltn_open(targetpath, "wb") as dst:
        if tarinfo.sparse is None:
            copyfileobj(src, dst, tarinfo.size)
        else:
            for start, length in tarinfo.sparse:
                dst.seek(start)
                copyfileobj(src, dst, length)
        # Fix the final length, which matters when a sparse member does
        # not end with a data chunk.
        dst.seek(tarinfo.size)
        dst.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002138
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' as if it were a
       regular file and report that through _dbg() at level 1.
    """
    self.makefile(tarinfo, targetpath)
    notice = ("tarfile: Unknown file type %r, "
              "extracted as regular file." % tarinfo.type)
    self._dbg(1, notice)
2146
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at `targetpath'. Raises ExtractError when the os
       module has no mkfifo() on this platform.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002154
def makedev(self, tarinfo, targetpath):
    """Create a block or character device node at `targetpath' using the
       mode and the major/minor numbers stored in `tarinfo'. Raises
       ExtractError when os.mknod() or os.makedev() is unavailable.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the matching device type flag.
    mode = tarinfo.mode | (stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR)
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2169
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link at `targetpath'. When the link
       cannot be created (platform limitation), or the target of a hard
       link is not on disk, the member the link refers to is extracted
       to `targetpath' instead. Raises ExtractError if that member cannot
       be found after link creation failed.
    """
    def extract_target():
        # Put a copy of the referenced archive member where the link
        # was supposed to go.
        self._extract_member(self._find_link_target(tarinfo), targetpath)

    try:
        # For systems that support symbolic and hard links.
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        elif os.path.exists(tarinfo._link_target):
            # _link_target is prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            extract_target()
    except symlink_exception:
        try:
            extract_target()
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002192
def chown(self, tarinfo, targetpath):
    """Set the owner of `targetpath' according to `tarinfo'. Nothing is
       done unless the pwd module is available and the process runs with
       effective uid 0. The user and group names stored in `tarinfo' are
       looked up first; the numeric uid/gid are used when a name is not
       known on this system. Raises ExtractError (chained to the
       underlying OSError) when the ownership change fails.
    """
    # We have to be root to change ownership.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        g = tarinfo.gid
    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        u = tarinfo.uid

    try:
        # Change the symlink itself, not its target, where possible.
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        else:
            os.chown(targetpath, u, g)
    except OSError as e:
        raise ExtractError("could not change owner") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002213
def chmod(self, tarinfo, targetpath):
    """Set the permission bits of `targetpath' to tarinfo.mode. Does
       nothing when the os module has no chmod(). Raises ExtractError
       (chained to the underlying OSError) when the change fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except OSError as e:
        raise ExtractError("could not change mode") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002222
def utime(self, tarinfo, targetpath):
    """Set both the access and the modification time of `targetpath' to
       tarinfo.mtime. Does nothing when the os module has no utime().
       Raises ExtractError (chained to the underlying OSError) when the
       timestamps cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except OSError as e:
        raise ExtractError("could not change modification time") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002232
2233 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, or
       None when there are no more members. The archive must be open in
       mode "r" or "a". A returned member is appended to self.members;
       when None is returned the archive is marked as fully loaded.
    """
    self._check("ra")

    # If firstmember is set it is handed out once and then cleared.
    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Read the next header, starting at the current archive offset.
    self.fileobj.seek(self.offset)
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as exc:
            if self.ignore_zeros:
                # Skip this block and try the following one.
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as exc:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
            if self.offset == 0:
                raise ReadError(str(exc))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as exc:
            if self.offset == 0:
                raise ReadError(str(exc))
        except SubsequentHeaderError as exc:
            raise ReadError(str(exc))
        break

    if tarinfo is None:
        self._loaded = True
    else:
        self.members.append(tarinfo)

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002279
2280 #--------------------------------------------------------------------------
2281 # Little helper methods:
2282
def _getmember(self, name, tarinfo=None, normalize=False):
    """Return the last archive member called `name', or None if there is
       no such member. When `tarinfo' is given, only members stored
       before it are considered. With `normalize' set, both `name' and
       the member names are passed through os.path.normpath() before
       they are compared.
    """
    # getmembers() makes sure that all members have been loaded.
    candidates = self.getmembers()

    # Cut the search list off right before tarinfo.
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    wanted = os.path.normpath(name) if normalize else name

    # Walk from bottom to top so that the latest entry wins.
    for candidate in reversed(candidates):
        if normalize:
            found = os.path.normpath(candidate.name)
        else:
            found = candidate.name
        if wanted == found:
            return candidate
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002305
def _load(self):
    """Call next() until it returns None, i.e. read through the whole
       archive, and then mark the archive as fully loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2315
def _check(self, mode=None):
    """Raise OSError if the archive is closed, or if `mode' is given and
       the mode the archive was opened with is not contained in it.
    """
    if self.closed:
        raise OSError("%s is closed" % self.__class__.__name__)
    if mode is None or self.mode in mode:
        return
    raise OSError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002324
def _find_link_target(self, tarinfo):
    """Return the archive member that the symbolic or hard link member
       `tarinfo' refers to. Raises KeyError if there is no such member.
    """
    if tarinfo.issym():
        # A symlink target is relative to the directory of the link;
        # the entire archive is searched for it.
        parts = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join([part for part in parts if part])
        limit = None
    else:
        # Search the archive before the link, because a hard link is
        # just a reference to an already archived file.
        linkname = tarinfo.linkname
        limit = tarinfo

    target = self._getmember(linkname, tarinfo=limit, normalize=True)
    if target is None:
        raise KeyError("linkname %r not found" % linkname)
    return target
2343
def __iter__(self):
    """Return an iterator over the archive members: a plain iterator over
       self.members once the archive is fully loaded, a TarIter otherwise.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
2351
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr if `level' does not exceed self.debug.
    """
    if level > self.debug:
        return
    print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002357
def __enter__(self):
    """Enter a with-block: verify through _check() that the archive is
       still usable and return the archive itself.
    """
    self._check()
    return self
2361
def __exit__(self, type, value, traceback):
    """Leave a with-block. Without a pending exception the archive is
       closed normally. Otherwise only the underlying file object is
       closed (unless _extfileobj is set) and the archive is flagged as
       closed.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because
    # it would try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002371# class TarFile
2372
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Start iterating over `tarfile' at its first member.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """An iterator is its own iterator object.
        """
        return self

    def __next__(self):
        """Return the next member, taking it from the list of members
           already read or from TarFile.next(). Once next() reports the
           end of the archive, the TarFile is marked as _loaded.
        """
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        archive = self.tarfile

        if self.index == 0 and archive.firstmember is not None:
            tarinfo = archive.next()
        elif self.index < len(archive.members):
            # This member has already been read.
            tarinfo = archive.members[self.index]
        elif archive._loaded:
            raise StopIteration
        else:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration

        self.index += 1
        return tarinfo
2410
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002411#--------------------
2412# exported functions
2413#--------------------
def is_tarfile(name):
    """Return True if `name' can be opened (and closed again) as a tar
       archive without a TarError being raised, else return False.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2424
# Keep the builtin open() reachable under another name before the
# module-level name is rebound; makefile() uses bltn_open to create the
# extracted files on disk.
bltn_open = open
# From here on, open() in this module refers to TarFile.open.
open = TarFile.open
Serhiy Storchakad27b4552013-11-24 01:53:29 +02002427
2428
def main():
    """Command line interface of the module.

       Exactly one of the following actions is carried out:
         -l/--list <tarfile>                  list the archive contents
         -e/--extract <tarfile> [<dir>]       extract into <dir> (default:
                                              the current directory)
         -c/--create <name> [<file> ...]      create an archive; the
                                              compression is chosen from
                                              the extension of <name>
         -t/--test <tarfile>                  check that the archive can
                                              be read
       -v/--verbose prints a confirmation message. The process exits with
       status 1 when the given file is not a tar archive or when no valid
       action was requested.
    """
    import argparse

    description = 'A simple command line interface for tarfile module.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-l', '--list', metavar='<tarfile>',
                       help='Show listing of a tarfile')
    group.add_argument('-e', '--extract', nargs='+',
                       metavar=('<tarfile>', '<output_dir>'),
                       help='Extract tarfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create tarfile from sources')
    group.add_argument('-t', '--test', metavar='<tarfile>',
                       help='Test if a tarfile is valid')
    args = parser.parse_args()

    if args.test:
        src = args.test
        if is_tarfile(src):
            with open(src, 'r') as tar:
                # getmembers() reads the whole archive; the member list
                # is reported on stderr.
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.list:
        src = args.list
        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.extract:
        if len(args.extract) == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif len(args.extract) == 2:
            src, curdir = args.extract
        else:
            # More than <tarfile> and <output_dir> were given.
            parser.exit(1, parser.format_help())

        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} file is extracted.'.format(src)
                else:
                    msg = ('{!r} file is extracted '
                           'into {!r} directory.').format(src, curdir)
                print(msg)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.create:
        tar_name = args.create.pop(0)
        _, ext = os.path.splitext(tar_name)
        # os.path.splitext() returns the extension including its leading
        # dot, so the keys must carry the dot as well.
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        compression = compressions.get(ext)
        tar_mode = 'w:' + compression if compression else 'w'
        tar_files = args.create

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))

    else:
        parser.exit(1, parser.format_help())
2518
# Run the command line interface when the module is executed as a script.
if __name__ == '__main__':
    main()