#!/usr/bin/env python3
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
# All rights reserved.
#
# Permission  is  hereby granted,  free  of charge,  to  any person
# obtaining a  copy of  this software  and associated documentation
# files  (the  "Software"),  to   deal  in  the  Software   without
# restriction,  including  without limitation  the  rights to  use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies  of  the  Software,  and to  permit  persons  to  whom the
# Software  is  furnished  to  do  so,  subject  to  the  following
# conditions:
#
# The above copyright  notice and this  permission notice shall  be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
29"""Read from and write to tar format archives.
30"""
31
Guido van Rossumd8faa362007-04-27 19:54:29 +000032version = "0.9.0"
Guido van Rossum98297ee2007-11-06 21:34:58 +000033__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
Senthil Kumaran7c9719c2011-07-28 22:32:49 +080034__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
35__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $"
Guido van Rossum98297ee2007-11-06 21:34:58 +000036__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037
38#---------
39# Imports
40#---------
41import sys
42import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020043import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000044import shutil
45import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000046import time
47import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000048import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000049import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000050
51try:
52 import grp, pwd
Brett Cannoncd171c82013-07-04 17:43:24 -040053except ImportError:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000054 grp = pwd = None
55
# Exceptions that signal "symbolic links cannot be created here":
# - os.symlink on Windows prior to 6.0 raises NotImplementedError
#   (AttributeError covers os.symlink being absent altogether);
# - OSError (winerror=1314) will be raised if the caller does not hold
#   the SeCreateSymbolicLinkPrivilege privilege.
# OSError is a builtin, so no NameError guard is needed around it.
symlink_exception = (AttributeError, NotImplementedError, OSError)
64
# Public names exported by "from tarfile import *".
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
Georg Brandl1a3284e2007-12-02 09:40:06 +000068from builtins import open as _open # Since 'open' is TarFile.open
Guido van Rossum8f78fe92006-08-24 04:03:53 +000069
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000070#---------------------------------------------------------
71# tar constants
72#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# Both magic strings fill the 8-byte magic+version area of a header.
# The GNU variant is "ustar" followed by TWO spaces and a NUL.
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields from a pax header that are affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Fields in a pax header that are numbers, all other fields
# are treated as strings.  Maps field name -> converter.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}
141
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000142#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000143# initialization
144#---------------------------------------------------------
# Default encoding for member names: UTF-8 on "nt"/"ce" platforms,
# the filesystem encoding reported by the interpreter everywhere else.
ENCODING = "utf-8" if os.name in ("nt", "ce") else sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000149
150#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000151# Some useful functions
152#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000153
def stn(s, length, encoding, errors):
    """Convert a string to a null-terminated bytes object.

       The string is encoded with the given encoding and error handler,
       cut off after `length` bytes and right-padded with NUL bytes so
       that shorter values come out exactly `length` bytes long.
    """
    encoded = s.encode(encoding, errors)
    return encoded[:length].ljust(length, NUL)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000159
def nts(s, encoding, errors):
    """Convert a null-terminated bytes object to a string.

       Only the bytes in front of the first NUL byte are decoded; when
       there is no NUL byte the whole object is decoded.
    """
    head, _sep, _rest = s.partition(b"\0")
    return head.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000167
def nti(s):
    """Convert a number field to a python number.

       s is the raw bytes of the header field.  Returns an int.
       Raises InvalidHeaderError if the field is neither a valid
       base-256 nor a valid octal number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] in (0o200, 0o377):
        # GNU base-256: a marker byte followed by a big-endian number.
        n = int.from_bytes(s[1:], "big")
        if s[0] == 0o377:
            # 0o377 marks a negative number stored in two's complement.
            n -= 256 ** (len(s) - 1)
    else:
        try:
            # Octal digits up to the first NUL byte (the same truncation
            # nts() performs).  A non-ASCII byte raises UnicodeDecodeError,
            # which is a ValueError and therefore handled below.
            text = s.partition(b"\0")[0].decode("ascii", "strict")
            # A field holding nothing but padding (NULs and/or blanks)
            # counts as zero instead of being rejected.
            n = int(text.strip() or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    return n
186
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

       n      -- the number to store; non-integers are truncated with int()
       digits -- total width of the field in bytes
       format -- archive format; base-256 is only used for GNU_FORMAT

       Returns a bytes-like object of `digits` bytes.  Raises ValueError
       if the number does not fit into the field for the given format.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.

    # Coerce once up front: a float reaching the base-256 branch would
    # otherwise fail on the integer-only byte arithmetic.
    n = int(n)
    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        marker = 0o200 if n >= 0 else 0o377
        # Reducing modulo 256**(digits-1) yields the two's complement
        # bytes for negative numbers and leaves positive ones untouched.
        payload = (n % 256 ** (digits - 1)).to_bytes(digits - 1, "big")
        s = bytearray([marker]) + payload
    else:
        raise ValueError("overflow in number field")

    return s
214
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.

       Returns the tuple (unsigned_chksum, signed_chksum).
    """
    # 148 bytes before the chksum field, 8 skipped bytes for the field
    # itself, 356 bytes after it.  The skipped field is accounted for by
    # the constant 256, i.e. eight spaces (8 * 0x20).
    as_unsigned = struct.unpack_from("148B8x356B", buf)
    as_signed = struct.unpack_from("148b8x356b", buf)
    return 256 + sum(as_unsigned), 256 + sum(as_signed)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000227
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
       If length is None, copy the entire content.

       Raises OSError if src is exhausted before length bytes
       have been copied.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024

    def transfer(count):
        # Move exactly `count` bytes; a short read means src ended early.
        chunk = src.read(count)
        if len(chunk) < count:
            raise OSError("end of file reached")
        dst.write(chunk)

    whole_buffers, tail = divmod(length, BUFSIZE)
    for _ in range(whole_buffers):
        transfer(BUFSIZE)
    if tail != 0:
        transfer(tail)
252
def filemode(mode):
    """Deprecated in this location; use stat.filemode."""
    # Imported lazily so the warnings module is only loaded when this
    # deprecated wrapper is actually called.
    from warnings import warn
    warn("deprecated in favor of stat.filemode",
         DeprecationWarning, stacklevel=2)
    return stat.filemode(mode)
259
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000260
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000294
295#---------------------------
296# internal stream interface
297#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        """Open `name` with os.open(); mode is "r" or "w" (any other
           value raises KeyError).
        """
        flags = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # O_BINARY is not defined on every platform.
        if hasattr(os, "O_BINARY"):
            flags |= os.O_BINARY
        self.fd = os.open(name, flags, 0o666)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return up to size bytes read from the file descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write all of the bytes-like object s to the file descriptor."""
        # os.write() reports how many bytes it actually wrote, which may
        # be fewer than requested; keep going until nothing is left.
        remaining = memoryview(s)
        while len(remaining) > 0:
            written = os.write(self.fd, remaining)
            remaining = remaining[written:]
321
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip, bzip2 or xz compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           name     -- file name; opened with _LowLevelFile if fileobj
                       is None
           mode     -- "r" for reading, anything else is treated as writing
           comptype -- "tar", "gz", "bz2", "xz" or "*" (detect on read)
           fileobj  -- existing stream-like object or None
           bufsize  -- number of bytes transferred per low-level access

           Raises CompressionError for an unknown comptype or when the
           required compression module is missing.
        """
        # Remember whether the file object belongs to the caller; only
        # objects opened here are closed by this class.
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = b""
        self.pos = 0
        self.closed = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    self._init_read_gz()
                    self.exception = zlib.error
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except BaseException:
            # Whatever went wrong, do not leak a file opened here; the
            # error itself is passed on unchanged.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" is missing if __init__ failed before setting it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Negative wbits: raw deflate data, the gzip header and trailer
        # are written by this class itself.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        # Mark as closed first so that a failure below cannot trigger a
        # second close attempt from __del__.
        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # The native zlib crc is an unsigned 32-bit integer, but
                    # the Python wrapper implicitly casts that to a signed C
                    # long.  So, on a 32-bit box self.crc may "look negative",
                    # while the same crc on a 64-bit box may "look positive".
                    # To avoid irksome warnings from the `struct` module, force
                    # it to look positive on all boxes.
                    self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            # Release a file opened by this class even if flushing failed.
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            # FEXTRA: the extra field is part of the uncompressed gzip
            # header, so it is skipped with the raw reader and must not
            # go through the decompressor or count towards self.pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # FNAME: skip the NUL-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: skip the NUL-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: skip the two-byte header checksum.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos >= 0:
            # Seeking forward means reading and discarding the data.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            # The chunks are bytes objects, so join them as bytes.
            buf = b"".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            return self.__read(size)

        # Decompress raw blocks until enough data has accumulated or
        # the underlying stream is exhausted.
        c = len(self.dbuf)
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                raise ReadError("invalid compressed data")
            self.dbuf += buf
            c += len(buf)
        buf = self.dbuf[:size]
        self.dbuf = self.dbuf[size:]
        return buf

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            self.buf += buf
            c += len(buf)
        buf = self.buf[:size]
        self.buf = self.buf[size:]
        return buf
# class _Stream
571
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        # Read the first block ahead of time so getcomptype() can
        # inspect it.
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        # The first call hands out the block read in __init__ (size is
        # not consulted); from then on reads go straight to the
        # wrapped file object.
        first_block = self.buf
        self.read = self.fileobj.read
        return first_block

    def getcomptype(self):
        """Return "gz", "bz2", "xz" or "tar" depending on the magic
           bytes found at the start of the first block.
        """
        head = self.buf
        if head.startswith(b"\x1f\x8b\x08"):
            return "gz"
        if head[0:3] == b"BZh" and head[4:10] == b"1AY&SY":
            return "bz2"
        if head.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        return "tar"

    def close(self):
        self.fileobj.close()
# class StreamProxy
598
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000599#------------------------
600# Extraction file object
601#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        """fileobj   -- the underlying file object
           offset    -- position in fileobj where the member's data starts
           size      -- size of the member as seen by the reader
           blockinfo -- optional list of (offset, size) pairs naming the
                        data blocks of the member; every gap between
                        them reads as NUL bytes
        """
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks.  Each entry is
        # (is_data, start, stop, position_in_fileobj_or_None).
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for block_offset, block_size in blockinfo:
            if block_offset > lastpos:
                self.map.append((False, lastpos, block_offset, None))
            self.map.append((True, block_offset, block_offset + block_size, realpos))
            realpos += block_size
            lastpos = block_offset + block_size
        if lastpos < self.size:
            self.map.append((False, lastpos, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.

           The resulting position is clamped to the range 0..size and
           returned.  Raises ValueError for an unknown whence.
        """
        if whence == io.SEEK_SET:
            self.position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                self.position = max(self.position + position, 0)
            else:
                self.position = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            self.position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        return self.position

    def read(self, size=None):
        """Read data from the file.

           Returns at most size bytes, or everything up to the end of
           the member if size is None.  Raises ReadError if the
           underlying file ends before the member's data does.
        """
        if size is None:
            size = self.size - self.position
        else:
            size = min(size, self.size - self.position)

        # Collect the pieces and join once instead of repeatedly
        # concatenating bytes objects.
        chunks = []
        while size > 0:
            # Find the map entry covering the current position; the
            # search continues from the last hit and wraps around.
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                self.fileobj.seek(offset + (self.position - start))
                chunk = self.fileobj.read(length)
                if len(chunk) != length:
                    # Without this check the position would advance past
                    # data that was never delivered.
                    raise ReadError("unexpected end of data")
                chunks.append(chunk)
            else:
                chunks.append(NUL * length)
            size -= length
            self.position += length
        return b"".join(chunks)

    def readinto(self, b):
        """Read into the writable buffer b; return the number of bytes read."""
        buf = self.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def close(self):
        # Only this wrapper is marked closed; the underlying file
        # object is left untouched.
        self.closed = True
#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000702
class ExFileObject(io.BufferedReader):
    """Buffered reader over the data of a single archive member."""

    def __init__(self, tarfile, tarinfo):
        # The raw stream exposes only this member's slice of the
        # archive's file object, honouring its sparse block list.
        raw = _FileInFile(
            tarfile.fileobj,
            tarinfo.offset_data,
            tarinfo.size,
            tarinfo.sparse,
        )
        super().__init__(raw)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000710
711#------------------
712# Exported Classes
713#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
                 "chksum", "type", "linkname", "uname", "gname",
                 "devmajor", "devminor",
                 "offset", "offset_data", "pax_headers", "sparse",
                 "tarfile", "_sparse_structs", "_link_target")

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        """Return the member name (pax keyword "path")."""
        return self.name
    def _setpath(self, name):
        """Set the member name (pax keyword "path")."""
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        """Return the link name (pax keyword "linkpath")."""
        return self.linkname
    def _setlinkpath(self, linkname):
        """Set the link name (pax keyword "linkpath")."""
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.

           The mode is masked to its lower 12 bits and the name of a
           directory member always ends with a slash.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header as a bytes object of one or more blocks.

           format selects the header flavour (USTAR_FORMAT, GNU_FORMAT or
           PAX_FORMAT); encoding and errors are used to encode the string
           fields. The pax flavour does not use the errors argument.
           Raise ValueError for any other format.
        """
        info = self.get_info()

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.

           Raise ValueError if the linkname is longer than LENGTH_LINK or
           if the name cannot be split into a prefix and a name part.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.

           Over-long link names and member names are stored in extra
           GNUTYPE_LONGLINK / GNUTYPE_LONGNAME headers that precede the
           regular header block.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        # Work on a copy so that self.pax_headers is left untouched.
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            # 8 ** (digits - 1) is the first value that no longer fits into
            # digits - 1 octal digits.
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
           and a name part.

           The split is made at the last slash within the first
           LENGTH_PREFIX + 1 characters. Raise ValueError if there is no
           such slash or if the remaining name part is still too long.
        """
        prefix = name[:LENGTH_PREFIX + 1]
        # Keep everything up to and including the last slash; rfind()
        # returns -1 if there is none, which leaves an empty prefix.
        prefix = prefix[:prefix.rfind("/") + 1]

        name = name[len(prefix):]
        # Drop the separating slash, it is not stored in the header.
        prefix = prefix[:-1]

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.

           Missing keys in info fall back to empty or zero values. The
           checksum is computed over the block with a blank checksum
           field and patched in afterwards.
        """
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            # Checksum field: it must be exactly 8 bytes of spaces so that
            # the following fields land on the offsets that frombuf() reads
            # (checksum at 148:156, type at 156:157, ...).
            b" " * 8,
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", ""), 32, encoding, errors),
            stn(info.get("gname", ""), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        # struct.pack() pads the joined fields with NULs up to BLOCKSIZE.
        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        # With 512 byte blocks, -364 and -357 are the offsets 148 and 155:
        # the first seven bytes of the checksum field are replaced by six
        # octal digits and a NUL, the eighth byte stays a space.
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the bytes payload filled with zero bytes
           up to the next 512 byte border.
        """
        remainder = len(payload) % BLOCKSIZE
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        # The stored name is NUL-terminated.
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a POSIX.1-2008 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be strings.
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for keyword, value in pax_headers.items():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = b""
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records += b"21 hdrcharset=BINARY\n"

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, that the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The record length includes the digits of the length field
            # itself, so iterate until the value stops changing.
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

           Raise EmptyHeaderError for an empty buffer, TruncatedHeaderError
           for a buffer of the wrong size, EOFHeaderError for a block of
           NULs and InvalidHeaderError if the checksum does not match.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        # The header block has just been consumed, so it started one
        # block before the current position.
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.

           Raise SubsequentHeaderError if the header that must follow
           cannot be read.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        # The next header is located using the size from the header block;
        # only afterwards is self.size replaced by the original file size.
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize
        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2008.

           Raise InvalidHeaderError if a record declares a length of
           zero and SubsequentHeaderError if the header that must follow
           cannot be read.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails.
        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
        if match is not None:
            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

        # For the time being, we don't care about anything other than "BINARY".
        # The only other value that is currently allowed by the standard is
        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        hdrcharset = pax_headers.get("hdrcharset")
        if hdrcharset == "BINARY":
            encoding = tarfile.encoding
        else:
            encoding = "utf-8"

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A record that claims to be zero bytes long would never
                # advance pos and make this loop spin forever.
                raise InvalidHeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            # Normally, we could just use "utf-8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(value, encoding, tarfile.encoding,
                        tarfile.errors)
            else:
                value = self._decode_pax_field(value, "utf-8", "utf-8",
                        tarfile.errors)

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(next, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(next, pax_headers, buf)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(next, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.

           The offsets and sizes are collected from the raw header
           data in buf and stored pairwise in next.sparse.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes))

    def _proc_gnusparse_01(self, next, pax_headers):
        """Process a GNU tar extended sparse header, version 0.1.

           The "GNU.sparse.map" value is a comma-separated list of
           numbers that is split into (first, second) pairs.
        """
        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
        """Process a GNU tar extended sparse header, version 1.0.

           The sparse map is read from the archive itself as
           newline-terminated decimal numbers: a count followed by
           twice that many numbers.
        """
        sparse = []
        buf = tarfile.fileobj.read(BLOCKSIZE)
        fields, buf = buf.split(b"\n", 1)
        fields = int(fields)
        while len(sparse) < fields * 2:
            if b"\n" not in buf:
                # The current number continues in the next block.
                buf += tarfile.fileobj.read(BLOCKSIZE)
            number, buf = buf.split(b"\n", 1)
            sparse.append(int(number))
        next.offset_data = tarfile.fileobj.tell()
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
        """
        for keyword, value in pax_headers.items():
            if keyword == "GNU.sparse.name":
                setattr(self, "path", value)
            elif keyword == "GNU.sparse.size":
                setattr(self, "size", int(value))
            elif keyword == "GNU.sparse.realsize":
                setattr(self, "size", int(value))
            elif keyword in PAX_FIELDS:
                if keyword in PAX_NUMBER_FIELDS:
                    # Fall back to 0 if the number cannot be converted.
                    try:
                        value = PAX_NUMBER_FIELDS[keyword](value)
                    except ValueError:
                        value = 0
                if keyword == "path":
                    value = value.rstrip("/")
                setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
        """Decode a single field from a pax record.

           Try a strict decode with encoding first and fall back on
           fallback_encoding with the fallback_errors handler.
        """
        try:
            return value.decode(encoding, "strict")
        except UnicodeDecodeError:
            return value.decode(fallback_encoding, fallback_errors)

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        """Return True if the member type is one of REGULAR_TYPES."""
        return self.type in REGULAR_TYPES
    def isfile(self):
        """Same as isreg()."""
        return self.isreg()
    def isdir(self):
        """Return True if the member type is DIRTYPE."""
        return self.type == DIRTYPE
    def issym(self):
        """Return True if the member type is SYMTYPE."""
        return self.type == SYMTYPE
    def islnk(self):
        """Return True if the member type is LNKTYPE."""
        return self.type == LNKTYPE
    def ischr(self):
        """Return True if the member type is CHRTYPE."""
        return self.type == CHRTYPE
    def isblk(self):
        """Return True if the member type is BLKTYPE."""
        return self.type == BLKTYPE
    def isfifo(self):
        """Return True if the member type is FIFOTYPE."""
        return self.type == FIFOTYPE
    def issparse(self):
        """Return True if sparse information is set for the member."""
        return self.sparse is not None
    def isdev(self):
        """Return True if the member type is CHRTYPE, BLKTYPE or FIFOTYPE."""
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
1370
1371class TarFile(object):
1372 """The TarFile Class provides an interface to tar archives.
1373 """
1374
1375 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1376
1377 dereference = False # If true, add content of linked file to the
1378 # tar file, else the link.
1379
1380 ignore_zeros = False # If true, skips empty or invalid blocks and
1381 # continues processing.
1382
Lars Gustäbel365aff32009-12-13 11:42:29 +00001383 errorlevel = 1 # If 0, fatal errors only appear in debug
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001384 # messages (if debug >= 0). If > 0, errors
1385 # are passed to the caller as exceptions.
1386
Guido van Rossumd8faa362007-04-27 19:54:29 +00001387 format = DEFAULT_FORMAT # The format to use when creating an archive.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001388
Guido van Rossume7ba4952007-06-06 23:52:48 +00001389 encoding = ENCODING # Encoding for 8-bit character strings.
1390
1391 errors = None # Error handler for unicode conversion.
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001392
Guido van Rossumd8faa362007-04-27 19:54:29 +00001393 tarinfo = TarInfo # The default TarInfo class to use.
1394
Lars Gustäbelb062a2f2012-05-14 13:18:16 +02001395 fileobject = ExFileObject # The file-object for extractfile().
Guido van Rossumd8faa362007-04-27 19:54:29 +00001396
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'.

       `mode' is 'r' to read from an existing archive, 'a' to append data
       to an existing file or 'w' to create a new file overwriting an
       existing one; it defaults to 'r'. Any other value raises ValueError.

       If `fileobj' is given, it is used for reading or writing data and
       its own `mode' attribute (when present) overrides the low-level
       open mode. `fileobj' is not closed when the TarFile is closed.

       The remaining keyword arguments override the class-level defaults
       of the same name when they are not None.
    """
    binary_modes = {"r": "rb", "a": "r+b", "w": "wb"}
    if mode not in binary_modes:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode, self._mode = mode, binary_modes[mode]

    if fileobj:
        # Caller-supplied file object: borrow its name and mode if it
        # has them, and remember that we do not own it.
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    else:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode, self._mode = "w", "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Per-instance overrides of the class-level defaults.
    for attr, value in (("format", format),
                        ("tarinfo", tarinfo),
                        ("dereference", dereference),
                        ("ignore_zeros", ignore_zeros),
                        ("encoding", encoding)):
        if value is not None:
            setattr(self, attr, value)
    self.errors = errors

    # Global pax headers are only kept for PAX_FORMAT archives.
    use_pax_headers = pax_headers is not None and self.format == PAX_FORMAT
    self.pax_headers = pax_headers if use_pax_headers else {}

    for attr, value in (("debug", debug), ("errorlevel", errorlevel)):
        if value is not None:
            setattr(self, attr, value)

    # Init datastructures.
    self.closed = False
    self.members = []                   # list of members as TarInfo objects
    self._loaded = False                # flag if all members have been read
    self.offset = self.fileobj.tell()   # current position in the archive file
    self.inodes = {}                    # dictionary caching the inodes of
                                        # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    member = self.tarinfo.fromtarfile(self)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))
                else:
                    self.members.append(member)

        if self.mode in ("a", "w"):
            self._loaded = True

            if self.pax_headers:
                header = self.tarinfo.create_pax_global_header(
                    self.pax_headers.copy())
                self.fileobj.write(header)
                self.offset += len(header)
    except BaseException:
        # Do not leak a file we opened ourselves, then let the
        # original exception propagate.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001493
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001494 #--------------------------------------------------------------------------
1495 # Below are the classmethods which act as alternate constructors to the
1496 # TarFile class. The open() method is the only one that is needed for
1497 # public use; it is the "super"-constructor and is able to select an
1498 # adequate "sub"-constructor for a particular compression using the mapping
1499 # from OPEN_METH.
1500 #
1501 # This concept allows one to subclass TarFile without losing the comfort of
1502 # the super-constructor. A sub-constructor is registered and made available
1503 # by adding it to the mapping in OPEN_METH.
1504
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'r:xz'       open for reading with lzma compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression
       'w:xz'       open for writing with lzma compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'r|xz'       open an lzma compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing
       'w|xz'       open an lzma compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered opener can read
       the file.
    """
    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Transparent mode: try every registered *open() method in turn
        # until one of them accepts the file.
        for opener_name in cls.OPEN_METH.values():
            opener = getattr(cls, opener_name)
            if fileobj is not None:
                rewind_to = fileobj.tell()
            try:
                return opener(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Restore the position for the next candidate.
                if fileobj is not None:
                    fileobj.seek(rewind_to)
        raise ReadError("file could not be opened successfully")

    if ":" in mode:
        filemode, _, comptype = mode.partition(":")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        opener = getattr(cls, cls.OPEN_METH[comptype])
        return opener(name, filemode, fileobj, **kwargs)

    if "|" in mode:
        filemode, _, comptype = mode.partition("|")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            archive = cls(name, filemode, stream, **kwargs)
        except BaseException:
            stream.close()
            raise
        # The stream wrapper was created here, so the archive owns it.
        archive._extfileobj = False
        return archive

    if mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001584
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be 'r', 'a' or 'w', otherwise ValueError is raised.
       All arguments are handed to the class constructor.
    """
    if mode in ("r", "a", "w"):
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001592
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the gzip module cannot be used and ReadError
       if an OSError occurs while opening for reading.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    try:
        gzfile = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
    except OSError:
        if fileobj is not None and mode == 'r':
            raise ReadError("not a gzip file")
        raise

    try:
        archive = cls.taropen(name, mode, gzfile, **kwargs)
    except BaseException as exc:
        # Whatever went wrong, the GzipFile created above must not leak.
        gzfile.close()
        if isinstance(exc, OSError) and mode == 'r':
            raise ReadError("not a gzip file")
        raise
    # The GzipFile wrapper belongs to the archive and is closed with it.
    archive._extfileobj = False
    return archive
1626
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the bz2 module cannot be imported and
       ReadError if OSError or EOFError occurs while opening for reading.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    bzfile = bz2.BZ2File(fileobj or name, mode,
                         compresslevel=compresslevel)

    try:
        archive = cls.taropen(name, mode, bzfile, **kwargs)
    except BaseException as exc:
        # Whatever went wrong, the BZ2File created above must not leak.
        bzfile.close()
        if isinstance(exc, (OSError, EOFError)) and mode == 'r':
            raise ReadError("not a bzip2 file")
        raise
    # The BZ2File wrapper belongs to the archive and is closed with it.
    archive._extfileobj = False
    return archive
1655
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open lzma compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the lzma module cannot be imported and
       ReadError if LZMAError or EOFError occurs while opening for
       reading.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import lzma
    except ImportError:
        raise CompressionError("lzma module is not available")

    xzfile = lzma.LZMAFile(fileobj or name, mode, preset=preset)

    try:
        archive = cls.taropen(name, mode, xzfile, **kwargs)
    except BaseException as exc:
        # Whatever went wrong, the LZMAFile created above must not leak.
        xzfile.close()
        if isinstance(exc, (lzma.LZMAError, EOFError)) and mode == 'r':
            raise ReadError("not an lzma file")
        raise
    # The LZMAFile wrapper belongs to the archive and is closed with it.
    archive._extfileobj = False
    return archive
1683
# Registry of all *open() methods, keyed by compression type. The values
# are method names that open() looks up on the class; in transparent
# read mode they are tried in the order listed here.
OPEN_METH = {
    # uncompressed tar
    "tar": "taropen",
    # gzip compressed tar
    "gz": "gzopen",
    # bzip2 compressed tar
    "bz2": "bz2open",
    # lzma compressed tar
    "xz": "xzopen",
}
1691
1692 #--------------------------------------------------------------------------
1693 # The public methods which TarFile provides:
1694
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on an already closed TarFile does nothing. The
       archive is marked as closed and a file object opened by TarFile
       itself is closed even if writing the trailing blocks fails; the
       write error still propagates to the caller.
    """
    if self.closed:
        return

    # Flag the archive as closed first, so that a failing write below
    # cannot leave it half-finalised yet still apparently open.
    self.closed = True
    try:
        if self.mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Only close file objects we opened ourselves; do it even on
        # error so the underlying file is not leaked.
        if not self._extfileobj:
            self.fileobj.close()
1714
def getmember(self, name):
    """Return a TarInfo object for member `name'. If `name' can not be
       found in the archive, KeyError is raised. If a member occurs more
       than once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001725
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects. The
       list has the same order as the members in the archive.
    """
    self._check()
    # A complete member list requires that the whole archive
    # has been scanned once.
    if not self._loaded:
        self._load()
    return self.members
1735
def getnames(self):
    """Return the members of the archive as a list of their names. It has
       the same order as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001741
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
       object `fileobj' (using os.fstat on its file descriptor). You can
       modify some of the TarInfo's attributes before you add it using
       addfile(). If given, `arcname' specifies an alternative name for the
       file in the archive.

       Returns None if the file is of a type that is not handled here.
    """
    self._check("aw")

    # A given file object wins: its real name replaces `name'.
    if fileobj is not None:
        name = fileobj.name

    # Build the member name: drop any drive prefix, convert the
    # platform separator to forward slashes and make the path relative.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    # The object that is going to be filled in below.
    info = self.tarinfo()
    info.tarfile = self

    # Use os.fstat for file objects, otherwise os.lstat or os.stat
    # depending on platform and on whether symlinks shall be resolved.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)
    linkname = ""

    st_mode = statres.st_mode
    if stat.S_ISREG(st_mode):
        inode = (statres.st_ino, statres.st_dev)
        already_archived = (not self.dereference and statres.st_nlink > 1
                            and inode in self.inodes
                            and arcname != self.inodes[inode])
        if already_archived:
            # A hardlink to a file that is in the archive already.
            ftype = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is added only if its valid.
            # For win32 it is always 0.
            ftype = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(st_mode):
        ftype = DIRTYPE
    elif stat.S_ISFIFO(st_mode):
        ftype = FIFOTYPE
    elif stat.S_ISLNK(st_mode):
        ftype = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(st_mode):
        ftype = CHRTYPE
    elif stat.S_ISBLK(st_mode):
        ftype = BLKTYPE
    else:
        return None

    # Copy everything the stat result tells us into the TarInfo object.
    info.name = arcname
    info.mode = st_mode
    info.uid = statres.st_uid
    info.gid = statres.st_gid
    # Only regular files carry data in the archive.
    info.size = statres.st_size if ftype == REGTYPE else 0
    info.mtime = statres.st_mtime
    info.type = ftype
    info.linkname = linkname

    # Resolve user and group names where the lookup modules exist;
    # ids without an entry are silently left unnamed.
    if pwd:
        try:
            info.uname = pwd.getpwuid(info.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            info.gname = grp.getgrgid(info.gid)[0]
        except KeyError:
            pass

    if ftype in (CHRTYPE, BLKTYPE):
        if hasattr(os, "major") and hasattr(os, "minor"):
            info.devmajor = os.major(statres.st_rdev)
            info.devminor = os.minor(statres.st_rdev)
    return info
1839
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.
    """
    self._check()

    for member in self:
        if verbose:
            # Permission bits, then owner/group (names preferred over ids).
            print(stat.filemode(member.mode), end=' ')
            owner = "%s/%s" % (member.uname or member.uid,
                               member.gname or member.gid)
            print(owner, end=' ')
            # Devices show "major,minor" where other members show a size.
            if member.ischr() or member.isblk():
                device = "%d,%d" % (member.devmajor, member.devminor)
                print("%10s" % device, end=' ')
            else:
                print("%10d" % member.size, end=' ')
            timestamp = time.localtime(member.mtime)[:6]
            print("%d-%02d-%02d %02d:%02d:%02d" % timestamp, end=' ')

        # Directories are marked with a trailing slash.
        suffix = "/" if member.isdir() else ""
        print(member.name + suffix, end=' ')

        if verbose:
            if member.issym():
                print("->", member.linkname, end=' ')
            if member.islnk():
                print("link to", member.linkname, end=' ')
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001868
def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded; using it emits a
       DeprecationWarning in favour of `filter'. `filter' is a function
       that expects a TarInfo object argument and returns the changed
       TarInfo object, if it returns None the TarInfo object will be
       excluded from the archive.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames (deprecated mechanism).
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                      DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)
    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Regular files are the only members added together with their data.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as source:
            self.addfile(tarinfo, source)
        return

    # Everything else is header-only; directories may then be descended.
    is_directory = tarinfo.isdir()
    self.addfile(tarinfo)
    if is_directory and recursive:
        for entry in os.listdir(name):
            self.add(os.path.join(name, entry), os.path.join(arcname, entry),
                     recursive, exclude, filter=filter)
1929
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so the caller's object is not the one stored
    # in the member list.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad it with NULs
    # up to the next block boundary.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        padding = -member.size % BLOCKSIZE
        if padding:
            self.fileobj.write(NUL * padding)
        self.offset += member.size + padding

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001955
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().
    """
    directories = []

    if members is None:
        members = self

    for member in members:
        target = member
        if member.isdir():
            # Extract directories with a safe mode; the real attributes
            # are applied once everything has been extracted.
            directories.append(member)
            target = copy.copy(member)
            target.mode = 0o700
        # Do not set_attrs directories, as we will do that further down
        self.extract(target, path, set_attrs=not target.isdir())

    # Walk the directories in reverse name order (sort, then reverse).
    for tarinfo in reversed(sorted(directories, key=lambda d: d.name)):
        dirpath = os.path.join(path, tarinfo.name)
        # Set correct owner, mtime and filemode on the directory.
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
1993
def extract(self, member, path="", set_attrs=True):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a TarInfo object. You can
       specify a different directory using `path'. File attributes (owner,
       mtime, mode) are set unless `set_attrs' is False.

       An OSError is re-raised if errorlevel > 0 and an ExtractError if
       errorlevel > 1; otherwise the error is only reported as a debug
       message.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
                             set_attrs=set_attrs)
    except OSError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2028
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file or a
       link, an io.BufferedReader object is returned. Otherwise, None is
       returned.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Members with unknown types are treated as regular files.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")
        # A (sym)link's file object is its target's file object.
        return self.extractfile(self._find_link_target(tarinfo))

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    return None
2059
def _extract_member(self, tarinfo, targetpath, set_attrs=True):
    """Extract the TarInfo object tarinfo to a physical
       file called targetpath.

       The member is created through the make*() method that matches
       its type. Unless `set_attrs' is false, owner, mode and
       modification time are applied afterwards (mode and time are
       skipped for symbolic links).
    """
    # Drop trailing slashes and switch from forward slashes to the
    # platform specific separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Create directories that are not part of the archive with
    # default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        label = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        label = tarinfo.name
    self._dbg(1, label)

    # Pick the creator that matches the member type.
    if tarinfo.isreg():
        create = self.makefile
    elif tarinfo.isdir():
        create = self.makedir
    elif tarinfo.isfifo():
        create = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        create = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        create = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        create = self.makeunknown
    else:
        create = self.makefile
    create(tarinfo, targetpath)

    if not set_attrs:
        return

    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002102
2103 #--------------------------------------------------------------------------
2104 # Below are the different file methods. They are called via
2105 # _extract_member() when extract() is called. They can be replaced in a
2106 # subclass to implement other functionality.
2107
def makedir(self, tarinfo, targetpath):
    """Make a directory called targetpath.

       An already existing directory is reused silently. If targetpath
       exists but is not a directory, the FileExistsError raised by
       os.mkdir() is propagated instead of being swallowed, so that a
       directory member is never silently "extracted" onto a file.
    """
    try:
        # Use a safe mode for the directory, the real mode is set
        # later in _extract_member().
        os.mkdir(targetpath, 0o700)
    except FileExistsError:
        # Only an existing *directory* is an acceptable outcome.
        if not os.path.isdir(targetpath):
            raise
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002117
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The archive's file object is positioned at tarinfo.offset_data
       and the member data is copied into the new file. When
       tarinfo.sparse is set, every (offset, size) pair is written at
       its own offset in the target. Afterwards the target is cut or
       extended to exactly tarinfo.size bytes.
    """
    archive = self.fileobj
    archive.seek(tarinfo.offset_data)
    with bltn_open(targetpath, "wb") as output:
        segments = tarinfo.sparse
        if segments is None:
            copyfileobj(archive, output, tarinfo.size)
        else:
            for position, length in segments:
                output.seek(position)
                copyfileobj(archive, output, length)
        # Give the file its final length (matters when the last sparse
        # segment ends before tarinfo.size).
        output.seek(tarinfo.size)
        output.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002132
def makeunknown(self, tarinfo, targetpath):
    """Make a file from a TarInfo object with an unknown type
       at targetpath.

       The member is written with makefile() and a level 1 debug
       message reports the unknown type.
    """
    self.makefile(tarinfo, targetpath)
    notice = ("tarfile: Unknown file type %r, "
              "extracted as regular file." % tarinfo.type)
    self._dbg(1, notice)
2140
def makefifo(self, tarinfo, targetpath):
    """Make a fifo called targetpath.

       Raises ExtractError when the os module offers no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002148
def makedev(self, tarinfo, targetpath):
    """Make a character or block device called targetpath.

       Raises ExtractError when the os module lacks mknod() or
       makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the device type flag.
    mode = tarinfo.mode
    mode |= stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2163
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.

       A symbolic link is created with os.symlink(). A hard link is
       created with os.link() when tarinfo._link_target (prepared by
       extract()) exists on disk. In every other case, including a
       symlink_exception raised by the link calls, the member the link
       refers to is looked up in the archive and extracted to
       targetpath.

       Raises ExtractError if that member cannot be found in the
       archive. This holds for both fallback routes, so a missing
       hard link target no longer leaks a bare KeyError to the caller.
    """
    try:
        # For systems that support symbolic and hard links.
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
            return
        # See extract().
        if os.path.exists(tarinfo._link_target):
            os.link(tarinfo._link_target, targetpath)
            return
    except symlink_exception:
        # Links are not available here; fall back to a copy below.
        pass

    try:
        referenced = self._find_link_target(tarinfo)
    except KeyError:
        raise ExtractError("unable to resolve link inside archive") from None
    self._extract_member(referenced, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002186
def chown(self, tarinfo, targetpath):
    """Set owner of targetpath according to tarinfo.

       Nothing is done unless the pwd module is available and the
       effective user id is 0. User and group are looked up by name
       first; the numeric tarinfo.uid / tarinfo.gid are used when the
       name is unknown. Symbolic links are changed with os.lchown()
       where it exists.

       Raises ExtractError, chained to the underlying OSError, if the
       owner cannot be changed.
    """
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        # We have to be root to do so.
        return

    try:
        g = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        g = tarinfo.gid
    try:
        u = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        u = tarinfo.uid

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, u, g)
        else:
            os.chown(targetpath, u, g)
    except OSError as e:
        # Keep the errno and filename of the failure reachable through
        # __cause__.
        raise ExtractError("could not change owner") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002207
def chmod(self, tarinfo, targetpath):
    """Set file permissions of targetpath according to tarinfo.

       Nothing is done when the os module has no chmod().

       Raises ExtractError, chained to the underlying OSError, if the
       mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except OSError as e:
        # Keep the errno and filename of the failure reachable through
        # __cause__.
        raise ExtractError("could not change mode") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002216
def utime(self, tarinfo, targetpath):
    """Set modification time of targetpath according to tarinfo.

       tarinfo.mtime is used for both the access and the modification
       time. Nothing is done when the os module has no utime().

       Raises ExtractError, chained to the underlying OSError, if the
       times cannot be changed.
    """
    if not hasattr(os, 'utime'):
        return
    try:
        os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
    except OSError as e:
        # Keep the errno and filename of the failure reachable through
        # __cause__.
        raise ExtractError("could not change modification time") from e
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002226
2227 #--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       A member stored in self.firstmember is handed out first. Header
       errors at offset 0 are reported as ReadError; with ignore_zeros
       set, end-of-file and invalid headers are skipped one block at a
       time instead of ending the archive.
    """
    self._check("ra")

    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Read the next block.
    self.fileobj.seek(self.offset)
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as exc:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as exc:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, exc))
                self.offset += BLOCKSIZE
                continue
            if self.offset == 0:
                raise ReadError(str(exc))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as exc:
            if self.offset == 0:
                raise ReadError(str(exc))
        except SubsequentHeaderError as exc:
            raise ReadError(str(exc))
        # Either a header was read or the archive ends here.
        break

    if tarinfo is None:
        self._loaded = True
    else:
        self.members.append(tarinfo)

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002273
2274 #--------------------------------------------------------------------------
2275 # Little helper methods:
2276
def _getmember(self, name, tarinfo=None, normalize=False):
    """Find an archive member by name from bottom to top.
       If tarinfo is given, it is used as the starting point.

       With `normalize' set, both the requested name and the member
       names are passed through os.path.normpath() before they are
       compared. None is returned when nothing matches.
    """
    # Ensure that all members have been loaded.
    candidates = self.getmembers()

    # Limit the member search list up to tarinfo.
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    if normalize:
        canonical = os.path.normpath
    else:
        def canonical(path):
            return path

    name = canonical(name)

    # Later members win over earlier ones with the same name.
    for candidate in reversed(candidates):
        if name == canonical(candidate.name):
            return candidate
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002299
def _load(self):
    """Read through the entire archive file and look for readable
       members.

       next() is called until it returns None, then the archive is
       flagged as completely loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2309
def _check(self, mode=None):
    """Check if TarFile is still open, and if the operation's mode
       corresponds to TarFile's mode.

       `mode' is a string of acceptable mode characters; None skips
       the mode test. OSError is raised when a check fails.
    """
    if self.closed:
        raise OSError("%s is closed" % self.__class__.__name__)
    if mode is None or self.mode in mode:
        return
    raise OSError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002318
def _find_link_target(self, tarinfo):
    """Find the target member of a symlink or hardlink member in the
       archive.

       Raises KeyError when no member matches the link name.
    """
    if tarinfo.issym():
        # A symlink is resolved relative to the directory of the link
        # itself, and the entire archive is searched.
        pieces = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join(piece for piece in pieces if piece)
        search_limit = None
    else:
        # Search the archive before the link, because a hard link is
        # just a reference to an already archived file.
        linkname = tarinfo.linkname
        search_limit = tarinfo

    found = self._getmember(linkname, tarinfo=search_limit, normalize=True)
    if found is None:
        raise KeyError("linkname %r not found" % linkname)
    return found
2337
def __iter__(self):
    """Provide an iterator object.

       Once the archive is completely loaded the member list is
       iterated directly; otherwise a TarIter reads members on demand.
    """
    return iter(self.members) if self._loaded else TarIter(self)
2345
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr.

       The message is printed only if `level' does not exceed
       self.debug.
    """
    if level > self.debug:
        return
    print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002351
def __enter__(self):
    """Enter the runtime context and return the archive itself.

       _check() is called first, without a mode restriction.
    """
    # Refuse to enter the context if _check() objects.
    self._check()
    return self
2355
def __exit__(self, type, value, traceback):
    """Leave the runtime context.

       Without an exception the archive is closed through close().
       After an exception only the underlying file object is closed
       (unless self._extfileobj is set) and the archive is marked as
       closed.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because
    # it would try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002365# class TarFile
2366
class TarIter:
    """Iterator Class.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Construct a TarIter object.
        """
        self.tarfile = tarfile
        # Position of the next member to hand out.
        self.index = 0

    def __iter__(self):
        """Return iterator object.
        """
        return self

    def __next__(self):
        """Return the next item using TarFile's next() method.
           When all members have been read, set TarFile as _loaded.
        """
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely.
        archive = self.tarfile

        if self.index == 0 and archive.firstmember is not None:
            entry = archive.next()
        elif self.index < len(archive.members):
            # Already read by someone else: serve it from the list.
            entry = archive.members[self.index]
        elif archive._loaded:
            raise StopIteration
        else:
            entry = archive.next()
            if not entry:
                archive._loaded = True
                raise StopIteration

        self.index += 1
        return entry
2404
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002405#--------------------
2406# exported functions
2407#--------------------
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.

       The archive is opened and closed again right away; a TarError
       raised while doing so means "not a tar archive".
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2418
# Keep the previous binding of `open' reachable as bltn_open (makefile()
# uses it to create plain files) before the module-level name `open' is
# rebound to TarFile.open. The order of these two statements matters.
bltn_open = open
open = TarFile.open
Serhiy Storchakad27b4552013-11-24 01:53:29 +02002421
2422
def main():
    """Run the command line interface of the module.

    Exactly one of the following actions is carried out, depending on
    the options found in sys.argv:

    -t/--test <tarfile>                 check that the file is a tar archive
    -l/--list <tarfile>                 list the members of the archive
    -e/--extract <tarfile> [<dir>]      extract into <dir> (default: os.curdir)
    -c/--create <name> [<file> ...]     create an archive from the sources

    With -c the compression is derived from the extension of <name>
    (.gz/.tgz, .xz/.txz, .bz2/.tbz/.tbz2/.tb2); any other extension
    produces an uncompressed archive. -v/--verbose prints a short
    confirmation. Failures and a missing action leave the program
    through parser.exit() with status 1.
    """
    import argparse

    description = 'A simple command line interface for tarfile module.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-l', '--list', metavar='<tarfile>',
                       help='Show listing of a tarfile')
    group.add_argument('-e', '--extract', nargs='+',
                       metavar=('<tarfile>', '<output_dir>'),
                       help='Extract tarfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create tarfile from sources')
    group.add_argument('-t', '--test', metavar='<tarfile>',
                       help='Test if a tarfile is valid')
    args = parser.parse_args()

    if args.test:
        src = args.test
        if is_tarfile(src):
            with open(src, 'r') as tar:
                tar.getmembers()
                # NOTE(review): the member list is echoed to stderr even
                # without --verbose; this looks like leftover debugging
                # output -- confirm before removing.
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.list:
        src = args.list
        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.extract:
        if len(args.extract) == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif len(args.extract) == 2:
            src, curdir = args.extract
        else:
            parser.exit(1, parser.format_help())

        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} file is extracted.'.format(src)
                else:
                    msg = ('{!r} file is extracted '
                           'into {!r} directory.').format(src, curdir)
                print(msg)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.create:
        tar_name = args.create.pop(0)
        # os.path.splitext() returns the extension *including* its
        # leading dot, so the lookup table has to be keyed the same way;
        # otherwise no compression is ever selected.
        _, ext = os.path.splitext(tar_name)
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
        tar_files = args.create

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))

    else:
        # No action requested: show the usage and fail.
        parser.exit(1, parser.format_help())
2512
# Run the command line interface when the file is executed as a script.
if __name__ == '__main__':
    main()