#!/usr/bin/env python3
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
# All rights reserved.
#
# Permission  is  hereby granted,  free  of charge,  to  any person
# obtaining a  copy of  this software  and associated documentation
# files  (the  "Software"),  to   deal  in  the  Software   without
# restriction,  including  without limitation  the  rights to  use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies  of  the  Software,  and to  permit  persons  to  whom the
# Software  is  furnished  to  do  so,  subject  to  the  following
# conditions:
#
# The above copyright  notice and this  permission notice shall  be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
"""Read from and write to tar format archives.
"""
31
# Module metadata: version string plus authorship/revision information.
version     = "0.9.0"
__author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__date__    = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $"
__cvsid__   = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000037
38#---------
39# Imports
40#---------
41import sys
42import os
Eli Bendersky74c503b2012-01-03 06:26:13 +020043import io
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000044import shutil
45import stat
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000046import time
47import struct
Thomas Wouters89f507f2006-12-13 04:49:30 +000048import copy
Guido van Rossumd8faa362007-04-27 19:54:29 +000049import re
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000050
51try:
52 import grp, pwd
Brett Cannoncd171c82013-07-04 17:43:24 -040053except ImportError:
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +000054 grp = pwd = None
55
# Exceptions that indicate a symbolic link could not be created:
#   - AttributeError / NotImplementedError: os.symlink is missing or not
#     implemented (os.symlink on Windows prior to 6.0 raises
#     NotImplementedError).
#   - OSError (winerror=1314): raised if the caller does not hold the
#     SeCreateSymbolicLinkPrivilege privilege.
# OSError is a builtin and therefore always defined, so the tuple can be
# built directly without guarding against NameError.
symlink_exception = (AttributeError, NotImplementedError, OSError)
64
# Names exported by ``from tarfile import *``.
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
67
68#---------------------------------------------------------
69# tar constants
70#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
# Both magic strings fill the 8-byte magic + version area of a header.
# The GNU variant is "ustar" followed by TWO spaces and a NUL.
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

REGTYPE = b"0"                  # regular file
AREGTYPE = b"\0"                # regular file
LNKTYPE = b"1"                  # link (inside tarfile)
SYMTYPE = b"2"                  # symbolic link
CHRTYPE = b"3"                  # character special device
BLKTYPE = b"4"                  # block special device
DIRTYPE = b"5"                  # directory
FIFOTYPE = b"6"                 # fifo special device
CONTTYPE = b"7"                 # contiguous file

GNUTYPE_LONGNAME = b"L"         # GNU tar longname
GNUTYPE_LONGLINK = b"K"         # GNU tar longlink
GNUTYPE_SPARSE = b"S"           # GNU tar sparse file

XHDTYPE = b"x"                  # POSIX.1-2001 extended header
XGLTYPE = b"g"                  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"          # Solaris extended header

USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000103
104#---------------------------------------------------------
105# tarfile constants
106#---------------------------------------------------------
# Member types this module knows how to handle.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Member types that are handled like a regular file.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)

# Member types that belong to the GNU tar format.
GNU_TYPES = (
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Pax header keywords whose value overrides a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Pax header keywords whose value is affected by hdrcharset.
PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}

# Pax header keywords holding numbers, mapped to the type used to
# convert them; every other keyword is treated as a string.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int,
}
139
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000140#---------------------------------------------------------
Guido van Rossumd8faa362007-04-27 19:54:29 +0000141# initialization
142#---------------------------------------------------------
# Default encoding: UTF-8 when os.name is "nt" or "ce", otherwise whatever
# sys.getfilesystemencoding() reports.
ENCODING = "utf-8" if os.name in ("nt", "ce") else sys.getfilesystemencoding()
Guido van Rossumd8faa362007-04-27 19:54:29 +0000147
148#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000149# Some useful functions
150#---------------------------------------------------------
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000151
def stn(s, length, encoding, errors):
    """Encode the string s into a bytes field of exactly length bytes.

    The encoded value is cut off after length bytes; a shorter value is
    padded on the right with null bytes. A value that fills the whole
    field therefore carries no terminating null byte.
    """
    encoded = s.encode(encoding, errors)
    return encoded[:length].ljust(length, b"\0")
Thomas Wouters477c8d52006-05-27 19:21:47 +0000157
def nts(s, encoding, errors):
    """Decode a null-terminated bytes object to a string.

    Everything from the first null byte onwards is discarded; if there is
    no null byte the whole of s is decoded.
    """
    head, _sep, _tail = s.partition(b"\0")
    return head.decode(encoding, errors)
Guido van Rossumd8faa362007-04-27 19:54:29 +0000165
def nti(s):
    """Convert a number field to a python number.

    s is the raw bytes of the field. Two encodings are understood (see
    itn() for the writer side):

    * a leading 0o200 or 0o377 byte: the remaining bytes are a big-endian
      base-256 number, 0o377 marking a negative value;
    * otherwise: an octal digit string, cut at the first null byte.
      Surrounding whitespace is ignored and a blank field counts as 0.

    Raises InvalidHeaderError if the octal form cannot be parsed.
    """
    if s[0] in (0o200, 0o377):
        n = int.from_bytes(s[1:], "big")
        if s[0] == 0o377:
            # Negative numbers are stored as a two's complement over the
            # len(s) - 1 payload bytes.
            n -= 256 ** (len(s) - 1)
    else:
        try:
            text = nts(s, "ascii", "strict")
            # strip() makes a field that holds only spaces read as 0
            # instead of being rejected as invalid.
            n = int(text.strip() or "0", 8)
        except ValueError:
            # Also covers UnicodeDecodeError from the ascii decode.
            raise InvalidHeaderError("invalid header")
    return n
184
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

    n is the value (truncated to an int first), digits the total width of
    the field in bytes and format the archive format in use. Returns the
    field as bytes (octal form) or bytearray (base-256 form).

    Raises ValueError if n does not fit into the field for the given format.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicate this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.

    # Coerce up front: the base-256 branch needs integer arithmetic and
    # would otherwise fail for float input (e.g. a float mtime).
    n = int(n)
    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        marker = 0o200 if n >= 0 else 0o377
        # Reducing modulo 256**(digits-1) leaves non-negative values
        # untouched and yields the two's complement of negative ones.
        payload = (n % 256 ** (digits - 1)).to_bytes(digits - 1, "big")
        s = bytearray([marker]) + payload
    else:
        raise ValueError("overflow in number field")

    return s
212
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

    All bytes of the header are summed except the eight bytes of the
    chksum field, which are counted as if they were spaces. According to
    the GNU tar sources, some tars (Sun and NeXT) calculate chksum with
    signed char, which gives a different result when bytes with the high
    bit set are present, so both variants are computed.
    """
    # "8x" skips the chksum field; eight spaces (8 * 0x20) add up to 256.
    chksum_field_as_spaces = 256
    as_unsigned = struct.unpack_from("148B8x356B", buf)
    as_signed = struct.unpack_from("148b8x356b", buf)
    return (chksum_field_as_spaces + sum(as_unsigned),
            chksum_field_as_spaces + sum(as_signed))
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000225
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    With length None the entire remaining content of src is copied.
    Raises OSError if src ends before length bytes could be read.
    """
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    BUFSIZE = 16 * 1024

    def transfer(nbytes):
        # Move exactly nbytes from src to dst; a short read means src
        # ended prematurely.
        chunk = src.read(nbytes)
        if len(chunk) < nbytes:
            raise OSError("end of file reached")
        dst.write(chunk)

    full_chunks, tail = divmod(length, BUFSIZE)
    for _ in range(full_chunks):
        transfer(BUFSIZE)
    if tail != 0:
        transfer(tail)
    return
250
def filemode(mode):
    """Deprecated in this location; use stat.filemode."""
    import warnings
    # stacklevel=2 attributes the warning to the caller of filemode().
    warnings.warn("deprecated in favor of stat.filemode",
                  category=DeprecationWarning, stacklevel=2)
    return stat.filemode(mode)
257
def _safe_print(s):
    """Print s followed by a space instead of a newline.

    If sys.stdout reports an encoding, characters that this encoding
    cannot represent are replaced by backslash escapes first.
    """
    stdout_encoding = getattr(sys.stdout, 'encoding', None)
    if stdout_encoding is None:
        text = s
    else:
        text = s.encode(stdout_encoding, 'backslashreplace').decode(stdout_encoding)
    print(text, end=' ')
263
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000264
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000298
299#---------------------------
300# internal stream interface
301#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        """Open the file name with os.open().

        mode must be "r" (read-only) or "w" (write-only, create and
        truncate); any other value raises KeyError.
        """
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # O_BINARY only exists on some platforms; use it when available.
        if hasattr(os, "O_BINARY"):
            mode |= os.O_BINARY
        self.fd = os.open(name, mode, 0o666)

    def close(self):
        """Close the underlying file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return up to size bytes read from the file descriptor."""
        return os.read(self.fd, size)

    def write(self, s):
        """Write all of the bytes-like object s to the file descriptor."""
        # os.write() reports how many bytes it actually wrote, which may
        # be fewer than requested; keep going until nothing is left so
        # no data is dropped silently.
        remaining = memoryview(s)
        while remaining:
            written = os.write(self.fd, remaining)
            remaining = remaining[written:]
325
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip, bzip2 or xz compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

        name is the file name (used to open the file when fileobj is
        None and for the gzip header), mode is "r" or "w", comptype is
        "tar", "gz", "bz2", "xz" or "*" (detect from the data), fileobj
        is an optional existing stream and bufsize the block size used
        for raw reads and writes.

        Raises CompressionError for an unknown comptype or when the
        required compression module cannot be imported.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = b""
        self.pos      = 0
        self.closed   = False

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32(b"")
                if mode == "r":
                    # Set before parsing the header so the attribute
                    # exists for every later decompress call.
                    self.exception = zlib.error
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            elif comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = bz2.BZ2Decompressor()
                    self.exception = OSError
                else:
                    self.cmp = bz2.BZ2Compressor()

            elif comptype == "xz":
                try:
                    import lzma
                except ImportError:
                    raise CompressionError("lzma module is not available")
                if mode == "r":
                    self.dbuf = b""
                    self.cmp = lzma.LZMADecompressor()
                    self.exception = lzma.LZMAError
                else:
                    self.cmp = lzma.LZMACompressor()

            elif comptype != "tar":
                raise CompressionError("unknown compression type %r" % comptype)

        except BaseException:
            # Do not leak a file this object opened itself, then let the
            # error propagate unchanged.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        # "closed" is missing if __init__ failed before setting it.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # RFC1952 says we must use ISO-8859-1 for the FNAME field.
        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)

    def write(self, s):
        """Write the bytes s to the stream, compressing them unless
           comptype is "tar".
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc)
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Append s to the output buffer and write out every complete
           block of bufsize bytes that exceeds it.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        # Mark as closed first so a failing flush is not retried from
        # __del__, and release an internally opened file in any case.
        self.closed = True
        try:
            if self.mode == "w" and self.comptype != "tar":
                self.buf += self.cmp.flush()

            if self.mode == "w" and self.buf:
                self.fileobj.write(self.buf)
                self.buf = b""
                if self.comptype == "gz":
                    # gzip trailer: CRC32 and uncompressed size, both
                    # masked to unsigned 32 bits for struct.pack().
                    self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
        finally:
            if not self._extfileobj:
                self.fileobj.close()

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

        Consumes the gzip header from the raw stream. Raises ReadError if
        the magic bytes are wrong and CompressionError if the compression
        method is not deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = b""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != b"\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != b"\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            # Extra field: skip it on the RAW stream. Using read() here
            # would feed header bytes to the decompressor and advance pos.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.__read(xlen)
        if flag & 8:
            # Null-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # Null-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # Header CRC16.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden (StreamError); seeking forward is done by
           reading and discarding data.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            # The chunks are bytes, so they must be joined with bytes.
            buf = b"".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return up to size bytes of (decompressed) data.

        Raises ReadError if the compressed data is invalid.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                raise ReadError("invalid compressed data")
            self.dbuf += buf
            c += len(buf)
        buf = self.dbuf[:size]
        self.dbuf = self.dbuf[size:]
        return buf

    def __read(self, size):
        """Return up to size bytes from the raw stream. If the internal
           buffer does not hold enough data, read further blocks from
           the underlying file object.
        """
        c = len(self.buf)
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            self.buf += buf
            c += len(buf)
        buf = self.buf[:size]
        self.buf = self.buf[size:]
        return buf
# class _Stream
575
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        """Wrap fileobj and pre-read its first block for inspection."""
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the pre-read block on the first call (size is ignored).

        The method replaces itself on the instance, so every later call
        goes straight to the wrapped file object's read().
        """
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2", "xz" or "tar" according to the signature
           found at the start of the pre-read block.
        """
        head = self.buf
        if head.startswith(b"\x1f\x8b\x08"):
            return "gz"
        if head[0:3] == b"BZh" and head[4:10] == b"1AY&SY":
            return "bz2"
        if head.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):
            return "xz"
        return "tar"

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class StreamProxy
602
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000603#------------------------
604# Extraction file object
605#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, blockinfo=None):
        """Expose size bytes of fileobj as a file of their own.

        offset is where the stored data starts in fileobj. blockinfo is
        an optional list of (offset, size) pairs naming the regions of
        the logical file that are actually stored (back to back from
        offset); everything else reads as null bytes. Without blockinfo
        the whole file is one stored region.
        """
        self.fileobj = fileobj
        self.offset = offset
        self.size = size
        self.position = 0
        self.name = getattr(fileobj, "name", None)
        self.closed = False

        if blockinfo is None:
            blockinfo = [(0, size)]

        # Construct a map with data and zero blocks. Each entry is
        # (is_data, start, stop, position_in_fileobj).
        self.map_index = 0
        self.map = []
        lastpos = 0
        realpos = self.offset
        for offset, size in blockinfo:
            if offset > lastpos:
                self.map.append((False, lastpos, offset, None))
            self.map.append((True, offset, offset + size, realpos))
            realpos += size
            lastpos = offset + size
        if lastpos < self.size:
            self.map.append((False, lastpos, self.size, None))

    def flush(self):
        pass

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return self.fileobj.seekable()

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position, whence=io.SEEK_SET):
        """Seek to a position in the file.

        The resulting position is clamped to the range 0..size and
        returned. Raises ValueError for an unknown whence.
        """
        if whence == io.SEEK_SET:
            self.position = min(max(position, 0), self.size)
        elif whence == io.SEEK_CUR:
            if position < 0:
                self.position = max(self.position + position, 0)
            else:
                self.position = min(self.position + position, self.size)
        elif whence == io.SEEK_END:
            self.position = max(min(self.size + position, self.size), 0)
        else:
            raise ValueError("Invalid argument")
        return self.position

    def read(self, size=None):
        """Read data from the file.

        Returns at most size bytes (everything up to the end of the file
        if size is None). Raises ReadError if the underlying file object
        delivers less data than the map says is stored.
        """
        if size is None:
            size = self.size - self.position
        else:
            size = min(size, self.size - self.position)

        chunks = []
        while size > 0:
            # Find the map entry covering the current position, starting
            # at the entry used last and wrapping around at the end.
            while True:
                data, start, stop, offset = self.map[self.map_index]
                if start <= self.position < stop:
                    break
                else:
                    self.map_index += 1
                    if self.map_index == len(self.map):
                        self.map_index = 0
            length = min(size, stop - self.position)
            if data:
                self.fileobj.seek(offset + (self.position - start))
                chunk = self.fileobj.read(length)
                # A short read means the archive is truncated; returning
                # it while advancing position by length would silently
                # hand out misaligned data.
                if len(chunk) != length:
                    raise ReadError("unexpected end of data")
                chunks.append(chunk)
            else:
                chunks.append(NUL * length)
            size -= length
            self.position += length
        return b"".join(chunks)

    def readinto(self, b):
        """Read into the writable buffer b; return the number of bytes read."""
        buf = self.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def close(self):
        """Mark this object as closed; the wrapped file object stays open."""
        self.closed = True
#class _FileInFile
Martin v. Löwisdf241532005-03-03 08:17:42 +0000706
class ExFileObject(io.BufferedReader):
    """Buffered reader over the data of a single archive member."""

    def __init__(self, tarfile, tarinfo):
        """Build a _FileInFile over the member's data region in
           tarfile.fileobj (start, size and sparse map are taken from
           tarinfo) and buffer it.
        """
        member_data = _FileInFile(
            tarfile.fileobj,
            tarinfo.offset_data,
            tarinfo.size,
            tarinfo.sparse,
        )
        super().__init__(member_data)
#class ExFileObject
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +0000714
715#------------------
716# Exported Classes
717#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    __slots__ = ("name", "mode", "uid", "gid", "size", "mtime",
                 "chksum", "type", "linkname", "uname", "gname",
                 "devmajor", "devminor",
                 "offset", "offset_data", "pax_headers", "sparse",
                 "tarfile", "_sparse_structs", "_link_target")

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0o644       # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = ""         # user name
        self.gname = ""         # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.sparse = None      # sparse member information
        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self):
        """Return the TarInfo's attributes as a dictionary.

           The mode is masked with 0o7777 and a directory name is
           given a trailing slash if it does not already have one.
        """
        info = {
            "name":     self.name,
            "mode":     self.mode & 0o7777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": self.linkname,
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
        """Return a tar header as a bytes object made of 512 byte blocks.

           format selects the header flavour (USTAR_FORMAT, GNU_FORMAT
           or PAX_FORMAT); any other value raises ValueError.
        """
        info = self.get_info()

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info, encoding, errors)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info, encoding, errors)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info, encoding, errors):
        """Return the object as a ustar header block.

           Raises ValueError if the linkname is longer than LENGTH_LINK
           or the name cannot be split into prefix and name parts.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT, encoding, errors)

    def create_gnu_header(self, info, encoding, errors):
        """Return the object as a GNU header block sequence.

           Over-long names and linknames are stored in preceding
           GNUTYPE_LONGNAME / GNUTYPE_LONGLINK pseudo members.
        """
        info["magic"] = GNU_MAGIC

        buf = b""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)

        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)

    def create_pax_header(self, info, encoding):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            # Try to encode the string as ASCII.
            try:
                info[name].encode("ascii", "strict")
            except UnicodeEncodeError:
                pax_headers[hname] = info[name]
                continue

            if len(info[name]) > length:
                pax_headers[hname] = info[name]

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = str(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)
        else:
            buf = b""

        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
           and a name part.

           The split happens at the last "/" within the first
           LENGTH_PREFIX + 1 characters. Raises ValueError if no
           suitable split point exists.
        """
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        # Drop the separating slash from the prefix.
        prefix = prefix[:-1]

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format, encoding, errors):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100, encoding, errors),
            itn(info.get("mode", 0) & 0o7777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            b"        ", # checksum field
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100, encoding, errors),
            info.get("magic", POSIX_MAGIC),
            stn(info.get("uname", ""), 32, encoding, errors),
            stn(info.get("gname", ""), 32, encoding, errors),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155, encoding, errors)
        ]

        # Pad the joined fields to a full block, compute the checksum over
        # it (with the checksum field still blank) and then patch the
        # checksum into place.
        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the bytes payload filled with zero bytes
           up to the next 512 byte border.
        """
        remainder = len(payload) % BLOCKSIZE
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type, encoding, errors):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        name = name.encode(encoding, errors) + NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type, encoding):
        """Return a POSIX.1-2008 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be strings.
        """
        # Check if one of the fields contains surrogate characters and thereby
        # forces hdrcharset=BINARY, see _proc_pax() for more information.
        binary = False
        for value in pax_headers.values():
            try:
                value.encode("utf-8", "strict")
            except UnicodeEncodeError:
                binary = True
                break

        records = []
        if binary:
            # Put the hdrcharset field at the beginning of the header.
            records.append(b"21 hdrcharset=BINARY\n")

        for keyword, value in pax_headers.items():
            keyword = keyword.encode("utf-8")
            if binary:
                # Try to restore the original byte representation of `value'.
                # Needless to say, that the encoding must match the string.
                value = value.encode(encoding, "surrogateescape")
            else:
                value = value.encode("utf-8")

            # The record length includes its own decimal representation,
            # so iterate until the total length is stable.
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records.append(bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n")
        records = b"".join(records)

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf, encoding, errors):
        """Construct a TarInfo object from a 512 byte bytes object.

           Raises EmptyHeaderError, TruncatedHeaderError, EOFHeaderError
           or InvalidHeaderError if buf is empty, has the wrong length,
           consists only of NUL bytes or has a bad checksum.
        """
        if len(buf) == 0:
            raise EmptyHeaderError("empty header")
        if len(buf) != BLOCKSIZE:
            raise TruncatedHeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise EOFHeaderError("end of file header")

        chksum = nti(buf[148:156])
        if chksum not in calc_chksums(buf):
            raise InvalidHeaderError("bad checksum")

        obj = cls()
        obj.name = nts(buf[0:100], encoding, errors)
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257], encoding, errors)
        obj.uname = nts(buf[265:297], encoding, errors)
        obj.gname = nts(buf[297:329], encoding, errors)
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500], encoding, errors)

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # The old GNU sparse format occupies some of the unused
        # space in the buffer for up to 4 sparse structures.
        # Save them for later processing in _proc_sparse().
        if obj.type == GNUTYPE_SPARSE:
            pos = 386
            structs = []
            for i in range(4):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[482])
            origsize = nti(buf[483:495])
            obj._sparse_structs = (structs, isextended, origsize)

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf, tarfile.encoding, tarfile.errors)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        # We already collected some sparse structures in frombuf().
        structs, isextended, origsize = self._sparse_structs
        del self._sparse_structs

        # Collect sparse structures from extended header blocks.
        while isextended:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            for i in range(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset and numbytes:
                    structs.append((offset, numbytes))
                pos += 24
            isextended = bool(buf[504])
        self.sparse = structs

        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize
        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2008.

           Raises InvalidHeaderError if a record declares a length of
           zero and SubsequentHeaderError if the header that follows is
           missing or bad.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Check if the pax header contains a hdrcharset field. This tells us
        # the encoding of the path, linkpath, uname and gname fields. Normally,
        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
        # implementations are allowed to store them as raw binary strings if
        # the translation to UTF-8 fails.
        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
        if match is not None:
            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")

        # For the time being, we don't care about anything other than "BINARY".
        # The only other value that is currently allowed by the standard is
        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
        hdrcharset = pax_headers.get("hdrcharset")
        if hdrcharset == "BINARY":
            encoding = tarfile.encoding
        else:
            encoding = "utf-8"

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(br"(\d+) ([^=]+)=")
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            if length == 0:
                # A record that claims a length of zero would never
                # advance `pos' and make this loop spin forever on a
                # malformed archive.
                raise InvalidHeaderError("invalid header")
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            # Normally, we could just use "utf-8" as the encoding and "strict"
            # as the error handler, but we better not take the risk. For
            # example, GNU tar <= 1.23 is known to store filenames it cannot
            # translate to UTF-8 as raw strings (unfortunately without a
            # hdrcharset=BINARY header).
            # We first try the strict standard encoding, and if that fails we
            # fall back on the user's encoding and error handler.
            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
                    tarfile.errors)
            if keyword in PAX_NAME_FIELDS:
                value = self._decode_pax_field(value, encoding, tarfile.encoding,
                        tarfile.errors)
            else:
                value = self._decode_pax_field(value, "utf-8", "utf-8",
                        tarfile.errors)

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        try:
            next = self.fromtarfile(tarfile)
        except HeaderError:
            raise SubsequentHeaderError("missing or bad subsequent header")

        # Process GNU sparse information.
        if "GNU.sparse.map" in pax_headers:
            # GNU extended sparse format version 0.1.
            self._proc_gnusparse_01(next, pax_headers)

        elif "GNU.sparse.size" in pax_headers:
            # GNU extended sparse format version 0.0.
            self._proc_gnusparse_00(next, pax_headers, buf)

        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
            # GNU extended sparse format version 1.0.
            self._proc_gnusparse_10(next, pax_headers, tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        # The offset/numbytes records repeat, so they are collected from
        # the raw buffer instead of the pax_headers dictionary.
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes))

    def _proc_gnusparse_01(self, next, pax_headers):
        """Process a GNU tar extended sparse header, version 0.1.
        """
        # The map is a flat comma-separated list: offset,size,offset,size,...
        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _proc_gnusparse_10(self, next, pax_headers, tarfile):
        """Process a GNU tar extended sparse header, version 1.0.
        """
        # The sparse map precedes the member data: first the number of
        # entries, then offset and size of each entry, one number per line.
        sparse = []
        buf = tarfile.fileobj.read(BLOCKSIZE)
        fields, buf = buf.split(b"\n", 1)
        fields = int(fields)
        while len(sparse) < fields * 2:
            if b"\n" not in buf:
                buf += tarfile.fileobj.read(BLOCKSIZE)
            number, buf = buf.split(b"\n", 1)
            sparse.append(int(number))
        next.offset_data = tarfile.fileobj.tell()
        next.sparse = list(zip(sparse[::2], sparse[1::2]))

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
        """
        for keyword, value in pax_headers.items():
            if keyword == "GNU.sparse.name":
                setattr(self, "path", value)
            elif keyword == "GNU.sparse.size":
                setattr(self, "size", int(value))
            elif keyword == "GNU.sparse.realsize":
                setattr(self, "size", int(value))
            elif keyword in PAX_FIELDS:
                if keyword in PAX_NUMBER_FIELDS:
                    try:
                        value = PAX_NUMBER_FIELDS[keyword](value)
                    except ValueError:
                        value = 0
                if keyword == "path":
                    value = value.rstrip("/")
                setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):
        """Decode a single field from a pax record.

           Strict decoding with encoding is tried first; if that fails,
           fallback_encoding with fallback_errors is used instead.
        """
        try:
            return value.decode(encoding, "strict")
        except UnicodeDecodeError:
            return value.decode(fallback_encoding, fallback_errors)

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.sparse is not None
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
1374
1375class TarFile(object):
1376 """The TarFile Class provides an interface to tar archives.
1377 """
1378
# Debug verbosity; may be set from 0 (no msgs) to 3 (all msgs).
debug = 0

# If true, add content of linked file to the tar file, else the link.
dereference = False

# If true, skips empty or invalid blocks and continues processing.
ignore_zeros = False

# If 0, fatal errors only appear in debug messages (if debug >= 0).
# If > 0, errors are passed to the caller as exceptions.
errorlevel = 1

# The format to use when creating an archive.
format = DEFAULT_FORMAT

# Encoding for 8-bit character strings.
encoding = ENCODING

# Error handler for unicode conversion.
errors = None

# The default TarInfo class to use.
tarinfo = TarInfo

# The file-object for extractfile().
fileobject = ExFileObject
Guido van Rossumd8faa362007-04-27 19:54:29 +00001400
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
    """Open the (uncompressed) tar archive `name'.

    `mode' is 'r' to read an existing archive, 'a' to append to an
    existing file, or 'w' to create a new file, overwriting an existing
    one; it defaults to 'r'.  Any other value raises ValueError.

    If `fileobj' is given it is used for all reading and writing, and
    its own `mode' attribute (when present) replaces the low-level file
    mode.  A `fileobj' supplied by the caller is not closed when the
    TarFile is closed.

    The remaining keyword arguments override the class-level defaults
    of the same name when they are not None.
    """
    open_modes = {"r": "rb", "a": "r+b", "w": "wb"}
    if mode not in open_modes:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = open_modes[mode]

    if fileobj:
        # Caller-supplied file object: borrow its name and mode if possible.
        if (name is None and hasattr(fileobj, "name") and
                isinstance(fileobj.name, (str, bytes))):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    else:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Per-instance overrides of the class-level defaults.
    overrides = (
        ("format", format),
        ("tarinfo", tarinfo),
        ("dereference", dereference),
        ("ignore_zeros", ignore_zeros),
        ("encoding", encoding),
    )
    for attr, value in overrides:
        if value is not None:
            setattr(self, attr, value)
    self.errors = errors

    # Global pax headers are only kept when the archive format is pax.
    use_pax_headers = pax_headers is not None and self.format == PAX_FORMAT
    self.pax_headers = pax_headers if use_pax_headers else {}

    for attr, value in (("debug", debug), ("errorlevel", errorlevel)):
        if value is not None:
            setattr(self, attr, value)

    # Bookkeeping state.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Walk over the existing members so that writing resumes
            # right before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    existing = self.tarinfo.fromtarfile(self)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as exc:
                    raise ReadError(str(exc))
                else:
                    self.members.append(existing)

        if self.mode in ("a", "w"):
            self._loaded = True

            if self.pax_headers:
                header = self.tarinfo.create_pax_global_header(
                    self.pax_headers.copy())
                self.fileobj.write(header)
                self.offset += len(header)
    except:
        # Do not leak a file we opened ourselves; then re-raise.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
Guido van Rossumd8faa362007-04-27 19:54:29 +00001498
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001499 #--------------------------------------------------------------------------
1500 # Below are the classmethods which act as alternate constructors to the
1501 # TarFile class. The open() method is the only one that is needed for
1502 # public use; it is the "super"-constructor and is able to select an
1503 # adequate "sub"-constructor for a particular compression using the mapping
1504 # from OPEN_METH.
1505 #
1506 # This concept allows one to subclass TarFile without losing the comfort of
1507 # the super-constructor. A sub-constructor is registered and made available
1508 # by adding it to the mapping in OPEN_METH.
1509
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending and return
    an appropriate TarFile instance.

    Supported values for `mode':
    'r' or 'r:*' open for reading with transparent compression
    'r:'         open for reading exclusively uncompressed
    'r:gz'       open for reading with gzip compression
    'r:bz2'      open for reading with bzip2 compression
    'r:xz'       open for reading with lzma compression
    'a' or 'a:'  open for appending, creating the file if necessary
    'w' or 'w:'  open for writing without compression
    'w:gz'       open for writing with gzip compression
    'w:bz2'      open for writing with bzip2 compression
    'w:xz'       open for writing with lzma compression

    'r|*'        open a stream of tar blocks with transparent compression
    'r|'         open an uncompressed stream of tar blocks for reading
    'r|gz'       open a gzip compressed stream of tar blocks
    'r|bz2'      open a bzip2 compressed stream of tar blocks
    'r|xz'       open an lzma compressed stream of tar blocks
    'w|'         open an uncompressed stream for writing
    'w|gz'       open a gzip compressed stream for writing
    'w|bz2'      open a bzip2 compressed stream for writing
    'w|xz'       open an lzma compressed stream for writing

    Raises ValueError when neither `name' nor `fileobj' is given or the
    mode cannot be interpreted, CompressionError for an unregistered
    compression type, and ReadError when no registered opener accepts
    the file in transparent mode.
    """
    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Transparent mode: try every registered opener in turn.
        for comptype in cls.OPEN_METH:
            opener = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                start = fileobj.tell()
            try:
                return opener(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind so the next opener sees the same bytes.
                if fileobj is not None:
                    fileobj.seek(start)
        raise ReadError("file could not be opened successfully")

    if ":" in mode:
        filemode, _, comptype = mode.partition(":")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Pick the opener registered for the requested compression.
        if comptype not in cls.OPEN_METH:
            raise CompressionError("unknown compression type %r" % comptype)
        opener = getattr(cls, cls.OPEN_METH[comptype])
        return opener(name, filemode, fileobj, **kwargs)

    if "|" in mode:
        filemode, _, comptype = mode.partition("|")
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        stream = _Stream(name, filemode, comptype, fileobj, bufsize)
        try:
            archive = cls(name, filemode, stream, **kwargs)
        except:
            stream.close()
            raise
        # The stream wrapper was created here, so the archive owns it.
        archive._extfileobj = False
        return archive

    if mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001589
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open the uncompressed tar archive `name' for reading, appending
    or writing by instantiating `cls'.

    Raises ValueError if `mode' is not 'r', 'a' or 'w'.
    """
    if mode in ("r", "a", "w"):
        return cls(name, mode, fileobj, **kwargs)
    raise ValueError("mode must be 'r', 'a' or 'w'")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001597
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the gzip compressed tar archive `name' for reading or
    writing.  Appending is not allowed.

    Raises ValueError for any mode other than 'r' or 'w',
    CompressionError when gzip.GzipFile is unavailable, and ReadError
    when an OSError occurs while opening for reading.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    try:
        fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj)
    except OSError:
        if fileobj is not None and mode == 'r':
            raise ReadError("not a gzip file")
        raise

    try:
        archive = cls.taropen(name, mode, fileobj, **kwargs)
    except BaseException as exc:
        # Whatever went wrong, the GzipFile wrapper must not leak.
        fileobj.close()
        if isinstance(exc, OSError) and mode == 'r':
            raise ReadError("not a gzip file")
        raise
    # The GzipFile wrapper was created here, so the archive owns it.
    archive._extfileobj = False
    return archive
1631
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open the bzip2 compressed tar archive `name' for reading or
    writing.  Appending is not allowed.

    Raises ValueError for any mode other than 'r' or 'w',
    CompressionError when the bz2 module cannot be imported, and
    ReadError when an OSError or EOFError occurs while reading the
    archive in mode 'r'.
    """
    if mode not in ("r", "w"):
        # Same wording as the other *open() constructors (no trailing dot).
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    fileobj = bz2.BZ2File(fileobj or name, mode,
                          compresslevel=compresslevel)

    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (OSError, EOFError) as e:
        fileobj.close()
        if mode == 'r':
            # Keep the low-level error attached as the explicit cause.
            raise ReadError("not a bzip2 file") from e
        raise
    except:
        fileobj.close()
        raise
    # The BZ2File wrapper was created here, so the archive owns it.
    t._extfileobj = False
    return t
1660
@classmethod
def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):
    """Open the lzma compressed tar archive `name' for reading or
    writing.  Appending is not allowed.

    Raises ValueError for any mode other than 'r' or 'w',
    CompressionError when the lzma module cannot be imported, and
    ReadError when an LZMAError or EOFError occurs while reading the
    archive in mode 'r'.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import lzma
    except ImportError:
        raise CompressionError("lzma module is not available")

    fileobj = lzma.LZMAFile(fileobj or name, mode, preset=preset)

    try:
        archive = cls.taropen(name, mode, fileobj, **kwargs)
    except BaseException as exc:
        # Whatever went wrong, the LZMAFile wrapper must not leak.
        fileobj.close()
        if isinstance(exc, (lzma.LZMAError, EOFError)) and mode == 'r':
            raise ReadError("not an lzma file")
        raise
    # The LZMAFile wrapper was created here, so the archive owns it.
    archive._extfileobj = False
    return archive
1688
# Registry of all *open() constructors, keyed by compression type.
# open() walks it in this order when detecting compression itself.
OPEN_METH = dict(
    tar="taropen",      # uncompressed tar
    gz="gzopen",        # gzip compressed tar
    bz2="bz2open",      # bzip2 compressed tar
    xz="xzopen",        # lzma compressed tar
)
1696
1697 #--------------------------------------------------------------------------
1698 # The public methods which TarFile provides:
1699
def close(self):
    """Close the TarFile.

    In write or append mode, two finishing zero blocks are appended to
    the archive and the output is padded with zeros up to a multiple of
    RECORDSIZE.  Calling close() on an already closed TarFile does
    nothing.

    The archive is marked closed and an internally opened file object
    is closed even if writing the trailer raises; the exception still
    propagates to the caller.
    """
    if self.closed:
        return

    # Flag the archive as closed up front so that a failing trailer
    # write cannot leave it half-open.
    self.closed = True
    try:
        if self.mode in "aw":
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        # Only close file objects that this TarFile opened itself.
        if not self._extfileobj:
            self.fileobj.close()
1719
def getmember(self, name):
    """Return the TarInfo object for member `name'.

    The lookup is delegated to self._getmember(); when that returns
    None, KeyError is raised.  If a member occurs more than once in the
    archive, its last occurrence is assumed to be the most up-to-date
    version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001730
def getmembers(self):
    """Return the members of the archive as a list of TarInfo objects,
    in the same order as they appear in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # A complete listing requires scanning the whole archive first.
    self._load()
    return self.members
1740
def getnames(self):
    """Return the names of the archive members as a list, in the same
    order as the list returned by getmembers().
    """
    members = self.getmembers()
    return [member.name for member in members]
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001746
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Create a TarInfo object for either the file `name' or the file
    object `fileobj' (using os.fstat on its file descriptor).

    The returned object may be modified before it is passed to
    addfile().  If given, `arcname' specifies an alternative name for
    the file in the archive.  None is returned when the file is of a
    kind that is not recognised here.
    """
    self._check("aw")

    # A file object's own name takes precedence over `name'.
    if fileobj is not None:
        name = fileobj.name

    # Build the member name: drop the drive, convert backward slashes
    # to forward slashes and turn absolute paths into relative ones.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    # The TarInfo object that is filled in below.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Use fstat for file objects; otherwise lstat unless symlinks are
    # to be followed (or the platform has no lstat).
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        already_archived = (not self.dereference and statres.st_nlink > 1
                            and inode in self.inodes
                            and arcname != self.inodes[inode])
        if already_archived:
            # A hardlink to a file that was archived earlier.
            member_type = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # The inode is only remembered if it is valid.
            # For win32 it is always 0.
            member_type = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    elif stat.S_ISDIR(stmd):
        member_type = DIRTYPE
    elif stat.S_ISFIFO(stmd):
        member_type = FIFOTYPE
    elif stat.S_ISLNK(stmd):
        member_type = SYMTYPE
        linkname = os.readlink(name)
    elif stat.S_ISCHR(stmd):
        member_type = CHRTYPE
    elif stat.S_ISBLK(stmd):
        member_type = BLKTYPE
    else:
        return None

    # Copy everything the stat result provides into the TarInfo.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    # Only regular files carry data in the archive.
    tarinfo.size = statres.st_size if member_type == REGTYPE else 0
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = member_type
    tarinfo.linkname = linkname
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if (member_type in (CHRTYPE, BLKTYPE)
            and hasattr(os, "major") and hasattr(os, "minor")):
        tarinfo.devmajor = os.major(statres.st_rdev)
        tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1844
def list(self, verbose=True):
    """Print a table of contents to sys.stdout.

    If `verbose' is False, only the names of the members are printed.
    If it is True, an `ls -l'-like output is produced.
    """
    self._check()

    for member in self:
        if verbose:
            owner = "%s/%s" % (member.uname or member.uid,
                               member.gname or member.gid)
            _safe_print(stat.filemode(member.mode))
            _safe_print(owner)
            if member.ischr() or member.isblk():
                # Devices show "major,minor" in place of a size.
                device = "%d,%d" % (member.devmajor, member.devminor)
                _safe_print("%10s" % device)
            else:
                _safe_print("%10d" % member.size)
            timestamp = time.localtime(member.mtime)[:6]
            _safe_print("%d-%02d-%02d %02d:%02d:%02d" % timestamp)

        suffix = "/" if member.isdir() else ""
        _safe_print(member.name + suffix)

        if verbose:
            if member.issym():
                _safe_print("-> " + member.linkname)
            if member.islnk():
                _safe_print("link to " + member.linkname)
        print()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001873
def add(self, name, arcname=None, recursive=True, exclude=None, *, filter=None):
    """Add the file `name' to the archive.

    `name' may be any type of file (directory, fifo, symbolic link,
    etc.).  If given, `arcname' specifies an alternative name for the
    file in the archive.  Directories are added recursively by default;
    set `recursive' to False to avoid that.  Directory entries are
    visited in sorted order so that the member order does not depend on
    the order in which the filesystem lists them.

    `exclude' (deprecated, use `filter') is a function that should
    return True for each filename to be excluded.  `filter' is a
    function that takes a TarInfo object and returns the changed
    TarInfo object; if it returns None the TarInfo object is excluded
    from the archive.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames.
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Change or exclude the TarInfo object.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as f:
            self.addfile(tarinfo, f)

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            # os.listdir() returns entries in arbitrary order; sort them
            # so that the resulting archive layout is reproducible.
            for f in sorted(os.listdir(name)):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                        recursive, exclude, filter=filter)

    else:
        self.addfile(tarinfo)
1934
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive.

    If `fileobj' is given, tarinfo.size bytes are read from it and
    added to the archive.  TarInfo objects can be created with
    gettarinfo().  On Windows platforms, `fileobj' should always be
    opened with mode 'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so the caller's object is not the one stored.
    member = copy.copy(tarinfo)

    header = member.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it and pad to a full block.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        full_blocks, leftover = divmod(member.size, BLOCKSIZE)
        if leftover > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - leftover))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(member)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00001960
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
    directory and set owner, modification time and permissions on
    directories afterwards.

    `path' specifies a different directory to extract to.  `members'
    is optional and must be a subset of the list returned by
    getmembers().
    """
    if members is None:
        members = self

    pending_dirs = []
    for tarinfo in members:
        if tarinfo.isdir():
            # Remember the real entry, but extract a copy with a safe
            # mode for now.
            pending_dirs.append(tarinfo)
            tarinfo = copy.copy(tarinfo)
            tarinfo.mode = 0o700
        # Directory attributes are applied further down instead.
        self.extract(tarinfo, path, set_attrs=not tarinfo.isdir())

    # Visit the directories in reverse name order and set the correct
    # owner, mtime and filemode on each.
    for tarinfo in reversed(sorted(pending_dirs, key=lambda d: d.name)):
        dirpath = os.path.join(path, tarinfo.name)
        try:
            self.chown(tarinfo, dirpath)
            self.utime(tarinfo, dirpath)
            self.chmod(tarinfo, dirpath)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
1998
def extract(self, member, path="", set_attrs=True):
    """Extract a member from the archive to the current working
    directory, using its full name.

    Its file information is extracted as accurately as possible.
    `member' may be a filename or a TarInfo object.  A different target
    directory can be given with `path'.  File attributes (owner, mtime,
    mode) are set unless `set_attrs' is False.

    An OSError is re-raised when errorlevel > 0 and an ExtractError
    when errorlevel > 1; otherwise the error is only reported through
    the debug output.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        destination = os.path.join(path, tarinfo.name)
        self._extract_member(tarinfo, destination, set_attrs=set_attrs)
    except OSError as e:
        if self.errorlevel > 0:
            raise
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2033
def extractfile(self, member):
    """Extract a member from the archive as a file object.

    `member' may be a filename or a TarInfo object.  For a regular
    file, or a member whose type is not in SUPPORTED_TYPES, the result
    of self.fileobject(self, tarinfo) is returned.  For a link the file
    object of its target is returned, unless the archive is read from a
    _Stream, in which case StreamError is raised.  Otherwise None is
    returned.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, str) else member

    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        # Members with unknown types are treated as regular files.
        return self.fileobject(self, tarinfo)

    if not (tarinfo.islnk() or tarinfo.issym()):
        # If there's no data associated with the member (directory,
        # chrdev, blkdev, etc.), return None instead of a file object.
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    return self.extractfile(self._find_link_target(tarinfo))
2064
def _extract_member(self, tarinfo, targetpath, set_attrs=True):
    """Materialise the TarInfo object `tarinfo' as the filesystem entry
       `targetpath'. Owner, mode and modification time are applied
       afterwards unless `set_attrs' is false.
    """
    # Drop trailing slashes and switch to the platform's separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Parent directories that are not part of the archive are created
    # with default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        message = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        message = tarinfo.name
    self._dbg(1, message)

    # Pick the make*() method matching the member type, then call it.
    if tarinfo.isreg():
        build = self.makefile
    elif tarinfo.isdir():
        build = self.makedir
    elif tarinfo.isfifo():
        build = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        build = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        build = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        build = self.makeunknown
    else:
        build = self.makefile
    build(tarinfo, targetpath)

    if not set_attrs:
        return

    self.chown(tarinfo, targetpath)
    # The mode of a symbolic link is never touched.
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
    self.utime(tarinfo, targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002107
2108 #--------------------------------------------------------------------------
2109 # Below are the different file methods. They are called via
2110 # _extract_member() when extract() is called. They can be replaced in a
2111 # subclass to implement other functionality.
2112
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. An already existing entry is
       silently accepted.
    """
    # Owner-only permissions for now; the mode recorded in the archive
    # is applied later by _extract_member().
    safe_mode = 0o700
    try:
        os.mkdir(targetpath, safe_mode)
    except FileExistsError:
        return
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002122
def makefile(self, tarinfo, targetpath):
    """Write the data of `tarinfo' to a new file `targetpath'.
    """
    archive = self.fileobj
    archive.seek(tarinfo.offset_data)
    with bltn_open(targetpath, "wb") as out:
        if tarinfo.sparse is None:
            copyfileobj(archive, out, tarinfo.size)
        else:
            # Sparse member: write each (offset, size) chunk at its
            # recorded position in the output file.
            for start, length in tarinfo.sparse:
                out.seek(start)
                copyfileobj(archive, out, length)
        # Force the output to be exactly tarinfo.size bytes long.
        out.seek(tarinfo.size)
        out.truncate()
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002137
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' as if it were
       a regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    note = ("tarfile: Unknown file type %r, "
            "extracted as regular file.") % tarinfo.type
    self._dbg(1, note)
2145
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at `targetpath'. Raise ExtractError when the os
       module has no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002153
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node at `targetpath'. Raise
       ExtractError when the os module lacks mknod() or makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the matching file type flag.
    mode = tarinfo.mode | (stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR)
    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2168
def makelink(self, tarinfo, targetpath):
    """Create a symbolic or hard link at `targetpath'. When the link
       cannot be created (symlink_exception is raised), or a hard
       link's target does not exist on disk, the referenced member is
       extracted to `targetpath' instead.
    """
    try:
        # For systems that support symbolic and hard links.
        if tarinfo.issym():
            os.symlink(tarinfo.linkname, targetpath)
        elif os.path.exists(tarinfo._link_target):
            # _link_target is prepared by extract().
            os.link(tarinfo._link_target, targetpath)
        else:
            referenced = self._find_link_target(tarinfo)
            self._extract_member(referenced, targetpath)
    except symlink_exception:
        try:
            referenced = self._find_link_target(tarinfo)
            self._extract_member(referenced, targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002191
def chown(self, tarinfo, targetpath):
    """Give `targetpath' the owner and group recorded in `tarinfo'.
       Nothing happens unless the effective user id is 0. Raise
       ExtractError when the ownership change fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Prefer the symbolic names; fall back to the numeric ids when the
    # names are unknown on this system.
    try:
        group_id = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        group_id = tarinfo.gid
    try:
        user_id = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        user_id = tarinfo.uid

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            change_owner = os.lchown
        else:
            change_owner = os.chown
        change_owner(targetpath, user_id, group_id)
    except OSError:
        raise ExtractError("could not change owner")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002212
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in `tarinfo' to `targetpath'.
       Raise ExtractError when the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except OSError:
        raise ExtractError("could not change mode")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002221
def utime(self, tarinfo, targetpath):
    """Stamp `targetpath' with the modification time stored in
       `tarinfo' (used for both access and modification time). Raise
       ExtractError when the time cannot be changed.
    """
    if hasattr(os, 'utime'):
        try:
            stamps = (tarinfo.mtime, tarinfo.mtime)
            os.utime(targetpath, stamps)
        except OSError:
            raise ExtractError("could not change modification time")
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002231
2232 #--------------------------------------------------------------------------
def next(self):
    """Return the next archive member as a TarInfo object, or None
       when no further member is available. The TarFile must be open
       in mode "r" or "a".
    """
    self._check("ra")

    # A member that was read ahead is handed out first, exactly once.
    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    def skip_block(error):
        # Report the unusable header and step over its block.
        self._dbg(2, "0x%X: %s" % (self.offset, error))
        self.offset += BLOCKSIZE

    # Read the next block.
    self.fileobj.seek(self.offset)
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as e:
            if self.ignore_zeros:
                skip_block(e)
                continue
        except InvalidHeaderError as e:
            if self.ignore_zeros:
                skip_block(e)
                continue
            elif self.offset == 0:
                raise ReadError(str(e))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as e:
            if self.offset == 0:
                raise ReadError(str(e))
        except SubsequentHeaderError as e:
            raise ReadError(str(e))
        # Either a header was read, or the error was tolerated and the
        # archive is treated as finished (tarinfo stays None).
        break

    if tarinfo is None:
        self._loaded = True
    else:
        self.members.append(tarinfo)

    return tarinfo
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002278
2279 #--------------------------------------------------------------------------
2280 # Little helper methods:
2281
def _getmember(self, name, tarinfo=None, normalize=False):
    """Look up the member called `name', scanning from the end of the
       archive towards its start, and return it (None if not found).
       With `tarinfo' given, only members stored before it are
       considered. With `normalize' set, both sides of the comparison
       go through os.path.normpath().
    """
    # getmembers() makes sure that all members have been loaded.
    candidates = self.getmembers()

    # Limit the member search list up to tarinfo.
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    if normalize:
        name = os.path.normpath(name)

        def key(entry):
            return os.path.normpath(entry.name)
    else:
        def key(entry):
            return entry.name

    for candidate in reversed(candidates):
        if name == key(candidate):
            return candidate
    return None
Andrew M. Kuchling864bba12004-07-10 22:02:11 +00002304
def _load(self):
    """Call next() until it returns None, then flag the archive as
       completely loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2314
def _check(self, mode=None):
    """Raise OSError if the TarFile is closed or, when `mode' is given,
       if the TarFile's own mode is not contained in `mode'.
    """
    if self.closed:
        raise OSError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise OSError("bad operation for mode %r" % self.mode)
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002323
def _find_link_target(self, tarinfo):
    """Return the archive member that the symlink or hardlink member
       `tarinfo' refers to. Raise KeyError if there is none.
    """
    if tarinfo.issym():
        # A symlink is resolved relative to the directory of the link
        # itself, and the entire archive is searched.
        parts = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join([part for part in parts if part])
        limit = None
    else:
        # Search the archive before the link, because a hard link is
        # just a reference to an already archived file.
        linkname = tarinfo.linkname
        limit = tarinfo

    target = self._getmember(linkname, tarinfo=limit, normalize=True)
    if target is None:
        raise KeyError("linkname %r not found" % linkname)
    return target
2342
def __iter__(self):
    """Return an iterator over the members: a plain iterator over the
       member list once the archive is fully loaded, a TarIter
       otherwise.
    """
    return iter(self.members) if self._loaded else TarIter(self)
2350
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr if `level' does not exceed the
       TarFile's debug setting.
    """
    enabled = level <= self.debug
    if enabled:
        print(msg, file=sys.stderr)
Lars Gustäbel01385812010-03-03 12:08:54 +00002356
def __enter__(self):
    """Enter the runtime context: verify via _check() that the TarFile
       is still open and return the TarFile itself.
    """
    self._check()
    return self
2360
def __exit__(self, type, value, traceback):
    """Leave the runtime context. Without an exception the archive is
       closed normally; with one, only the underlying file object is
       closed (unless it was supplied externally) and the TarFile is
       marked as closed.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because
    # it would try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002370# class TarFile
2371
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator positioned at the first member of
           `tarfile'.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def __next__(self):
        """Return the next member, reading it through TarFile's next()
           method when it is not in the member list yet. When all
           members have been read, set TarFile as _loaded.
        """
        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. The iterator
        # therefore keeps its own position in the member list.
        archive = self.tarfile

        if self.index == 0 and archive.firstmember is not None:
            item = archive.next()
        elif self.index < len(archive.members):
            item = archive.members[self.index]
        elif archive._loaded:
            raise StopIteration
        else:
            item = archive.next()
            if not item:
                archive._loaded = True
                raise StopIteration

        self.index += 1
        return item
2409
Neal Norwitzb9ef4ae2003-01-05 23:19:43 +00002410#--------------------
2411# exported functions
2412#--------------------
def is_tarfile(name):
    """Return True if `name' can be opened (and closed again) without
       a TarError being raised, else return False.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
2423
# Keep the previous `open' reachable as bltn_open, then let the
# module-level name `open' refer to TarFile.open.
bltn_open, open = open, TarFile.open
Serhiy Storchakad27b4552013-11-24 01:53:29 +02002426
2427
def main():
    """Command line interface of the tarfile module.

       Exactly one of the following actions is carried out:

         -l <tarfile>                 list the members of an archive
         -e <tarfile> [<output_dir>]  extract an archive (default: os.curdir)
         -c <name> [<file> ...]       create an archive from the given files
         -t <tarfile>                 test whether a file is a tar archive

       -v makes the chosen action more verbose. If the named file is not
       a tar archive, or no action is given, the program exits with
       status 1 through parser.exit().
    """
    import argparse

    description = 'A simple command line interface for tarfile module.'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-v', '--verbose', action='store_true', default=False,
                        help='Verbose output')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-l', '--list', metavar='<tarfile>',
                       help='Show listing of a tarfile')
    group.add_argument('-e', '--extract', nargs='+',
                       metavar=('<tarfile>', '<output_dir>'),
                       help='Extract tarfile into target dir')
    group.add_argument('-c', '--create', nargs='+',
                       metavar=('<name>', '<file>'),
                       help='Create tarfile from sources')
    group.add_argument('-t', '--test', metavar='<tarfile>',
                       help='Test if a tarfile is valid')
    args = parser.parse_args()

    if args.test:
        src = args.test
        if is_tarfile(src):
            with open(src, 'r') as tar:
                tar.getmembers()
                print(tar.getmembers(), file=sys.stderr)
            if args.verbose:
                print('{!r} is a tar archive.'.format(src))
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.list:
        src = args.list
        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.list(verbose=args.verbose)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.extract:
        # nargs='+' accepts any number of values; only one or two make
        # sense here.
        if len(args.extract) == 1:
            src = args.extract[0]
            curdir = os.curdir
        elif len(args.extract) == 2:
            src, curdir = args.extract
        else:
            parser.exit(1, parser.format_help())

        if is_tarfile(src):
            with TarFile.open(src, 'r:*') as tf:
                tf.extractall(path=curdir)
            if args.verbose:
                if curdir == '.':
                    msg = '{!r} file is extracted.'.format(src)
                else:
                    msg = ('{!r} file is extracted '
                           'into {!r} directory.').format(src, curdir)
                print(msg)
        else:
            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))

    elif args.create:
        tar_name = args.create.pop(0)
        _, ext = os.path.splitext(tar_name)
        # os.path.splitext() returns the extension including its leading
        # dot, so the keys must carry the dot as well; otherwise no
        # extension ever matches and the archive is written uncompressed.
        compressions = {
            # gz
            '.gz': 'gz',
            '.tgz': 'gz',
            # xz
            '.xz': 'xz',
            '.txz': 'xz',
            # bz2
            '.bz2': 'bz2',
            '.tbz': 'bz2',
            '.tbz2': 'bz2',
            '.tb2': 'bz2',
        }
        tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'
        tar_files = args.create

        with TarFile.open(tar_name, tar_mode) as tf:
            for file_name in tar_files:
                tf.add(file_name)

        if args.verbose:
            print('{!r} file created.'.format(tar_name))

    else:
        parser.exit(1, parser.format_help())
2517
# Run the command line interface when executed as a script.
if __name__ == "__main__":
    main()